### Get Coins with < $100 Million Markt Cap  from Binance API and save to a csv file

In [34]:
import os 
from binance.client import Client 
import pandas as pd 
from dotenv  import  load_dotenv 

load_dotenv()

API_KEY = os.getenv('BINANCE_API_KEY')
API_SECRET = os.getenv('BINANCE_API_SECRET')

client  = Client(API_KEY,API_SECRET)

def get_small_cap_coins(market_cap_threshold=100_000_000):
    """
    Fetch recently listed coins with market cap under the specified threshold.
    """
    #  all tickers
    tickers = client.get_all_tickers()
    
    # (market cap under $100M)
    small_cap_coins = []
    for ticker in tickers:
        symbol = ticker['symbol']
        if symbol.endswith('USDT'):  
            try:
                # 24hr ticker data for market cap calculation
                ticker_24hr = client.get_ticker(symbol=symbol)
                volume = float(ticker_24hr['quoteVolume'])
                price = float(ticker_24hr['lastPrice'])
                market_cap = volume * price
                
                if market_cap < market_cap_threshold:
                    small_cap_coins.append(symbol)
            except Exception as e:
                print(f"Error processing {symbol}: {e}")
    
    return small_cap_coins

def fetch_price_volume_data(symbol, interval='5m', limit=60):
    """
    Fetch OHLC and volume data for a given symbol.
    """
    try:
        klines = client.get_klines(symbol=symbol, interval=interval, limit=limit)
        return [{
            'timestamp': kline[0],
            'open': float(kline[1]),
            'high': float(kline[2]),
            'low': float(kline[3]),
            'close': float(kline[4]),
            'volume': float(kline[5])
        } for kline in klines]
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None


if __name__ == "__main__":
  
    small_cap_coins = get_small_cap_coins()
    print(f"Small-cap coins: {small_cap_coins}")

    small_cap_df = pd.DataFrame(small_cap_coins , columns=['Symbol'])
    small_cap_df.to_csv('small_cap_coins.csv', index = False)
    
    if small_cap_coins:
        coin_data = fetch_price_volume_data(small_cap_coins[0])
        if coin_data:
            coin_df = pd.DataFrame(coin_data)
            coin_df.to_csv(f'{small_cap_coins[0]}_price_volume_data.csv', index=False)
        print(f"Sample data for {small_cap_coins[0]}: {coin_data[:1]}")



### Improved Code and added progression bar   for script

In [None]:
import os
from binance.client import Client
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

# Load environment variables
load_dotenv()

API_KEY = os.getenv('BINANCE_API_KEY')
API_SECRET = os.getenv('BINANCE_API_SECRET')

client = Client(API_KEY, API_SECRET)

def get_filtered_symbols(market_cap_threshold=100_000_000, min_volume=1_000_000, min_days_listed=90):
    """
    Fetch USDT trading pairs with specific conditions:
      - Market cap below the threshold.
      - Minimum 24-hour trading volume.
      - Listed for at least the specified number of days.

    Args:
        market_cap_threshold (float): Maximum market cap.
        min_volume (float): Minimum 24-hour trading volume.
        min_days_listed (int): Minimum number of days listed.

    Returns:
        list: Filtered symbols meeting the criteria.
    """
    print("Fetching exchange info...")
    exchange_info = client.get_exchange_info()
    symbols = [
        symbol_info['symbol'] for symbol_info in exchange_info['symbols']
        if symbol_info['symbol'].endswith('USDT') and symbol_info['status'] == 'TRADING'
    ]

    filtered_symbols = []
    print("Processing symbols...")
    for symbol in tqdm(symbols):
        try:
            ticker_24hr = client.get_ticker(symbol=symbol)
            volume = float(ticker_24hr['quoteVolume'])
            price = float(ticker_24hr['lastPrice'])
            market_cap = volume * price

            # Fetch listing age (minimum 90 daily klines required)
            klines = client.get_klines(symbol=symbol, interval='1d', limit=min_days_listed)

            if (
                market_cap < market_cap_threshold
                and volume >= min_volume
                and len(klines) >= min_days_listed
            ):
                filtered_symbols.append({
                    'symbol': symbol,
                    'market_cap': market_cap,
                    'volume': volume,
                    'days_listed': len(klines)
                })
        except Exception as e:
            print(f"Error processing {symbol}: {e}")

    return sorted(filtered_symbols, key=lambda x: x['market_cap'])

def fetch_historical_data(symbol, interval='15m', days=14):
    """
    Fetch historical OHLC and volume data for a given symbol.

    Args:
        symbol (str): The cryptocurrency trading pair symbol (e.g., BTCUSDT).
        interval (str): The time interval for the kline data (e.g., '15m').
        days (int): Number of days to fetch data for.

    Returns:
        pd.DataFrame: DataFrame containing OHLC and volume data.
    """
    try:
        total_bars = days * 96  # 96 bars per day for 15m intervals
        print(f"Fetching historical data for {symbol} ({interval}, {days} days)...")
        klines = client.get_klines(symbol=symbol, interval=interval, limit=total_bars)

        data = [{
            'timestamp': pd.to_datetime(kline[0], unit='ms'),
            'open': float(kline[1]),
            'high': float(kline[2]),
            'low': float(kline[3]),
            'close': float(kline[4]),
            'volume': float(kline[5])
        } for kline in klines]

        return pd.DataFrame(data)

    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return pd.DataFrame()

def save_to_csv(data, filename):
    """
    Save a list of dictionaries or DataFrame to a CSV file.

    Args:
        data (list or pd.DataFrame): Data to save.
        filename (str): File name for the CSV.
    """
    if isinstance(data, list):
        df = pd.DataFrame(data)
    else:
        df = data

    df.to_csv(filename, index=False)
    print(f"Saved data to {filename}.")

if __name__ == "__main__":
    # Step 1: Filter symbols
    filtered_symbols = get_filtered_symbols()

    if not filtered_symbols:
        print("No symbols met the criteria.")
    else:
        print(f"Filtered symbols found: {len(filtered_symbols)}")
        save_to_csv(filtered_symbols, "filtered_symbols.csv")

        # Step 2: Fetch historical data for each filtered symbol
        for symbol_info in filtered_symbols:
            symbol_name = symbol_info['symbol']
            historical_data = fetch_historical_data(symbol_name, interval='15m', days=14)

            if not historical_data.empty:
                filename = f"{symbol_name}_15m_data.csv"
                save_to_csv(historical_data, filename)
            else:
                print(f"No historical data found for {symbol_name}.")


### get the circulating supply for every symbol you fetch from the Binance API and then use the CoinGecko API  total ciruclating supply to get market cap 

In [None]:


import requests

import concurrent.futures




def get_binance_symbols():
    """Fetch all trading pairs from Binance."""
    exchange_info = binance_client.get_exchange_info()
    symbols = [symbol_info['symbol'] for symbol_info in exchange_info['symbols'] if symbol_info['status'] == 'TRADING']
    return symbols

def get_price(symbol):
    """Get current price for a given symbol from Binance."""
    try:
        ticker = binance_client.get_ticker(symbol=symbol)
        return float(ticker['lastPrice'])
    except Exception as e:
        print(f"Error fetching price for {symbol}: {e}")
        return None

def get_circulating_supply(symbol):
    """Fetch circulating supply from CoinGecko."""
    coingecko_url = f"https://api.coingecko.com/api/v3/simple/price?ids={symbol}&vs_currencies=usd&include_market_cap=true"
    try:
        response = requests.get(coingecko_url)
        data = response.json()
        return data[symbol]['market_cap']
    except Exception as e:
        print(f"Error fetching circulating supply for {symbol}: {e}")
        return None

def calculate_market_cap(symbol):
    """Calculate market cap using price and circulating supply."""
    # Remove USDT from the symbol
    base_symbol = symbol.replace('USDT', '')
    
    price = get_price(symbol)
    circulating_supply = get_circulating_supply(base_symbol)
    
    if price is not None and circulating_supply is not None:
        market_cap = circulating_supply * price
        return symbol, market_cap
    return symbol, None

def main():
    symbols = get_binance_symbols()
    
    # Filter USDT pairs
    usdt_symbols = [symbol for symbol in symbols if symbol.endswith('USDT')]
    
    # Use ThreadPoolExecutor to fetch data in parallel
    with concurrent.futures.ThreadPoolExecutor() as executor:
        market_caps = list(executor.map(calculate_market_cap, usdt_symbols))
    
    # Filter out None values and create DataFrame
    valid_market_caps = [(symbol, cap) for symbol, cap in market_caps if cap is not None]
    
    # Create a DataFrame and save to CSV
    # df = pd.DataFrame(valid_market_caps, columns=['Symbol', 'Market Cap'])
    # df.to_csv('market_caps.csv', index=False)
    
    # print("Market caps calculated and saved to market_caps.csv")

if __name__ == "__main__":
    main()



### Binance API doesnt support giving market Cap info directly so i am using  2 weeks trading volume to approximate market cap  and it seems binance does not provide the date a coin was  listed so i would be fetching  historical 90 days of trading .

In [None]:
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
from binance.client import Client
from dotenv import load_dotenv
import os 
from tqdm import tqdm


load_dotenv()

API_KEY = os.getenv('BINANCE_API_KEY')
API_SECRET = os.getenv('BINANCE_API_SECRET')

client = Client(API_KEY, API_SECRET)



def get_filtered_symbols_parallel(market_cap_threshold=100_000_000, min_volume=1_000_000, min_days_listed=90):
    """
    Fetch USDT trading pairs with specific conditions using parallel processing:
      - Market cap below the threshold.
      - Minimum 24-hour trading volume.
      - Listed for at least the specified number of days.

    Args:
        market_cap_threshold (float): Maximum market cap.
        min_volume (float): Minimum 24-hour trading volume.
        min_days_listed (int): Minimum number of days listed.

    Returns:
        list: Filtered symbols meeting the criteria.
    """
    print("Fetching exchange info...")
    exchange_info = client.get_exchange_info()
    symbols = [
        symbol_info['symbol'] for symbol_info in exchange_info['symbols']
        if symbol_info['symbol'].endswith('USDT') and symbol_info['status'] == 'TRADING'
    ]

    def process_symbol(symbol):
        try:
            ticker_24hr = client.get_ticker(symbol=symbol)
            volume = float(ticker_24hr['quoteVolume'])
            price = float(ticker_24hr['lastPrice'])
            market_cap = volume * price

            # Fetch listing age (minimum 90 daily klines required)
            klines = client.get_klines(symbol=symbol, interval='1d', limit=min_days_listed)

            if (
                market_cap < market_cap_threshold
                and volume >= min_volume
                and len(klines) >= min_days_listed
            ):
                return {
                    'symbol': symbol,
                    'market_cap': market_cap,
                    'volume': volume,
                    'days_listed': len(klines)
                }
        except Exception as e:
            print(f"Error processing {symbol}: {e}")
        return None

    print("Processing symbols in parallel...")
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_symbol, symbols), total=len(symbols), desc="Processing symbols"))

    filtered_symbols = [result for result in results if result is not None]
    return sorted(filtered_symbols, key=lambda x: x['market_cap'])



def main():
    
    filtered_symbols = get_filtered_symbols_parallel()

    if not filtered_symbols:
        print("No symbols met the criteria.")
    else:
        print(f"Filtered symbols found: {len(filtered_symbols)}")
        
        # Print out the processed symbols
        for symbol_data in filtered_symbols:
            print(symbol_data)
    
    
    
    # df = pd.DataFrame(valid_market_caps, columns=['Symbol', 'Market Cap'])
    # df.to_csv('market_caps.csv', index=False)
    
    # print("Market caps calculated and saved to market_caps.csv")

if __name__ == "__main__":
    main()

Fetching exchange info...


Processing symbols in parallel...


Processing symbols: 100%|██████████| 388/388 [00:43<00:00,  8.84it/s]

Filtered symbols found: 271
{'symbol': 'BTTCUSDT', 'market_cap': 3.431525483660725, 'volume': 3036748.21562896, 'days_listed': 90}
{'symbol': 'XECUSDT', 'market_cap': 124.0905513619951, 'volume': 3351081.5922764, 'days_listed': 90}
{'symbol': 'WINUSDT', 'market_cap': 260.7597851457095, 'volume': 2501292.90307635, 'days_listed': 90}
{'symbol': 'LUNCUSDT', 'market_cap': 902.8821380995248, 'volume': 7712985.9738555, 'days_listed': 90}
{'symbol': 'SPELLUSDT', 'market_cap': 1616.107384083942, 'volume': 1995194.3013382, 'days_listed': 90}
{'symbol': 'SHIBUSDT', 'market_cap': 1731.5824583994297, 'volume': 71701136.9937652, 'days_listed': 90}
{'symbol': 'BONKUSDT', 'market_cap': 2368.479958411594, 'volume': 68255906.58246668, 'days_listed': 90}
{'symbol': 'VTHOUSDT', 'market_cap': 3622.840636113856, 'volume': 1250980.882636, 'days_listed': 90}
{'symbol': '1000SATSUSDT', 'market_cap': 4383.214340042455, 'volume': 23045290.9571107, 'days_listed': 90}
{'symbol': 'LEVERUSDT', 'market_cap': 4431.33




## use pandas to filter  coins with marketcap =<  100 Million Dollars

In [12]:
import os
from binance.client import Client
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# Load environment variables
load_dotenv()

API_KEY = os.getenv('BINANCE_API_KEY')
API_SECRET = os.getenv('BINANCE_API_SECRET')

client = Client(API_KEY, API_SECRET)

def get_filtered_symbols_parallel(market_cap_threshold=100_000_000, min_volume=1_000_000, min_days_listed=90):
    """
    Fetch USDT trading pairs with specific conditions using parallel processing:
      - Market cap below the threshold.
      - Minimum 24-hour trading volume.
      - Listed for at least the specified number of days.

    Returns:
        list: Filtered symbols meeting the criteria.
    """
    print("Fetching exchange info...")
    exchange_info = client.get_exchange_info()
    symbols = [
        symbol_info['symbol'] for symbol_info in exchange_info['symbols']
        if symbol_info['symbol'].endswith('USDT') and symbol_info['status'] == 'TRADING'
    ]

    def process_symbol(symbol):
        try:
            ticker_24hr = client.get_ticker(symbol=symbol)
            volume = float(ticker_24hr['quoteVolume'])
            price = float(ticker_24hr['lastPrice'])
            market_cap = volume * price

            # Fetch listing age (minimum 90 daily klines required)
            klines = client.get_klines(symbol=symbol, interval='1d', limit=min_days_listed)

            if (
                market_cap < market_cap_threshold
                and volume >= min_volume
                and len(klines) >= min_days_listed
            ):
                return {
                    'symbol': symbol,
                    'market_cap': market_cap,
                    'volume': volume,
                    'days_listed': len(klines)
                }
        except Exception as e:
            print(f"Error processing {symbol}: {e}")
        return None

    print("Processing symbols in parallel...")
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_symbol, symbols), total=len(symbols), desc="Processing symbols"))

    filtered_symbols = [result for result in results if result is not None]
    return sorted(filtered_symbols, key=lambda x: x['market_cap'])

def fetch_historical_data(symbol, interval='15m', days=14):
    """
    Fetch historical OHLC and volume data for a given symbol.

    Args:
        symbol (str): The cryptocurrency trading pair symbol (e.g., BonkUSDT).
        interval (str): The time interval for the kline data (e.g., '15m').
        days (int): Number of days to fetch data for.

    Returns:
        pd.DataFrame: DataFrame containing OHLC and volume data.
    """
    try:
        print(f"Fetching historical data for {symbol} ({interval}, {days} days)...")
        since = int((pd.Timestamp.utcnow() - pd.Timedelta(days=days)).timestamp() * 1000)

        klines = client.get_historical_klines(symbol, interval, start_str=since)

        data = [{
            'timestamp': pd.to_datetime(kline[0], unit='ms'),
            'open': float(kline[1]),
            'high': float(kline[2]),
            'low': float(kline[3]),
            'close': float(kline[4]),
            'volume': float(kline[5])
        } for kline in klines]

        return pd.DataFrame(data)

    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return pd.DataFrame()

def main():
   
    filtered_symbols = get_filtered_symbols_parallel()

    if not filtered_symbols:
        print("No symbols met the criteria.")
    else:
        print(f"Filtered symbols found: {len(filtered_symbols)}")

        all_data = []
        for symbol_data in tqdm(filtered_symbols[:4], desc="Fetching historical data"):
            symbol = symbol_data['symbol']
            ohlc_data = fetch_historical_data(symbol, interval='15m', days=14)

            if not ohlc_data.empty:
                ohlc_data['symbol'] = symbol.replace('USDT', '')                                    
                ohlc_data['market_cap'] = symbol_data['market_cap']
                # ohlc_data['market_cap'] = ohlc_data['market_cap'].apply(lambda x: "{:,.2f}".format(x * 1_000_000))
                ohlc_data['market_cap'] = ohlc_data['market_cap'] * 1e6
                ohlc_data['volume'] = symbol_data['volume']

                


                ohlc_data[['open', 'high', 'low', 'close']] = ohlc_data[['open', 'high', 'low', 'close']].map(lambda x: f"{x:.8f}")
               
                all_data.append(ohlc_data[['timestamp','symbol', 'market_cap','open', 'high', 'low', 'close', 'volume']])

        
        if all_data:
            combined_data = pd.concat(all_data, ignore_index=True)
            combined_data.to_csv("small_capped_coins.csv", index=False)
            print("Historical data and market cap information saved to small_capped_coins.csv.")
        else:
            print("No historical data available for the filtered symbols.")

if __name__ == "__main__":
    main()


Fetching exchange info...
Processing symbols in parallel...


Processing symbols:  25%|██▍       | 96/388 [00:10<00:28, 10.37it/s]

Processing symbols: 100%|██████████| 388/388 [00:40<00:00,  9.65it/s]


Filtered symbols found: 278


Fetching historical data:   0%|          | 0/4 [00:00<?, ?it/s]

Fetching historical data for BTTCUSDT (15m, 14 days)...


Fetching historical data:  25%|██▌       | 1/4 [00:03<00:11,  3.88s/it]

Fetching historical data for XECUSDT (15m, 14 days)...


Fetching historical data:  50%|█████     | 2/4 [00:07<00:07,  3.52s/it]

Fetching historical data for WINUSDT (15m, 14 days)...


Fetching historical data:  75%|███████▌  | 3/4 [00:10<00:03,  3.51s/it]

Fetching historical data for LUNCUSDT (15m, 14 days)...


Fetching historical data: 100%|██████████| 4/4 [00:14<00:00,  3.56s/it]


Historical data and market cap information saved to small_capped_coins.csv.


### Preprocess , Prepare  and Normalise the Data

In [15]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

def preprocess_and_normalize_data(input_file, output_file, min_volume_threshold=1_000_000, max_market_cap=100_000_000):
    """
    Preprocess and normalize the data:
    - Handle missing values.
    - Remove low-volume assets.
    - Remove high market cap assets.
    - Normalize numeric columns.
    - Save the preprocessed data to a new file.

    Args:
        input_file (str): Path to the CSV file to preprocess.
        output_file (str): Path to save the preprocessed and normalized data.
        min_volume_threshold (float): Minimum volume threshold to filter assets.
        max_market_cap (float): Maximum market cap threshold to filter assets.
    """
    print(f"Loading data from {input_file}...")
    try:
        data = pd.read_csv(input_file)

        # Handle missing values by interpolation
        print("Handling missing values...")
        for col in tqdm(data.columns, desc="Interpolating missing values"):
            data[col] = data[col].interpolate(method='linear', limit_direction='forward')
        data.dropna(inplace=True)

        # Remove low-volume assets (if 'volume' column exists)
        if 'volume' in data.columns:
            print("Removing low-volume assets...")
            data = data[data['volume'] >= min_volume_threshold]

        # Remove high market cap assets (if 'market_cap' column exists)
        if 'market_cap' in data.columns:
            print("Removing high market cap assets...")
            data = data[data['market_cap'] <= max_market_cap]

        # Normalize numeric columns
        print("Normalizing data...")
        numeric_cols = ['open', 'high', 'low', 'close']
        scaler = MinMaxScaler()
        for col in tqdm(numeric_cols, desc="Normalizing numeric columns"):
            data[col] = scaler.fit_transform(data[[col]])

        # Round volume and market_cap values to 2 decimal places
        data['volume'] = data['volume'].round(2)
        data['market_cap'] = data['market_cap'].round(2)

        # Save the preprocessed data
        print(f"Saving preprocessed data to {output_file}...")
        data.to_csv(output_file, index=False)
        print("Preprocessing complete.")

    except Exception as e:
        print(f"Error during preprocessing: {e}")

if __name__ == "__main__":
    input_csv = "small_capped_coins.csv"  
    output_csv = "preprocessed_small_capped_coins.csv" 
    preprocess_and_normalize_data(input_csv, output_csv)

Loading data from small_capped_coins.csv...
Handling missing values...


  data[col] = data[col].interpolate(method='linear', limit_direction='forward')
Interpolating missing values: 100%|██████████| 8/8 [00:00<00:00, 211.97it/s]


Removing low-volume assets...
Removing high market cap assets...
Normalizing data...


Normalizing numeric columns: 100%|██████████| 4/4 [00:00<00:00, 57.39it/s]


Saving preprocessed data to preprocessed_small_capped_coins.csv...
Preprocessing complete.


Merge from Data Collection to Preprocessing and Normalization together


In [20]:
import os
from binance.client import Client
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from sklearn.preprocessing import MinMaxScaler

# Load environment variables
load_dotenv()

API_KEY = os.getenv('BINANCE_API_KEY')
API_SECRET = os.getenv('BINANCE_API_SECRET')

client = Client(API_KEY, API_SECRET)

def preprocess_and_normalize_data(data, min_volume_threshold=1_000_000, max_market_cap=100_000_000):
    """
    Preprocess and normalize the data:
    - Handle missing values.
    - Remove low-volume assets.
    - Remove high market cap assets.
    - Normalize numeric columns.

    Args:
        data (pd.DataFrame): DataFrame containing the data to preprocess.
        min_volume_threshold (float): Minimum volume threshold to filter assets.
        max_market_cap (float): Maximum market cap threshold to filter assets.

    Returns:
        pd.DataFrame: Preprocessed and normalized DataFrame.
    """
    # Handle missing values by interpolation
    print("Handling missing values...")
    for col in tqdm(data.columns, desc="Interpolating missing values"):
        data[col] = data[col].interpolate(method='linear', limit_direction='forward')
    data.dropna(inplace=True)

    # Remove low-volume assets (if 'volume' column exists)
    if 'volume' in data.columns:
        print("Removing low-volume assets...")
        data = data[data['volume'] >= min_volume_threshold]

    # Remove high market cap assets (if 'market_cap' column exists)
    if 'market_cap' in data.columns:
        print("Removing high market cap assets...")
        data = data[data['market_cap'] <= max_market_cap]

    # Normalize numeric columns
    print("Normalizing data...")
    numeric_cols = ['open', 'high', 'low', 'close']
    scaler = MinMaxScaler()
    for col in tqdm(numeric_cols, desc="Normalizing numeric columns"):
        data[col] = scaler.fit_transform(data[[col]])

    # Round volume and market_cap values to 2 decimal places
    data['volume'] = data['volume'].round(2)
    data['market_cap'] = data['market_cap'].round(2)

    return data

def get_filtered_symbols_parallel(market_cap_threshold=100_000_000, min_volume=1_000_000, min_days_listed=90):
    """
    Fetch USDT trading pairs with specific conditions using parallel processing:
      - Market cap below the threshold.
      - Minimum 24-hour trading volume.
      - Listed for at least the specified number of days.

    Returns:
        list: Filtered symbols meeting the criteria.
    """
    print("Fetching exchange info...")
    exchange_info = client.get_exchange_info()
    symbols = [
        symbol_info['symbol'] for symbol_info in exchange_info['symbols']
        if symbol_info['symbol'].endswith('USDT') and symbol_info['status'] == 'TRADING'
    ]

    def process_symbol(symbol):
        try:
            ticker_24hr = client.get_ticker(symbol=symbol)
            volume = float(ticker_24hr['quoteVolume'])
            price = float(ticker_24hr['lastPrice'])
            market_cap = volume * price

            # Fetch listing age (minimum 90 daily klines required)
            klines = client.get_klines(symbol=symbol, interval='1d', limit=min_days_listed)

            if (
                market_cap < market_cap_threshold
                and volume >= min_volume
                and len(klines) >= min_days_listed
            ):
                return {
                    'symbol': symbol,
                    'market_cap': market_cap,
                    'volume': volume,
                    'days_listed': len(klines)
                }
        except Exception as e:
            print(f"Error processing {symbol}: {e}")
        return None

    print("Processing symbols in parallel...")
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_symbol, symbols), total=len(symbols), desc="Processing symbols"))

    filtered_symbols = [result for result in results if result is not None]
    return sorted(filtered_symbols, key=lambda x: x['market_cap'])

def fetch_historical_data(symbol, interval='15m', days=14):
    """
    Fetch historical OHLC and volume data for a given symbol.

    Args:
        symbol (str): The cryptocurrency trading pair symbol (e.g., BonkUSDT).
        interval (str): The time interval for the kline data (e.g., '15m').
        days (int): Number of days to fetch data for.

    Returns:
        pd.DataFrame: DataFrame containing OHLC and volume data.
    """
    try:
        print(f"Fetching historical data for {symbol} ({interval}, {days} days)...")
        since = int((pd.Timestamp.utcnow() - pd.Timedelta(days=days)).timestamp() * 1000)

        klines = client.get_historical_klines(symbol, interval, start_str=since)

        data = [{
            'timestamp': pd.to_datetime(kline[0], unit='ms'),
            'open': float(kline[1]),
            'high': float(kline[2]),
            'low': float(kline[3]),
            'close': float(kline[4]),
            'volume': float(kline[5])
        } for kline in klines]

        return pd.DataFrame(data)

    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return pd.DataFrame()

def main():
   
    filtered_symbols = get_filtered_symbols_parallel()

    if not filtered_symbols:
        print("No symbols met the criteria.")
    else:
        print(f"Filtered symbols found: {len(filtered_symbols)}")

        all_data = pd.DataFrame()
        for symbol_data in tqdm(filtered_symbols[:4], desc="Fetching historical data"):
            symbol = symbol_data['symbol']
            ohlc_data = fetch_historical_data(symbol, interval='15m', days=14)

            if not ohlc_data.empty:
                modified_symbol = symbol.replace('USDT', '')  
                ohlc_data['symbol'] = modified_symbol  
                ohlc_data['market_cap'] = symbol_data['market_cap']
                ohlc_data['volume'] = symbol_data['volume']

                ohlc_data[['open', 'high', 'low', 'close']] = ohlc_data[['open', 'high', 'low', 'close']].map(lambda x: f"{x:.8f}")
               
                all_data = pd.concat([all_data, ohlc_data], ignore_index=True)

        
        if not all_data.empty:
            preprocessed_data = preprocess_and_normalize_data(all_data)
            preprocessed_data.to_csv("small_capped_coinss.csv", index=False)
            print("Historical data and market cap information saved to small_capped_coins.csv.")
        else:
            print("No historical data available for the filtered symbols.")

if __name__ == "__main__":
    main()

Fetching exchange info...


Processing symbols in parallel...


Processing symbols: 100%|██████████| 388/388 [00:42<00:00,  9.20it/s]


Filtered symbols found: 273


Fetching historical data:   0%|          | 0/4 [00:00<?, ?it/s]

Fetching historical data for BTTCUSDT (15m, 14 days)...


Fetching historical data:  25%|██▌       | 1/4 [00:02<00:08,  2.97s/it]

Fetching historical data for XECUSDT (15m, 14 days)...


Fetching historical data:  50%|█████     | 2/4 [00:06<00:07,  3.53s/it]

Fetching historical data for WINUSDT (15m, 14 days)...


Fetching historical data:  75%|███████▌  | 3/4 [00:10<00:03,  3.63s/it]

Fetching historical data for LUNCUSDT (15m, 14 days)...


Fetching historical data: 100%|██████████| 4/4 [00:13<00:00,  3.38s/it]


Handling missing values...


  data[col] = data[col].interpolate(method='linear', limit_direction='forward')
Interpolating missing values: 100%|██████████| 8/8 [00:00<00:00, 116.19it/s]


Removing low-volume assets...
Removing high market cap assets...
Normalizing data...


Normalizing numeric columns: 100%|██████████| 4/4 [00:00<00:00, 23.07it/s]


Historical data and market cap information saved to small_capped_coins.csv.


# TECHNICAL INDICATORS