In [40]:
!poetry add $(cat requirements.txt)

The following packages are already present in the pyproject.toml and will be skipped:

  - [36mpandas[39m
  - [36mscikit-learn[39m
  - [36mpython-binance[39m
  - [36mjoblib[39m

If you want to update it to the latest compatible version, you can use `poetry update package`.
If you prefer to upgrade it to the latest available version, you can use `poetry add package@latest`.

Nothing to add.


## COLLECT DATA


In [52]:
from binance.client import Client

client = Client()
exchange_info = client.get_exchange_info()

print(f"market info: {len(exchange_info['symbols'])}")


market info: 2873


In [53]:
import pandas as pd

tickers = client.get_ticker()
tickers_df = pd.DataFrame(tickers).set_index('symbol')
tickers_df['quoteVolume'] = pd.to_numeric(tickers_df['quoteVolume'])

print(tickers_df.head(5))

         priceChange priceChangePercent weightedAvgPrice prevClosePrice  \
symbol                                                                    
ETHBTC    0.00099000              2.736       0.03666556     0.03617000   
LTCBTC    0.00002400              2.141       0.00114036     0.00112100   
BNBBTC    0.00000900              0.123       0.00727369     0.00730000   
NEOBTC    0.00000740              4.701       0.00016320     0.00015730   
QTUMETH  -0.00000200             -0.209       0.00095592     0.00095600   

          lastPrice      lastQty    bidPrice       bidQty    askPrice  \
symbol                                                                  
ETHBTC   0.03717000   0.49170000  0.03717000  27.07010000  0.03718000   
LTCBTC   0.00114500   1.54700000  0.00114400  76.67700000  0.00114500   
BNBBTC   0.00730800   7.44600000  0.00730600   2.17400000  0.00730700   
NEOBTC   0.00016480   4.24000000  0.00016480  34.36000000  0.00016490   
QTUMETH  0.00095300  83.70000000  0.

In [54]:
import os


cache_dir = "./.cache"
data_dir = "./data"

os.makedirs(cache_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)

In [55]:
from os import makedirs


cache_dir = "./.cache"
data_dir = "./data"
makedirs(cache_dir, exist_ok=True)
makedirs(data_dir, exist_ok=True)

In [56]:
from joblib import Memory

memory = Memory(cache_dir)

@memory.cache
def get_klines_cached(symbol, interval, limit):
    return client.get_klines(symbol=symbol, interval=interval, limit=limit)


In [57]:
MARKET_FILTERS = {
    'MAX_DAYS': 90,
    'MIN_VOLUME': 50_000,
    'MAX_VOLUME': 10_000_000,
    'MIN_MARKET_CAP': 1_000_000,
    'MAX_MARKET_CAP': 100_000_000
}

In [58]:
BATCH_SIZE = 50
SLEEP_TIME = 1

@memory.cache
def get_klines_cached(symbol, interval, limit):
    return client.get_klines(symbol=symbol, interval=interval, limit=limit)

# Get initial market data
exchange_info = client.get_exchange_info()
tickers = client.get_ticker()
tickers_df = pd.DataFrame(tickers).set_index('symbol')
tickers_df['quoteVolume'] = pd.to_numeric(tickers_df['quoteVolume'])

# Filter USDT pairs
valid_pairs = pd.DataFrame(exchange_info['symbols'])
valid_pairs = valid_pairs[
    (valid_pairs['symbol'].str.endswith('USDT')) & 
    (valid_pairs['status'] == 'TRADING')
]['symbol']

# Filter by volume
volume_mask = tickers_df['quoteVolume'].between(50_000, 10_000_000)
filtered_pairs = tickers_df[tickers_df.index.isin(valid_pairs) & volume_mask]

print(f"Found {len(filtered_pairs)} pairs meeting initial criteria")

Found 257 pairs meeting initial criteria


In [60]:
import time

candidates_data = []

# Split into batches
symbols = list(filtered_pairs.index)
batches = [symbols[i:i + BATCH_SIZE] for i in range(0, len(symbols), BATCH_SIZE)]

for batch in batches:
    # Process batch
    for symbol in batch:
        try:
            klines = get_klines_cached(symbol, "1d", 90)
            if not klines or len(klines) >= 90:
                continue
                
            ticker = filtered_pairs.loc[symbol]
            market_cap = float(ticker.get('marketCap', float(ticker['quoteVolume']) * 7))
            
            if not (1_000_000 <= market_cap <= 100_000_000):
                continue

            candidates_data.append({
                'symbol': symbol,
                'days_listed': len(klines),
                'market_cap': market_cap,
                'volume_24h': ticker['quoteVolume']
            })

        except Exception as e:
            print(f"Error processing {symbol}: {str(e)}")
            continue
    
    time.sleep(SLEEP_TIME)

candidates_df = pd.DataFrame(candidates_data).set_index('symbol')
print(f"Found {len(candidates_df)} final candidates")



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('NEOUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.7s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('QTUMUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('TUSDUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('MTLUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('DENTUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('WANUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
_____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('DATAUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.7s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('CTSIUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('CHRUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('LUNAUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('PAXGUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('TRBUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('ROSEUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.7s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('AVAUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('SKLUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
_____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('FISUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('DEGOUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('ALICEUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.6s, 0.0min
___________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('C98USDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('CLVUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.6s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('QNTUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.7s, 0.0min
______________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('SYSUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('FIDAUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('RADUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
_____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('FLUXUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('FXSUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('VOXELUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
___________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('BSWUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('BIFIUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('NEXOUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('AMBUSDT', '1d', 90)
________________________________________________get_klines_cached - 1.0s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('USTCUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.9s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('GASUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
_____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('NTRNUSDT', '1d', 90)
________________________________________________get_klines_cached - 1.8s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('PIVXUSDT', '1d', 90)
________________________________________________get_klines_cached - 1.5s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('VICUSDT', '1d', 90)
________________________________________________get_klines_cached - 1.7s, 0.0min
____________________________________________



________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('SAGAUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('OMNIUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.4s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached('REZUSDT', '1d', 90)
________________________________________________get_klines_cached - 0.5s, 0.0min
____________________________________________

Processing batches: 100%|██████████| 13/13 [02:50<00:00, 13.08s/it]

Found 4 final candidates





In [61]:
historical_data = {}

for symbol in candidates_df.index:
    try:
        klines = get_klines_cached(
            symbol=symbol,
            interval="15m",
            limit=MARKET_FILTERS['MAX_DAYS'] * 24
        )

        df = pd.DataFrame(klines, columns=[
            'timestamp', 'open', 'high', 'low', 'close', 'volume',
            'close_time', 'quote_volume', 'trades',
            'taker_buy_volume', 'taker_buy_quote_volume', 'ignore'
        ])

        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        df.set_index('timestamp', inplace=True)
        
        for col in df.columns:
            if col not in ['close_time', 'ignore']:
                df[col] = pd.to_numeric(df[col])

        historical_data[symbol] = df

    except Exception as e:
        print(f"Error fetching history for {symbol}: {str(e)}")

________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached(symbol='BNSOLUSDT', interval='15m', limit=2160)
________________________________________________get_klines_cached - 1.6s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached(symbol='LUMIAUSDT', interval='15m', limit=2160)
________________________________________________get_klines_cached - 0.8s, 0.0min
________________________________________________________________________________
[Memory] Calling __main__--var-folders-2m-46s827d11c138msl2pcj2_4m0000gn-T-ipykernel-2938877048.get_klines_cached...
get_klines_cached(symbol='KAIAUSDT', interval='15m', limit=2160)
_______________________________________________