### Importazione librerie

In [None]:
import os
import requests
import zipfile
import pandas as pd
from io import BytesIO
import ccxt
import time
import numpy as np

from binance import AsyncClient, BinanceSocketManager
import nest_asyncio
from pybit.unified_trading import WebSocket

In [None]:
# https://data.binance.vision/?prefix=

### Download dati "bookDepth" da Binance

In [None]:
def download_and_extract_zip(url, extract_to='.'):
    response = requests.get(url)
    if response.status_code == 200:
        with zipfile.ZipFile(BytesIO(response.content)) as thezip:
            thezip.extractall(extract_to)
    else:
        print(f"Errore nello scaricare {url}: Codice {response.status_code}")

def process_order_book_data(file_path):
    try:
        df = pd.read_csv(file_path)
        return df
    except Exception as e:
        print(f"Errore nel processare {file_path}: {e}")
        return None

In [3]:
# Parametri
year = 2024
months = range(1, 13)
base_url = 'https://data.binance.vision/data/futures/um/daily/bookDepth/BTCUSDT/'
output_csv = 'BTCUSDT_order_book_2024.csv'

# Lista per contenere tutti i DataFrame
all_data = []

# Scarica, estrai e processa i dati per ogni giorno del 2024
for month in months:
    for day in range(1, 32):
        date_str = f'{year}-{month:02d}-{day:02d}'
        zip_filename = f'BTCUSDT-bookDepth-{date_str}.zip'
        url = f'{base_url}{zip_filename}'
        extract_path = f'./data/{date_str}/'
        os.makedirs(extract_path, exist_ok=True)
        
        try:
            download_and_extract_zip(url, extract_to=extract_path)
            csv_filename = zip_filename.replace('.zip', '.csv')
            file_path = os.path.join(extract_path, csv_filename)
            
            if os.path.exists(file_path):
                df = process_order_book_data(file_path)
                if df is not None:
                    all_data.append(df)
            else:
                print(f"File non trovato: {file_path}")
        except Exception as e:
            print(f"Errore nel processare {date_str}: {e}")

# Concatenare tutti i DataFrame in uno solo
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df.to_csv(output_csv, index=False)
    print(f"Dati concatenati salvati in {output_csv}")
else:
    print("Nessun dato da concatenare.")

Errore nel scaricare https://data.binance.vision/data/futures/um/daily/bookDepth/BTCUSDT/BTCUSDT-bookDepth-2024-02-30.zip: Codice 404
File non trovato: ./data/2024-02-30/BTCUSDT-bookDepth-2024-02-30.csv
Errore nel scaricare https://data.binance.vision/data/futures/um/daily/bookDepth/BTCUSDT/BTCUSDT-bookDepth-2024-02-31.zip: Codice 404
File non trovato: ./data/2024-02-31/BTCUSDT-bookDepth-2024-02-31.csv
Errore nel scaricare https://data.binance.vision/data/futures/um/daily/bookDepth/BTCUSDT/BTCUSDT-bookDepth-2024-04-18.zip: Codice 404
File non trovato: ./data/2024-04-18/BTCUSDT-bookDepth-2024-04-18.csv
Errore nel scaricare https://data.binance.vision/data/futures/um/daily/bookDepth/BTCUSDT/BTCUSDT-bookDepth-2024-04-31.zip: Codice 404
File non trovato: ./data/2024-04-31/BTCUSDT-bookDepth-2024-04-31.csv
Errore nel scaricare https://data.binance.vision/data/futures/um/daily/bookDepth/BTCUSDT/BTCUSDT-bookDepth-2024-06-31.zip: Codice 404
File non trovato: ./data/2024-06-31/BTCUSDT-bookDepth-

### Download dati "metrics" da Binance

In [38]:
def download_and_extract_zip(url, extract_to='.'):
    response = requests.get(url)
    if response.status_code == 200:
        with zipfile.ZipFile(BytesIO(response.content)) as thezip:
            thezip.extractall(extract_to)
    else:
        print(f"Errore nello scaricare {url}: Codice {response.status_code}")

def process_order_book_data(file_path):
    try:
        df = pd.read_csv(file_path)
        return df
    except Exception as e:
        print(f"Errore nel processare {file_path}: {e}")
        return None

In [39]:
# Parametri
year = 2024
months = range(1, 13)
base_url = 'https://data.binance.vision/data/futures/um/daily/metrics/BTCUSDT/'
output_csv = 'BTCUSDT_metrics_2024.csv'

# Lista per contenere tutti i DataFrame
all_data = []

# Scarica, estrai e processa i dati per ogni giorno del 2024
for month in months:
    for day in range(1, 32):
        date_str = f'{year}-{month:02d}-{day:02d}'
        zip_filename = f'BTCUSDT-metrics-{date_str}.zip'
        url = f'{base_url}{zip_filename}'
        extract_path = f'./data/{date_str}/'
        os.makedirs(extract_path, exist_ok=True)
        
        try:
            download_and_extract_zip(url, extract_to=extract_path)
            csv_filename = zip_filename.replace('.zip', '.csv')
            file_path = os.path.join(extract_path, csv_filename)
            
            if os.path.exists(file_path):
                df = process_order_book_data(file_path)
                if df is not None:
                    all_data.append(df)
            else:
                print(f"File non trovato: {file_path}")
        except Exception as e:
            print(f"Errore nel processare {date_str}: {e}")

# Concatenare tutti i DataFrame in uno solo
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df.to_csv(output_csv, index=False)
    print(f"Dati concatenati salvati in {output_csv}")
else:
    print("Nessun dato da concatenare.")

Errore nello scaricare https://data.binance.vision/data/futures/um/daily/metrics/BTCUSDT/BTCUSDT-metrics-2024-02-30.zip: Codice 404
File non trovato: ./data/2024-02-30/BTCUSDT-metrics-2024-02-30.csv
Errore nello scaricare https://data.binance.vision/data/futures/um/daily/metrics/BTCUSDT/BTCUSDT-metrics-2024-02-31.zip: Codice 404
File non trovato: ./data/2024-02-31/BTCUSDT-metrics-2024-02-31.csv
Errore nello scaricare https://data.binance.vision/data/futures/um/daily/metrics/BTCUSDT/BTCUSDT-metrics-2024-04-31.zip: Codice 404
File non trovato: ./data/2024-04-31/BTCUSDT-metrics-2024-04-31.csv
Errore nello scaricare https://data.binance.vision/data/futures/um/daily/metrics/BTCUSDT/BTCUSDT-metrics-2024-06-31.zip: Codice 404
File non trovato: ./data/2024-06-31/BTCUSDT-metrics-2024-06-31.csv
Errore nello scaricare https://data.binance.vision/data/futures/um/daily/metrics/BTCUSDT/BTCUSDT-metrics-2024-09-31.zip: Codice 404
File non trovato: ./data/2024-09-31/BTCUSDT-metrics-2024-09-31.csv
Error

### Preprocessing order_book_df

#### Lettura order_book_df

In [None]:
# Leggiamo il CSV e convertiamo la colonna "timestamp" in datetime
order_book_df = pd.read_csv('BTCUSDT_bookDepth_2024.csv', parse_dates=['timestamp'])
order_book_df

Timestamp unici trovati: 1047474


Unnamed: 0,timestamp,percentage,depth,notional
0,2024-01-01 00:00:10,-5,11063.461,4.573793e+08
1,2024-01-01 00:00:10,-4,9238.593,3.835965e+08
2,2024-01-01 00:00:10,-3,7446.324,3.103917e+08
3,2024-01-01 00:00:10,-2,5563.144,2.326627e+08
4,2024-01-01 00:00:10,-1,2115.404,8.904219e+07
...,...,...,...,...
10474735,2024-12-31 23:59:31,1,982.935,9.233265e+07
10474736,2024-12-31 23:59:31,2,1719.843,1.623170e+08
10474737,2024-12-31 23:59:31,3,2564.155,2.432439e+08
10474738,2024-12-31 23:59:31,4,3128.343,2.978457e+08


#### Comprimo e allargo

In [6]:
# Creiamo il pivot per 'depth'
# pivot_depth = order_book_df.pivot(index='timestamp', columns='percentage', values='depth')
# pivot_depth.columns.name = None  # Rimuove il nome 'percentage' dall'indice delle colonne
# # Rinominiamo le colonne: ad esempio, il livello -5 diventerà 'depth_-5'
# pivot_depth = pivot_depth.rename(columns=lambda x: f'depth_{x}')

# Creiamo il pivot per 'notional'
pivot_notional = order_book_df.pivot(index='timestamp', columns='percentage', values='notional')
pivot_notional.columns.name = None  # Rimuove il nome 'percentage'
pivot_notional = pivot_notional.rename(columns=lambda x: f'notional_{x}')

order_book_df = pivot_notional.copy().reset_index()

order_book_df.set_index('timestamp', inplace=True)

order_book_df.index = pd.to_datetime(order_book_df.index)

# Visualizziamo le prime righe per controllo
order_book_df

Unnamed: 0_level_0,notional_-5,notional_-4,notional_-3,notional_-2,notional_-1,notional_1,notional_2,notional_3,notional_4,notional_5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-01-01 00:00:10,4.573793e+08,3.835965e+08,3.103917e+08,2.326627e+08,8.904219e+07,6.138488e+07,1.655435e+08,2.243606e+08,3.094913e+08,3.338707e+08
2024-01-01 00:00:39,4.698861e+08,3.965018e+08,3.234177e+08,2.466879e+08,1.037588e+08,7.441209e+07,1.769790e+08,2.354876e+08,3.209383e+08,3.454925e+08
2024-01-01 00:01:02,4.672116e+08,3.951836e+08,3.222620e+08,2.454034e+08,9.290719e+07,7.773906e+07,1.954975e+08,2.438128e+08,3.240160e+08,3.507592e+08
2024-01-01 00:01:30,4.652296e+08,3.921109e+08,3.198639e+08,2.443515e+08,9.257850e+07,7.688565e+07,1.955076e+08,2.435765e+08,3.230847e+08,3.591870e+08
2024-01-01 00:02:02,4.618363e+08,3.890863e+08,3.170328e+08,2.353286e+08,9.216747e+07,7.944649e+07,1.977384e+08,2.455497e+08,3.251416e+08,3.612228e+08
...,...,...,...,...,...,...,...,...,...,...
2024-12-31 23:57:31,8.617161e+08,7.436043e+08,4.669481e+08,3.129128e+08,1.656252e+08,9.856389e+07,1.738287e+08,2.545384e+08,3.091885e+08,3.754364e+08
2024-12-31 23:58:01,8.624637e+08,7.425072e+08,4.755094e+08,3.124196e+08,1.654890e+08,9.721174e+07,1.730429e+08,2.523438e+08,3.069253e+08,3.734684e+08
2024-12-31 23:58:30,8.648124e+08,7.454401e+08,4.778244e+08,3.149477e+08,1.676078e+08,9.718822e+07,1.711980e+08,2.523917e+08,3.050670e+08,3.734145e+08
2024-12-31 23:59:02,8.540163e+08,7.339014e+08,4.695423e+08,3.076486e+08,1.622543e+08,9.501509e+07,1.647492e+08,2.450605e+08,2.997077e+08,3.664387e+08


### Preprocessing metrics_df

#### Lettura metrics_df

In [47]:
# Leggiamo il CSV e convertiamo la colonna "timestamp" in datetime
metrics_df = pd.read_csv('BTCUSDT_metrics_2024.csv', parse_dates=['create_time'])
metrics_df = metrics_df.drop(columns=['symbol', 'sum_open_interest'])
metrics_df.rename(columns={'create_time': 'timestamp'}, inplace=True)
metrics_df.set_index('timestamp', inplace=True)
metrics_df.index = pd.to_datetime(metrics_df.index)
metrics_df

Unnamed: 0_level_0,sum_open_interest_value,count_toptrader_long_short_ratio,sum_toptrader_long_short_ratio,count_long_short_ratio,sum_taker_long_short_vol_ratio
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,3.131494e+09,1.368203,1.253668,1.507109,1.311745
2024-01-01 00:05:00,3.133155e+09,1.361972,1.260603,1.507566,2.837214
2024-01-01 00:10:00,3.136770e+09,1.355922,1.261364,1.502659,1.217697
2024-01-01 00:15:00,3.147195e+09,1.353287,1.262726,1.499593,1.520019
2024-01-01 00:20:00,3.139644e+09,1.343043,1.261973,1.485684,1.056207
...,...,...,...,...,...
2024-12-31 23:40:00,8.527129e+09,1.802805,2.109118,1.809816,1.074214
2024-12-31 23:45:00,8.529586e+09,1.807319,2.110396,1.815404,0.947209
2024-12-31 23:50:00,8.534817e+09,1.810354,2.109759,1.819173,1.414375
2024-12-31 23:55:00,8.541648e+09,1.808813,2.113731,1.819643,1.801901


### Integrazione dei dati classici sulle candele

In [49]:
candles_df = pd.read_csv('Bitcoin (USDT) 2024 timeframe 5m Binance.csv')
candles_df.set_index('timestamp', inplace=True)
candles_df.index = pd.to_datetime(candles_df.index)
candles_df

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,42283.58,42397.23,42261.02,42397.23,155.25731
2024-01-01 00:05:00,42397.22,42432.74,42385.26,42409.96,141.31102
2024-01-01 00:10:00,42409.97,42488.09,42409.96,42488.00,135.14249
2024-01-01 00:15:00,42488.00,42554.57,42458.58,42458.85,163.32541
2024-01-01 00:20:00,42458.85,42491.10,42445.84,42473.93,117.27112
...,...,...,...,...,...
2024-12-31 23:40:00,93464.27,93504.00,93456.00,93490.87,23.44102
2024-12-31 23:45:00,93490.86,93544.50,93484.30,93544.49,23.00051
2024-12-31 23:50:00,93544.49,93702.15,93544.49,93646.96,20.99684
2024-12-31 23:55:00,93646.97,93676.98,93576.00,93576.00,17.03553


#### Unione tra candles_df e order_book_df pesando i dati di order_book_df con la mia logica

In [31]:
# def aggregate_order_book_for_timestamp(T, df, window=30):
#     """
#     Per il timestamp target T, seleziona i record in df che hanno un timestamp
#     compreso nell'intervallo [T - window, T + window] (window in secondi).
#     Se viene trovato un record esatto (distanza zero), lo usa (o ne fa la media se addirittura più di un record è esatto per quel minuto).
#     Altrimenti, calcola la media pesata dei record usando peso = 1 / d, dove d è la distanza in secondi.
#     Restituisce una pd.Series con la media pesata per ciascuna colonna in df.
#     """
#     window_start = T - pd.Timedelta(seconds=window)
#     window_end = T + pd.Timedelta(seconds=window)
#     df_window = df.loc[(df.index >= window_start) & (df.index <= window_end)]
    
#     # Se non ci sono dati, restituisci NaN per tutte le colonne
#     if df_window.empty:
#         return pd.Series({col: np.nan for col in df.columns})
    
#     # Calcola la distanza in secondi di ogni record da T
#     distances = np.abs((df_window.index - T) / pd.Timedelta(seconds=1))
    
#     # Se esiste almeno un record con distanza 0, uso quello (se sono di più, ne prendo la media)
#     if (distances == 0).any():
#         exact_rows = df_window.loc[distances == 0]
#         return exact_rows.mean()
    
#     # Calcola i pesi: 1/distanza
#     weights = pd.Series(1 / distances, index=df_window.index)
    
#     # Calcola la media pesata per ciascuna colonna
#     weighted_avg = (df_window.multiply(weights, axis=0)).sum() / weights.sum()
#     return weighted_avg

In [32]:
# # Supponiamo che candles_df e order_book_df siano già caricati
# # candles_df: indice = timestamp (precisione 1 minuto) con colonne: open, high, low, close, volume, ecc.
# # order_book_df: indice = timestamp (con secondi) con colonne: ad es. notional_-5, notional_-4, …, notional_5

# # Assicuriamoci che gli indici siano in formato datetime
# candles_df.index = pd.to_datetime(candles_df.index)
# order_book_df.index = pd.to_datetime(order_book_df.index)

# # Itera sui timestamp di candles_df e calcola la media pesata per order_book_df
# aggregated_list = []
# for T in candles_df.index:
#     agg_values = aggregate_order_book_for_timestamp(T, order_book_df, window=30)
#     agg_values.name = T  # Imposta il nome della serie come timestamp
#     aggregated_list.append(agg_values)

# # Costruisci un DataFrame aggregato: ogni riga corrisponde a un timestamp (minuto) con i dati di order_book_df aggregati
# order_book_agg = pd.DataFrame(aggregated_list)

# # Ora unisci candles_df (che ha tutti i minuti) con l'aggregato di order_book_df
# final_df = candles_df.join(order_book_agg, how='left')

# # Verifica quali timestamp di candles_df non hanno dati aggregati da order_book_df:
# missing = final_df[order_book_agg.columns].isna().all(axis=1)
# missing_timestamps = final_df.index[missing]

# print("Timestamp mancanti (senza dati order_book aggregati):")
# print(missing_timestamps)
# print("Numero di timestamp mancanti:", len(missing_timestamps))

# # final_df contiene tutte le colonne di candles_df (OHLCV) unite alle colonne aggregate da order_book_df

In [50]:
def aggregate_order_book_for_minute_fast(T, order_times, df, window_seconds=150):
    """
    Per il timestamp target T (pd.Timestamp), usa np.searchsorted per ottenere rapidamente
    l'intervallo dei record in df con timestamp compresi in [T-window, T+window].
    Calcola quindi la media pesata per ciascuna colonna di df usando peso = 1/distanza (in secondi).
    Se esiste almeno un record con distanza zero, usa solo quelli (se ce n'è solo 1 esatto usa quello, altrimenti fa la media di quelli esatti (ma in realtà non dovrebbe mai esserci più di un timestamp esatto in order_book_df per ogni T di riferimento in candles_df)).
    """
    T_sec = T.timestamp()  # Convertiamo T in secondi (float)
    left_bound = T_sec - window_seconds
    right_bound = T_sec + window_seconds

    # Trova gli indici in order_times che soddisfano la condizione
    left_idx = np.searchsorted(order_times, left_bound, side='left')
    right_idx = np.searchsorted(order_times, right_bound, side='right')
    
    if left_idx >= right_idx:
        # Nessun dato in questo intervallo: restituisci NaN per tutte le colonne di df
        return pd.Series({col: np.nan for col in df.columns})
    
    # Seleziona il sotto-DataFrame
    slice_df = df.iloc[left_idx:right_idx]
    # Otteniamo gli orari corrispondenti (in secondi)
    slice_times = order_times[left_idx:right_idx].astype(np.float64)
    # Calcoliamo le distanze assolute (in secondi) rispetto a T
    distances = np.abs(slice_times - T_sec)
    
    # Se c'è almeno un record con distanza zero (cioè T esatto), usalo (o fai la media se ce ne sono più di uno)
    if np.any(np.isclose(distances, 0, atol=1e-6)):
        mask = np.isclose(distances, 0, atol=1e-6)
        return slice_df[mask].mean()
    
    # Altrimenti, calcola i pesi come 1/distanza
    weights = 1 / distances
    # Calcola la media pesata per ogni colonna
    weighted_avg = (slice_df.multiply(weights, axis=0)).sum() / weights.sum()
    return weighted_avg

In [51]:
# --- Preparazione dei DataFrame ---
# candles_df: DataFrame con indice timestamp (precisione 1 minuto) e colonne OHLCV (e altre se presenti)
# order_book_df: DataFrame con indice timestamp (con secondi) e colonne appiattite (es. notional_-5, depth_-5, ecc.)

# Assicuriamoci che gli indici siano in formato datetime
candles_df.index = pd.to_datetime(candles_df.index)
order_book_df.index = pd.to_datetime(order_book_df.index)

# Pre-calcoliamo l'array degli orari di order_book_df in secondi (usando np.int64 per maggiore efficienza)
order_times = order_book_df.index.astype(np.int64) // 10**9
order_times = order_times.to_numpy()  # Array numpy

# --- Aggregazione ---
# Creiamo una lista per memorizzare i risultati aggregati per ogni timestamp in candles_df
aggregated_results = []
# Per ogni timestamp in candles_df (che sono regolarmente spaziati ogni 5 minuti)
for T in candles_df.index:
    agg_series = aggregate_order_book_for_minute_fast(T, order_times, order_book_df, window_seconds=150)
    agg_series.name = T  # Imposta il nome della serie come il timestamp
    aggregated_results.append(agg_series)

# Creiamo un DataFrame con i risultati aggregati
order_book_agg = pd.DataFrame(aggregated_results)
order_book_agg.index.name = 'timestamp'

# --- Unione con candles_df ---
# Join: manteniamo i timestamp (minuti) di candles_df
final_df = candles_df.join(order_book_agg, how='left')
final_df

Unnamed: 0_level_0,open,high,low,close,volume,notional_-5,notional_-4,notional_-3,notional_-2,notional_-1,notional_1,notional_2,notional_3,notional_4,notional_5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-01-01 00:00:00,42283.58,42397.23,42261.02,42397.23,155.25731,4.611230e+08,3.876777e+08,3.146451e+08,2.371130e+08,9.217481e+07,6.708451e+07,1.740684e+08,2.304834e+08,3.145019e+08,3.405500e+08
2024-01-01 00:05:00,42397.22,42432.74,42385.26,42409.96,141.31102,4.525774e+08,3.737719e+08,3.089653e+08,2.242065e+08,8.635133e+07,7.313096e+07,1.934627e+08,2.439645e+08,3.166603e+08,3.549727e+08
2024-01-01 00:10:00,42409.97,42488.09,42409.96,42488.00,135.14249,4.653009e+08,3.866021e+08,3.222435e+08,2.356223e+08,8.542778e+07,7.275846e+07,1.921220e+08,2.432343e+08,3.154108e+08,3.548055e+08
2024-01-01 00:15:00,42488.00,42554.57,42458.58,42458.85,163.32541,4.536125e+08,3.754343e+08,3.108361e+08,2.021011e+08,6.510441e+07,7.661924e+07,1.927887e+08,2.566216e+08,3.184219e+08,3.536189e+08
2024-01-01 00:20:00,42458.85,42491.10,42445.84,42473.93,117.27112,4.574555e+08,3.800394e+08,3.133549e+08,2.131649e+08,6.864037e+07,7.133745e+07,1.834275e+08,2.403538e+08,3.106494e+08,3.508125e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-31 23:40:00,93464.27,93504.00,93456.00,93490.87,23.44102,8.939847e+08,7.628307e+08,5.104306e+08,3.353413e+08,1.824000e+08,9.365534e+07,1.730503e+08,2.493083e+08,3.060481e+08,3.639250e+08
2024-12-31 23:45:00,93490.86,93544.50,93484.30,93544.49,23.00051,8.940680e+08,7.583909e+08,5.041382e+08,3.324905e+08,1.801727e+08,8.778805e+07,1.693768e+08,2.473854e+08,3.007457e+08,3.583649e+08
2024-12-31 23:50:00,93544.49,93702.15,93544.49,93646.96,20.99684,8.882012e+08,7.544450e+08,4.935072e+08,3.229468e+08,1.728211e+08,9.540240e+07,1.716078e+08,2.508386e+08,3.047005e+08,3.694190e+08
2024-12-31 23:55:00,93646.97,93676.98,93576.00,93576.00,17.03553,8.600473e+08,7.434125e+08,4.690769e+08,3.144676e+08,1.684542e+08,1.035354e+08,1.819212e+08,2.618028e+08,3.167798e+08,3.829981e+08


In [52]:
final_df.isna().sum()

open             0
high             0
low              0
close            0
volume           0
notional_-5    623
notional_-4    623
notional_-3    623
notional_-2    623
notional_-1    623
notional_1     623
notional_2     623
notional_3     623
notional_4     623
notional_5     623
dtype: int64

#### Gestione dei NaN

In [70]:
# Dato che so che per order_book_df manca interamente la data del 18 aprile 2024, tengo i dati dal 19 aprile
final_df_cut = final_df[final_df.index >= pd.to_datetime("2024-04-19 00:00:00")]
final_df_cut

Unnamed: 0_level_0,open,high,low,close,volume,notional_-5,notional_-4,notional_-3,notional_-2,notional_-1,notional_1,notional_2,notional_3,notional_4,notional_5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-04-19 00:00:00,63470.09,63470.09,63272.60,63283.05,171.66048,2.805868e+08,2.546757e+08,2.235747e+08,1.712686e+08,8.151701e+07,8.958049e+07,2.323875e+08,2.885844e+08,3.980131e+08,4.338895e+08
2024-04-19 00:05:00,63283.05,63283.05,63072.99,63089.99,129.03663,2.979414e+08,2.626244e+08,2.317270e+08,1.866406e+08,8.880829e+07,7.741155e+07,2.146845e+08,2.813204e+08,3.904839e+08,4.277976e+08
2024-04-19 00:10:00,63090.00,63090.00,62912.64,62913.99,209.10677,3.093996e+08,2.624514e+08,2.300250e+08,1.892556e+08,8.843862e+07,7.034354e+07,1.965954e+08,2.765930e+08,3.623605e+08,4.223972e+08
2024-04-19 00:15:00,62914.00,63073.00,62913.99,63068.00,123.27576,3.168050e+08,2.667421e+08,2.336174e+08,1.932235e+08,9.231861e+07,7.493420e+07,1.840083e+08,2.711391e+08,3.150917e+08,4.182899e+08
2024-04-19 00:20:00,63068.00,63106.57,62990.95,62992.11,56.42708,3.300506e+08,2.808887e+08,2.477095e+08,2.006171e+08,1.016706e+08,7.919234e+07,2.027902e+08,2.755209e+08,3.523499e+08,4.187225e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-31 23:40:00,93464.27,93504.00,93456.00,93490.87,23.44102,8.939847e+08,7.628307e+08,5.104306e+08,3.353413e+08,1.824000e+08,9.365534e+07,1.730503e+08,2.493083e+08,3.060481e+08,3.639250e+08
2024-12-31 23:45:00,93490.86,93544.50,93484.30,93544.49,23.00051,8.940680e+08,7.583909e+08,5.041382e+08,3.324905e+08,1.801727e+08,8.778805e+07,1.693768e+08,2.473854e+08,3.007457e+08,3.583649e+08
2024-12-31 23:50:00,93544.49,93702.15,93544.49,93646.96,20.99684,8.882012e+08,7.544450e+08,4.935072e+08,3.229468e+08,1.728211e+08,9.540240e+07,1.716078e+08,2.508386e+08,3.047005e+08,3.694190e+08
2024-12-31 23:55:00,93646.97,93676.98,93576.00,93576.00,17.03553,8.600473e+08,7.434125e+08,4.690769e+08,3.144676e+08,1.684542e+08,1.035354e+08,1.819212e+08,2.618028e+08,3.167798e+08,3.829981e+08


In [71]:
final_df_cut.isna().sum()

open             0
high             0
low              0
close            0
volume           0
notional_-5    313
notional_-4    313
notional_-3    313
notional_-2    313
notional_-1    313
notional_1     313
notional_2     313
notional_3     313
notional_4     313
notional_5     313
dtype: int64

In [72]:
# Trova le righe con almeno un NaN
nan_mask = final_df_cut.isna().any(axis=1)

# Filtra gli indici dove il mask è True
indici_con_nan = final_df_cut.index[nan_mask]

# Mostra gli indici
print("Indici con almeno un NaN:")
indici_con_nan

Indici con almeno un NaN:


DatetimeIndex(['2024-04-19 07:50:00', '2024-05-02 02:00:00',
               '2024-05-03 03:30:00', '2024-05-16 02:25:00',
               '2024-05-16 02:30:00', '2024-05-16 05:50:00',
               '2024-05-16 05:55:00', '2024-05-21 06:40:00',
               '2024-05-21 06:45:00', '2024-06-12 00:05:00',
               ...
               '2024-09-12 05:35:00', '2024-09-12 05:40:00',
               '2024-09-12 06:20:00', '2024-09-12 06:25:00',
               '2024-09-30 07:45:00', '2024-09-30 07:50:00',
               '2024-10-21 02:10:00', '2024-10-21 02:15:00',
               '2024-12-18 06:40:00', '2024-12-18 06:45:00'],
              dtype='datetime64[ns]', name='timestamp', length=313, freq=None)

In [73]:
# Riempio i valori NaN che rimangono copiando il valore precedente
final_df_cut_filled = final_df_cut.fillna(method='ffill')
final_df_cut_filled

  final_df_cut_filled = final_df_cut.fillna(method='ffill')


Unnamed: 0_level_0,open,high,low,close,volume,notional_-5,notional_-4,notional_-3,notional_-2,notional_-1,notional_1,notional_2,notional_3,notional_4,notional_5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-04-19 00:00:00,63470.09,63470.09,63272.60,63283.05,171.66048,2.805868e+08,2.546757e+08,2.235747e+08,1.712686e+08,8.151701e+07,8.958049e+07,2.323875e+08,2.885844e+08,3.980131e+08,4.338895e+08
2024-04-19 00:05:00,63283.05,63283.05,63072.99,63089.99,129.03663,2.979414e+08,2.626244e+08,2.317270e+08,1.866406e+08,8.880829e+07,7.741155e+07,2.146845e+08,2.813204e+08,3.904839e+08,4.277976e+08
2024-04-19 00:10:00,63090.00,63090.00,62912.64,62913.99,209.10677,3.093996e+08,2.624514e+08,2.300250e+08,1.892556e+08,8.843862e+07,7.034354e+07,1.965954e+08,2.765930e+08,3.623605e+08,4.223972e+08
2024-04-19 00:15:00,62914.00,63073.00,62913.99,63068.00,123.27576,3.168050e+08,2.667421e+08,2.336174e+08,1.932235e+08,9.231861e+07,7.493420e+07,1.840083e+08,2.711391e+08,3.150917e+08,4.182899e+08
2024-04-19 00:20:00,63068.00,63106.57,62990.95,62992.11,56.42708,3.300506e+08,2.808887e+08,2.477095e+08,2.006171e+08,1.016706e+08,7.919234e+07,2.027902e+08,2.755209e+08,3.523499e+08,4.187225e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-31 23:40:00,93464.27,93504.00,93456.00,93490.87,23.44102,8.939847e+08,7.628307e+08,5.104306e+08,3.353413e+08,1.824000e+08,9.365534e+07,1.730503e+08,2.493083e+08,3.060481e+08,3.639250e+08
2024-12-31 23:45:00,93490.86,93544.50,93484.30,93544.49,23.00051,8.940680e+08,7.583909e+08,5.041382e+08,3.324905e+08,1.801727e+08,8.778805e+07,1.693768e+08,2.473854e+08,3.007457e+08,3.583649e+08
2024-12-31 23:50:00,93544.49,93702.15,93544.49,93646.96,20.99684,8.882012e+08,7.544450e+08,4.935072e+08,3.229468e+08,1.728211e+08,9.540240e+07,1.716078e+08,2.508386e+08,3.047005e+08,3.694190e+08
2024-12-31 23:55:00,93646.97,93676.98,93576.00,93576.00,17.03553,8.600473e+08,7.434125e+08,4.690769e+08,3.144676e+08,1.684542e+08,1.035354e+08,1.819212e+08,2.618028e+08,3.167798e+08,3.829981e+08


In [None]:
# Verifico che effettivamente ora non ci siano più NaN
final_df_cut_filled.isna().sum()

open           0
high           0
low            0
close          0
volume         0
notional_-5    0
notional_-4    0
notional_-3    0
notional_-2    0
notional_-1    0
notional_1     0
notional_2     0
notional_3     0
notional_4     0
notional_5     0
dtype: int64

#### Unione di metrics_df

In [75]:
final_df_2 = final_df_cut_filled.join(metrics_df, how="inner")
final_df_2

Unnamed: 0_level_0,open,high,low,close,volume,notional_-5,notional_-4,notional_-3,notional_-2,notional_-1,notional_1,notional_2,notional_3,notional_4,notional_5,sum_open_interest_value,count_toptrader_long_short_ratio,sum_toptrader_long_short_ratio,count_long_short_ratio,sum_taker_long_short_vol_ratio
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-04-19 00:00:00,63470.09,63470.09,63272.60,63283.05,171.66048,2.805868e+08,2.546757e+08,2.235747e+08,1.712686e+08,8.151701e+07,8.958049e+07,2.323875e+08,2.885844e+08,3.980131e+08,4.338895e+08,4.291016e+09,1.591518,1.138015,1.602177,0.649644
2024-04-19 00:05:00,63283.05,63283.05,63072.99,63089.99,129.03663,2.979414e+08,2.626244e+08,2.317270e+08,1.866406e+08,8.880829e+07,7.741155e+07,2.146845e+08,2.813204e+08,3.904839e+08,4.277976e+08,4.282146e+09,1.592211,1.140545,1.601198,0.596355
2024-04-19 00:10:00,63090.00,63090.00,62912.64,62913.99,209.10677,3.093996e+08,2.624514e+08,2.300250e+08,1.892556e+08,8.843862e+07,7.034354e+07,1.965954e+08,2.765930e+08,3.623605e+08,4.223972e+08,4.272457e+09,1.596364,1.137717,1.596248,0.396320
2024-04-19 00:15:00,62914.00,63073.00,62913.99,63068.00,123.27576,3.168050e+08,2.667421e+08,2.336174e+08,1.932235e+08,9.231861e+07,7.493420e+07,1.840083e+08,2.711391e+08,3.150917e+08,4.182899e+08,4.264116e+09,1.595923,1.143222,1.590101,0.782142
2024-04-19 00:20:00,63068.00,63106.57,62990.95,62992.11,56.42708,3.300506e+08,2.808887e+08,2.477095e+08,2.006171e+08,1.016706e+08,7.919234e+07,2.027902e+08,2.755209e+08,3.523499e+08,4.187225e+08,4.272201e+09,1.607132,1.143171,1.609152,1.378507
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-31 23:35:00,93546.03,93546.03,93456.02,93464.26,37.27614,8.928054e+08,7.590884e+08,4.991608e+08,3.300921e+08,1.791560e+08,9.927997e+07,1.716579e+08,2.532281e+08,3.071396e+08,3.720481e+08,8.535339e+09,1.799981,2.106589,1.807930,0.395590
2024-12-31 23:40:00,93464.27,93504.00,93456.00,93490.87,23.44102,8.939847e+08,7.628307e+08,5.104306e+08,3.353413e+08,1.824000e+08,9.365534e+07,1.730503e+08,2.493083e+08,3.060481e+08,3.639250e+08,8.527129e+09,1.802805,2.109118,1.809816,1.074214
2024-12-31 23:45:00,93490.86,93544.50,93484.30,93544.49,23.00051,8.940680e+08,7.583909e+08,5.041382e+08,3.324905e+08,1.801727e+08,8.778805e+07,1.693768e+08,2.473854e+08,3.007457e+08,3.583649e+08,8.529586e+09,1.807319,2.110396,1.815404,0.947209
2024-12-31 23:50:00,93544.49,93702.15,93544.49,93646.96,20.99684,8.882012e+08,7.544450e+08,4.935072e+08,3.229468e+08,1.728211e+08,9.540240e+07,1.716078e+08,2.508386e+08,3.047005e+08,3.694190e+08,8.534817e+09,1.810354,2.109759,1.819173,1.414375


In [76]:
final_df_2.isna().sum()

open                                 0
high                                 0
low                                  0
close                                0
volume                               0
notional_-5                          0
notional_-4                          0
notional_-3                          0
notional_-2                          0
notional_-1                          0
notional_1                           0
notional_2                           0
notional_3                           0
notional_4                           0
notional_5                           0
sum_open_interest_value              0
count_toptrader_long_short_ratio     1
sum_toptrader_long_short_ratio      11
count_long_short_ratio               1
sum_taker_long_short_vol_ratio       0
dtype: int64

#### Gestione dei NaN

In [77]:
final_df_2_filled = final_df_2.fillna(method='ffill')
final_df_2_filled

  final_df_2_filled = final_df_2.fillna(method='ffill')


Unnamed: 0_level_0,open,high,low,close,volume,notional_-5,notional_-4,notional_-3,notional_-2,notional_-1,notional_1,notional_2,notional_3,notional_4,notional_5,sum_open_interest_value,count_toptrader_long_short_ratio,sum_toptrader_long_short_ratio,count_long_short_ratio,sum_taker_long_short_vol_ratio
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-04-19 00:00:00,63470.09,63470.09,63272.60,63283.05,171.66048,2.805868e+08,2.546757e+08,2.235747e+08,1.712686e+08,8.151701e+07,8.958049e+07,2.323875e+08,2.885844e+08,3.980131e+08,4.338895e+08,4.291016e+09,1.591518,1.138015,1.602177,0.649644
2024-04-19 00:05:00,63283.05,63283.05,63072.99,63089.99,129.03663,2.979414e+08,2.626244e+08,2.317270e+08,1.866406e+08,8.880829e+07,7.741155e+07,2.146845e+08,2.813204e+08,3.904839e+08,4.277976e+08,4.282146e+09,1.592211,1.140545,1.601198,0.596355
2024-04-19 00:10:00,63090.00,63090.00,62912.64,62913.99,209.10677,3.093996e+08,2.624514e+08,2.300250e+08,1.892556e+08,8.843862e+07,7.034354e+07,1.965954e+08,2.765930e+08,3.623605e+08,4.223972e+08,4.272457e+09,1.596364,1.137717,1.596248,0.396320
2024-04-19 00:15:00,62914.00,63073.00,62913.99,63068.00,123.27576,3.168050e+08,2.667421e+08,2.336174e+08,1.932235e+08,9.231861e+07,7.493420e+07,1.840083e+08,2.711391e+08,3.150917e+08,4.182899e+08,4.264116e+09,1.595923,1.143222,1.590101,0.782142
2024-04-19 00:20:00,63068.00,63106.57,62990.95,62992.11,56.42708,3.300506e+08,2.808887e+08,2.477095e+08,2.006171e+08,1.016706e+08,7.919234e+07,2.027902e+08,2.755209e+08,3.523499e+08,4.187225e+08,4.272201e+09,1.607132,1.143171,1.609152,1.378507
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-31 23:35:00,93546.03,93546.03,93456.02,93464.26,37.27614,8.928054e+08,7.590884e+08,4.991608e+08,3.300921e+08,1.791560e+08,9.927997e+07,1.716579e+08,2.532281e+08,3.071396e+08,3.720481e+08,8.535339e+09,1.799981,2.106589,1.807930,0.395590
2024-12-31 23:40:00,93464.27,93504.00,93456.00,93490.87,23.44102,8.939847e+08,7.628307e+08,5.104306e+08,3.353413e+08,1.824000e+08,9.365534e+07,1.730503e+08,2.493083e+08,3.060481e+08,3.639250e+08,8.527129e+09,1.802805,2.109118,1.809816,1.074214
2024-12-31 23:45:00,93490.86,93544.50,93484.30,93544.49,23.00051,8.940680e+08,7.583909e+08,5.041382e+08,3.324905e+08,1.801727e+08,8.778805e+07,1.693768e+08,2.473854e+08,3.007457e+08,3.583649e+08,8.529586e+09,1.807319,2.110396,1.815404,0.947209
2024-12-31 23:50:00,93544.49,93702.15,93544.49,93646.96,20.99684,8.882012e+08,7.544450e+08,4.935072e+08,3.229468e+08,1.728211e+08,9.540240e+07,1.716078e+08,2.508386e+08,3.047005e+08,3.694190e+08,8.534817e+09,1.810354,2.109759,1.819173,1.414375


In [None]:
# Verifico che effettivamente ora non ci siano più NaN
final_df_2_filled.isna().sum()

open                                0
high                                0
low                                 0
close                               0
volume                              0
notional_-5                         0
notional_-4                         0
notional_-3                         0
notional_-2                         0
notional_-1                         0
notional_1                          0
notional_2                          0
notional_3                          0
notional_4                          0
notional_5                          0
sum_open_interest_value             0
count_toptrader_long_short_ratio    0
sum_toptrader_long_short_ratio      0
count_long_short_ratio              0
sum_taker_long_short_vol_ratio      0
dtype: int64

In [79]:
# Esporto il dataset pulito finale con cui farò machine learning
# final_df_2_filled.to_csv('Bitcoin (USDT) arricchito 2024 timeframe 5m.csv')

### Ottengo l'order book in tempo reale

#### Binance

In [None]:
async def stream_order_book(symbol="BTCUSDT", save_interval=60): # Il parametro save_interval=60 nel codice fa salvare i dati su file CSV ogni 60 secondi
    client = await AsyncClient.create()
    bm = BinanceSocketManager(client)
    ob_stream = bm.depth_socket(symbol, interval=100)
    
    order_book_data = []
    last_save = time.time()
    
    try:
        async with ob_stream as stream:
            while True:
                res = await stream.recv()
                data = {
                "timestamp": res["E"],
                "bids": [[float(price), float(qty)] for price, qty in res["b"]],
                "asks": [[float(price), float(qty)] for price, qty in res["a"]]
            }
                order_book_data.append(data)
                
                if time.time() - last_save > save_interval:
                    # Formatta il timestamp senza caratteri illegali
                    timestamp = pd.Timestamp.now().strftime("%Y-%m-%d_%H-%M-%S")
                    df = pd.DataFrame(order_book_data)
                    df.to_csv(f"order_book_binance_{symbol}_{timestamp}.csv", index=False)
                    order_book_data = []
                    last_save = time.time()
                    break
    finally:
        await client.close_connection()

In [16]:
# Esegui con await (in Jupyter)
nest_asyncio.apply()
await stream_order_book()

In [None]:
realtime_binance = pd.read_csv('order_book_binance_BTCUSDT_2025-03-13_15-48-18.csv')
realtime_binance

Unnamed: 0,timestamp,bids,asks
0,1741877238514,"[[82108.99, 0.60797], [82108.55, 0.12811], [82...","[[82130.24, 0.0006], [82131.98, 0.03055], [821..."
1,1741877238614,"[[82108.99, 0.93205], [82108.98, 0.00058], [82...","[[82109.0, 2.45228], [82111.16, 0.0622], [8211..."
2,1741877238714,"[[82108.99, 1.16798], [82108.98, 0.00048]]","[[82109.54, 0.0012], [82248.54, 0.0], [82255.6..."
3,1741877238814,"[[82108.99, 1.16806], [82108.92, 7e-05], [8210...","[[82109.0, 2.00184], [82111.2, 0.0487], [82112..."
4,1741877238914,"[[82108.99, 2.64556], [82108.92, 7e-05], [8210...","[[82109.0, 1.37229], [82109.02, 0.28092], [821..."
...,...,...,...
586,1741877297114,"[[81893.16, 7e-05], [81893.15, 0.80607], [8188...","[[81894.8, 1.36445], [81894.85, 0.00014], [818..."
587,1741877297214,"[[81894.27, 0.39285], [81886.59, 0.0], [81885....","[[81899.99, 7e-05], [81900.0, 0.0488], [81900...."
588,1741877297314,"[[81886.0, 0.06105], [81884.1, 0.1221], [81878...","[[81894.8, 1.08618], [81894.9, 0.00014], [8189..."
589,1741877297414,"[[81838.88, 0.66968], [81814.57, 0.041], [8181...","[[81894.8, 1.08712], [81895.71, 0.0], [82080.6..."


In [16]:
nest_asyncio.apply()

async def stream_order_book(symbol="BTCUSDT", duration=300):
    client = await AsyncClient.create()
    bm = BinanceSocketManager(client)
    ob_stream = bm.depth_socket(symbol, interval=100)
    
    order_book_data = []
    start_time = time.time()
    
    try:
        async with ob_stream as stream:
            print(f"Avvio raccolta dati per {duration} secondi...")
            while time.time() - start_time < duration:
                res = await stream.recv()
                data = {
                    "timestamp": res["E"],
                    "bids": [[float(price), float(qty)] for price, qty in res["b"]],
                    "asks": [[float(price), float(qty)] for price, qty in res["a"]]
                }
                order_book_data.append(data)
                
    finally:
        await client.close_connection()
        print("Connessione chiusa")
    
    # Crea DataFrame finale
    final_df = pd.DataFrame(order_book_data)
    
    if not final_df.empty:
        final_df['timestamp'] = pd.to_datetime(final_df['timestamp'], unit='ms')
        print("\nAnteprima del DataFrame:")
        display(final_df)
    else:
        print("Nessun dato raccolto durante l'intervallo specificato")
    
    return final_df

In [17]:
# Esegui la raccolta dati (in Jupyter)
final_df = await stream_order_book(duration=10)

Avvio raccolta dati per 10 secondi...
Connessione chiusa

Anteprima del DataFrame:


Unnamed: 0,timestamp,bids,asks
0,2025-03-13 17:16:06.014,"[[80517.61, 1.68661], [80517.6, 0.00028], [805...","[[80506.25, 0.0], [80506.26, 0.0], [80506.96, ..."
1,2025-03-13 17:16:06.114,"[[80517.61, 2.3201], [80517.6, 0.00069], [8051...","[[80517.62, 1.38322], [80517.63, 0.00066], [80..."
2,2025-03-13 17:16:06.214,"[[80517.61, 2.64767], [80510.9, 1.62137], [805...","[[80517.62, 1.38319], [80517.63, 0.00066], [80..."
3,2025-03-13 17:16:06.314,"[[80517.61, 2.70607], [80498.39, 0.0003], [804...","[[80518.6, 0.00307], [80519.05, 7e-05], [80519..."
4,2025-03-13 17:16:06.414,"[[80510.91, 0.90176], [80506.8, 7e-05], [80487...","[[80517.62, 1.68959], [80524.0, 0.0496], [8052..."
...,...,...,...
85,2025-03-13 17:16:14.529,"[[80510.91, 0.0], [80510.9, 0.0], [80510.87, 0...","[[80500.01, 0.70758], [80500.02, 0.47831], [80..."
86,2025-03-13 17:16:14.614,"[[80500.0, 14.76637], [80499.99, 0.00096], [80...","[[80500.01, 6.43545], [80500.02, 0.00034], [80..."
87,2025-03-13 17:16:14.714,"[[80500.0, 15.05717], [80499.72, 0.00014], [80...","[[80500.01, 4.87424], [80500.02, 0.00034], [80..."
88,2025-03-13 17:16:14.814,"[[80499.99, 0.00109], [80499.84, 0.00014], [80...","[[80500.01, 4.10777], [80500.02, 0.00047], [80..."


In [None]:
# Calcola lo spread tra bid e ask
final_df['best_bid'] = final_df['bids'].apply(lambda x: x[0][0])
final_df['best_ask'] = final_df['asks'].apply(lambda x: x[0][0])
final_df['spread'] = final_df['best_ask'] - final_df['best_bid']

print(f"\nSpread medio: {final_df['spread'].mean():.4f}")
final_df


Spread medio: 75.6744
Numero totale di osservazioni: 90


Unnamed: 0,timestamp,bids,asks,best_bid,best_ask,spread
0,2025-03-13 17:16:06.014,"[[80517.61, 1.68661], [80517.6, 0.00028], [805...","[[80506.25, 0.0], [80506.26, 0.0], [80506.96, ...",80517.61,80506.25,-11.36
1,2025-03-13 17:16:06.114,"[[80517.61, 2.3201], [80517.6, 0.00069], [8051...","[[80517.62, 1.38322], [80517.63, 0.00066], [80...",80517.61,80517.62,0.01
2,2025-03-13 17:16:06.214,"[[80517.61, 2.64767], [80510.9, 1.62137], [805...","[[80517.62, 1.38319], [80517.63, 0.00066], [80...",80517.61,80517.62,0.01
3,2025-03-13 17:16:06.314,"[[80517.61, 2.70607], [80498.39, 0.0003], [804...","[[80518.6, 0.00307], [80519.05, 7e-05], [80519...",80517.61,80518.60,0.99
4,2025-03-13 17:16:06.414,"[[80510.91, 0.90176], [80506.8, 7e-05], [80487...","[[80517.62, 1.68959], [80524.0, 0.0496], [8052...",80510.91,80517.62,6.71
...,...,...,...,...,...,...
85,2025-03-13 17:16:14.529,"[[80510.91, 0.0], [80510.9, 0.0], [80510.87, 0...","[[80500.01, 0.70758], [80500.02, 0.47831], [80...",80510.91,80500.01,-10.90
86,2025-03-13 17:16:14.614,"[[80500.0, 14.76637], [80499.99, 0.00096], [80...","[[80500.01, 6.43545], [80500.02, 0.00034], [80...",80500.00,80500.01,0.01
87,2025-03-13 17:16:14.714,"[[80500.0, 15.05717], [80499.72, 0.00014], [80...","[[80500.01, 4.87424], [80500.02, 0.00034], [80...",80500.00,80500.01,0.01
88,2025-03-13 17:16:14.814,"[[80499.99, 0.00109], [80499.84, 0.00014], [80...","[[80500.01, 4.10777], [80500.02, 0.00047], [80...",80499.99,80500.01,0.02


#### Bybit

In [19]:
def handle_order_book(message):
    global order_book_data
    
    if "data" not in message or "ts" not in message:
        return  # Rimuoviamo i log non essenziali
        
    try:
        data = message["data"]
        bids = [[float(price), float(qty)] for price, qty in data.get("b", [])]
        asks = [[float(price), float(qty)] for price, qty in data.get("a", [])]
        
        if bids and asks:
            order_book_data.append({
                "timestamp": message["ts"],
                "bids": bids,
                "asks": asks
            })
                
    except Exception as e:
        print(f"Errore: {str(e)}")  # Manteniamo solo gli errori critici

In [20]:
symbol = "BTCUSDT"
duration = 10  # Tempo di esecuzione in secondi (modifica a piacere)
order_book_data = []



# Configurazione WebSocket
ws = WebSocket(
    channel_type="linear",
    testnet=False,
    ping_interval=20,
    ping_timeout=15
)

ws.orderbook_stream(50, symbol, handle_order_book)

print(f"Avvio raccolta dati per {duration} secondi...")
start_time = time.time()

try:
    while time.time() - start_time < duration:
        time.sleep(0.1)  # Riduciamo il carico sulla CPU
except KeyboardInterrupt:
    print("\nInterruzione manuale rilevata")
finally:
    ws.exit()
    print("Connessione chiusa")

# Creazione del DataFrame finale
final_df = pd.DataFrame(order_book_data)

# Opzionale: conversione timestamp
if not final_df.empty:
    final_df['timestamp'] = pd.to_datetime(final_df['timestamp'], unit='ms')
    print("\nAnteprima del DataFrame:")
    display(final_df)
else:
    print("Nessun dato raccolto durante l'intervallo specificato")

INFO:pybit._websocket_stream:WebSocket Unified V5 attempting connection...
INFO:websocket:Websocket connected
INFO:pybit._websocket_stream:WebSocket Unified V5 connected


Avvio raccolta dati per 10 secondi...
Connessione chiusa

Anteprima del DataFrame:


Unnamed: 0,timestamp,bids,asks
0,2025-03-13 17:18:38.833,"[[80216.6, 3.776], [80216.2, 0.009], [80216.1,...","[[80216.7, 3.09], [80218.0, 0.084], [80219.6, ..."
1,2025-03-13 17:18:38.853,"[[80216.6, 4.023], [80216.2, 0.009], [80216.1,...","[[80216.7, 3.134], [80218.0, 0.084], [80219.6,..."
2,2025-03-13 17:18:38.873,"[[80216.6, 4.023], [80216.2, 0.007], [80216.1,...","[[80216.7, 3.09], [80218.0, 0.084], [80219.6, ..."
3,2025-03-13 17:18:38.894,"[[80216.6, 4.032], [80216.1, 0.488], [80216.0,...","[[80216.7, 3.09], [80218.0, 0.084], [80219.6, ..."
4,2025-03-13 17:18:38.913,"[[80216.6, 4.032], [80216.1, 0.488], [80216.0,...","[[80216.7, 3.215], [80218.0, 0.084], [80219.6,..."
...,...,...,...
478,2025-03-13 17:18:48.653,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.416], [80248.0, 0.084], [80248.8,..."
479,2025-03-13 17:18:48.673,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.416], [80248.0, 0.084], [80248.8,..."
480,2025-03-13 17:18:48.693,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.416], [80248.0, 0.084], [80248.8,..."
481,2025-03-13 17:18:48.733,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.356], [80248.0, 0.084], [80248.8,..."


In [21]:
# Analizza lo spread medio
if not final_df.empty:
    final_df['best_bid'] = final_df['bids'].apply(lambda x: x[0][0])
    final_df['best_ask'] = final_df['asks'].apply(lambda x: x[0][0])
    final_df['spread'] = final_df['best_ask'] - final_df['best_bid']
    print(f"\nSpread medio: {final_df['spread'].mean():.2f}")

final_df


Spread medio: 12.81


Unnamed: 0,timestamp,bids,asks,best_bid,best_ask,spread
0,2025-03-13 17:18:38.833,"[[80216.6, 3.776], [80216.2, 0.009], [80216.1,...","[[80216.7, 3.09], [80218.0, 0.084], [80219.6, ...",80216.6,80216.7,0.1
1,2025-03-13 17:18:38.853,"[[80216.6, 4.023], [80216.2, 0.009], [80216.1,...","[[80216.7, 3.134], [80218.0, 0.084], [80219.6,...",80216.6,80216.7,0.1
2,2025-03-13 17:18:38.873,"[[80216.6, 4.023], [80216.2, 0.007], [80216.1,...","[[80216.7, 3.09], [80218.0, 0.084], [80219.6, ...",80216.6,80216.7,0.1
3,2025-03-13 17:18:38.894,"[[80216.6, 4.032], [80216.1, 0.488], [80216.0,...","[[80216.7, 3.09], [80218.0, 0.084], [80219.6, ...",80216.6,80216.7,0.1
4,2025-03-13 17:18:38.913,"[[80216.6, 4.032], [80216.1, 0.488], [80216.0,...","[[80216.7, 3.215], [80218.0, 0.084], [80219.6,...",80216.6,80216.7,0.1
...,...,...,...,...,...,...
478,2025-03-13 17:18:48.653,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.416], [80248.0, 0.084], [80248.8,...",80231.7,80244.9,13.2
479,2025-03-13 17:18:48.673,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.416], [80248.0, 0.084], [80248.8,...",80231.7,80244.9,13.2
480,2025-03-13 17:18:48.693,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.416], [80248.0, 0.084], [80248.8,...",80231.7,80244.9,13.2
481,2025-03-13 17:18:48.733,"[[80231.7, 0.27], [80229.4, 0.004], [80228.7, ...","[[80244.9, 2.356], [80248.0, 0.084], [80248.8,...",80231.7,80244.9,13.2


#### Bozza di strategia

In [22]:
# Configurazione
symbol = "BTCUSDT"
initial_balance = 100  # USD
leverage = 5  # Max leva consigliata
risk_per_trade = 0.02  # 2% del capitale per trade
position_history = []
current_balance = initial_balance

# Indicatori strategia
def calculate_order_book_metrics(bids, asks):
    best_bid = bids[0][0]
    best_ask = asks[0][0]
    spread = best_ask - best_bid
    
    # Calcola profondità dell'order book
    depth_level = 0.005  # 0.5%
    bid_depth = sum(qty for price, qty in bids if price >= best_bid * (1 - depth_level))
    ask_depth = sum(qty for price, qty in asks if price <= best_ask * (1 + depth_level))
    
    # Order Book Imbalance
    ob_imbalance = (bid_depth - ask_depth) / (bid_depth + ask_depth)
    
    # Volume Weighted Mid Price
    vwap_bid = sum(price*qty for price, qty in bids) / sum(qty for _, qty in bids)
    vwap_ask = sum(price*qty for price, qty in asks) / sum(qty for _, qty in asks)
    fair_value = (vwap_bid + vwap_ask) / 2
    
    return {
        'spread': spread,
        'ob_imbalance': ob_imbalance,
        'fair_value': fair_value,
        'bid_depth': bid_depth,
        'ask_depth': ask_depth
    }

# Strategy Logic
def trading_decision(data):
    global current_balance
    
    # Calcola indicatori
    metrics = calculate_order_book_metrics(data['bids'], data['asks'])
    
    # Segnale di trading
    signal = "HOLD"
    entry_price = None
    stop_loss = None
    take_profit = None
    
    # Condizione Long: Squilibrio positivo e spread stretto
    if metrics['ob_imbalance'] > 0.2 and metrics['spread'] < 0.0005:
        signal = "LONG"
        entry_price = data['asks'][0][0]  # Prezzo di mercato
        stop_loss = entry_price * (1 - 0.005)  # 0.5% stop loss
        take_profit = entry_price * (1 + 0.015)  # 1.5% take profit
        
    # Condizione Short: Squilibrio negativo e spread stretto
    elif metrics['ob_imbalance'] < -0.2 and metrics['spread'] < 0.0005:
        signal = "SHORT"
        entry_price = data['bids'][0][0]  # Prezzo di mercato
        stop_loss = entry_price * (1 + 0.005)  # 0.5% stop loss
        take_profit = entry_price * (1 - 0.015)  # 1.5% take profit
    
    # Calcolo dimensione posizione
    if signal != "HOLD":
        risk_amount = current_balance * risk_per_trade
        price_diff = abs(entry_price - stop_loss)
        position_size = (risk_amount / price_diff) * leverage
        
        return {
            'signal': signal,
            'size': round(position_size, 3),
            'entry': entry_price,
            'stop_loss': stop_loss,
            'take_profit': take_profit,
            'leverage': leverage,
            'timestamp': data['timestamp']
        }
    
    return None

# Gestione Order Book
def handle_order_book(message):
    global order_book_data
    
    if "data" not in message or "ts" not in message:
        return
        
    try:
        data = message["data"]
        bids = sorted([[float(p), float(q)] for p, q in data.get("b", [])], reverse=True)
        asks = sorted([[float(p), float(q)] for p, q in data.get("a", [])])
        
        if bids and asks:
            ob_data = {
                "timestamp": message["ts"],
                "bids": bids,
                "asks": asks
            }
            
            # Prendi decisione di trading
            decision = trading_decision(ob_data)
            if decision:
                execute_trade(decision)
                
            order_book_data.append(ob_data)
                
    except Exception as e:
        print(f"Errore: {str(e)}")

# Simulazione esecuzione trade
def execute_trade(decision):
    global current_balance, position_history
    
    print(f"\n--- NUOVA OPERAZIONE ---")
    print(f"Direzione: {decision['signal']}")
    print(f"Dimensione: {decision['size']} contratti")
    print(f"Entry Price: {decision['entry']}")
    print(f"Stop Loss: {decision['stop_loss']}")
    print(f"Take Profit: {decision['take_profit']}")
    
    # Aggiungi logica di chiusura posizione (simulata)
    position_history.append(decision)
    
    # Simula chiusura posizione dopo 5 minuti o a raggiungimento TP/SL
    # (Nella realtà va implementata con WebSocket per price updates)

# Configurazione WebSocket
ws = WebSocket(
    channel_type="linear",
    testnet=True,  # Usa testnet per demo
    ping_interval=20,
    ping_timeout=15
)

ws.orderbook_stream(50, symbol, handle_order_book)

print(f"Avvio trading automatico...")
start_time = time.time()

try:
    while True:
        time.sleep(0.1)
        # Aggiorna dashboard ogni 10 secondi
        if time.time() - start_time > 10:
            print(f"\nStato Attuale: Capitale ${current_balance:.2f}")
            print(f"Operazioni Effettuate: {len(position_history)}")
            start_time = time.time()
            
except KeyboardInterrupt:
    print("\nInterruzione manuale rilevata")
finally:
    ws.exit()
    print("Connessione chiusa")
    
    # Salva storico trade
    if position_history:
        pd.DataFrame(position_history).to_csv("trading_history.csv", index=False)

INFO:pybit._websocket_stream:WebSocket Unified V5 attempting connection...
INFO:websocket:Websocket connected
INFO:pybit._websocket_stream:WebSocket Unified V5 connected


Avvio trading automatico...

Stato Attuale: Capitale $100.00
Operazioni Effettuate: 0

Stato Attuale: Capitale $100.00
Operazioni Effettuate: 0

Stato Attuale: Capitale $100.00
Operazioni Effettuate: 0

Interruzione manuale rilevata
Connessione chiusa
