In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ccxt
import time
from itertools import product

In [2]:
def download_and_plot(cryptos, market, timeframe_download, start_date, end_date, plot):
    """Download OHLCV candles for several USDT pairs and optionally plot closes.

    Parameters
    ----------
    cryptos : iterable of str
        Base symbols (e.g. ``'BTC'``); each is queried as ``'<symbol>/USDT'``.
    market : str
        Name of the ccxt exchange class to use (looked up with ``getattr(ccxt, market)``).
    timeframe_download : str
        Candle timeframe string passed straight to ``exchange.fetch_ohlcv``.
    start_date, end_date : anything accepted by ``pd.Timestamp``
        Inclusive bounds of the requested period.
    plot : bool
        When true, show one figure per pair with its close price.

    Returns
    -------
    dict[str, pandas.DataFrame]
        Maps each base symbol (``'/USDT'`` stripped) to a DataFrame indexed by
        ``timestamp`` with columns ``open, high, low, close, volume``.

    Raises
    ------
    ValueError
        If any requested pair is not listed on the chosen exchange.
    """
    exchange_class = getattr(ccxt, market)  # Resolve the exchange class by name
    exchange = exchange_class()
    exchange.load_markets()
    cryptos_pairs = [f"{crypto}/USDT" for crypto in cryptos]

    # Fail fast: validate every pair before spending time on any download.
    for pair in cryptos_pairs:
        if pair not in exchange.markets:
            raise ValueError(f"Il mercato {pair} non è disponibile su {market}.")

    # Convert the date bounds to millisecond epoch timestamps.
    start_timestamp = int(pd.Timestamp(start_date).timestamp() * 1000)
    end_timestamp = int(pd.Timestamp(end_date).timestamp() * 1000)
    end_cutoff = pd.Timestamp(end_date)

    data_frames = {}

    for pair in cryptos_pairs:
        print(f"Scarico i dati per {pair} dal {start_date} al {end_date}...")

        ohlcv = []
        since = start_timestamp

        # Page through the history until the end date is reached.
        while since < end_timestamp:
            batch = exchange.fetch_ohlcv(pair, timeframe_download, since)
            if not batch:
                break
            ohlcv.extend(batch)
            next_since = batch[-1][0] + 1
            if next_since <= since:
                # The exchange did not move the cursor forward; stop instead
                # of requesting the same page forever.
                break
            since = next_since
            time.sleep(exchange.rateLimit / 1000)  # Respect the API rate limit

        df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        df = df.set_index('timestamp')

        # Drop any candle returned twice across page boundaries and keep the
        # index chronological so downstream rolling windows are well defined.
        df = df[~df.index.duplicated(keep='first')].sort_index()

        # The last page may run past the requested end date; trim it.
        df = df[df.index <= end_cutoff]
        data_frames[pair] = df

        if plot:
            plt.figure(figsize=(14, 8))

            # NOTE(review): the 3x1 grid uses only its first slot, so the
            # chart fills the top third of the figure; kept as in the original.
            plt.subplot(3, 1, 1)
            plt.plot(df.index, df['close'], label=f"{pair} Price")
            plt.title(f"{pair} Price")
            plt.ylabel('Price (USDT)')
            plt.grid()

            plt.tight_layout()
            plt.show()

    return {k.replace('/USDT', ''): v for k, v in data_frames.items()}

In [3]:
# Configuration: assets to analyse (keys) and their initial portfolio weights.
initial_allocations = {'BTC': 1/6, 'ETH': 1/6, 'SOL': 1/6, 'DOGE': 1/6, 'LINK': 1/6, 'AXS': 1/6}
# initial_allocations = {'SOL': 1/4, 'DOGE': 1/4, 'LINK': 1/4, 'SIDUS': 1/4}
cryptos = list(initial_allocations.keys())  # Symbols to download
timeframe_download = '1h'  # Candle timeframe ('1m', '5m', '1h', '1d', etc.)
start_date = '2021-01-01'  # Start of the period
end_date = '2021-12-31'  # End of the period
plot = False
market = 'binance'

data_frames = download_and_plot(cryptos, market, timeframe_download, start_date, end_date, plot)

# Build one close-price table with a column per asset.  The series are aligned
# on their timestamps rather than copied positionally: if one pair has missing
# candles or a different listing date, positional copying would either raise a
# length mismatch or silently pair prices from different hours.
# join='inner' keeps only timestamps present for every asset, so the table
# stays free of NaN (note this shortens the table if any asset has gaps).
close_by_crypto = {}
for crypto in cryptos:
    close = data_frames[crypto]['close']
    close_by_crypto[crypto] = close[~close.index.duplicated(keep='first')]

crypto_prices = pd.concat(close_by_crypto, axis=1, join='inner')
crypto_prices.index.name = 'timestamp'

crypto_prices

Scarico i dati per BTC/USDT dal 2021-01-01 al 2021-12-31...
Scarico i dati per ETH/USDT dal 2021-01-01 al 2021-12-31...
Scarico i dati per SOL/USDT dal 2021-01-01 al 2021-12-31...
Scarico i dati per DOGE/USDT dal 2021-01-01 al 2021-12-31...
Scarico i dati per LINK/USDT dal 2021-01-01 al 2021-12-31...
Scarico i dati per AXS/USDT dal 2021-01-01 al 2021-12-31...


Unnamed: 0_level_0,BTC,ETH,SOL,DOGE,LINK,AXS
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 00:00:00,28995.13,734.07,1.5442,0.004679,11.2812,0.53441
2021-01-01 01:00:00,29409.99,748.28,1.5734,0.004737,11.4735,0.53366
2021-01-01 02:00:00,29194.65,744.06,1.5872,0.004755,11.5773,0.53321
2021-01-01 03:00:00,29278.40,744.82,1.6029,0.004978,11.5006,0.54173
2021-01-01 04:00:00,29220.31,742.29,1.6022,0.004894,11.4575,0.53912
...,...,...,...,...,...,...
2021-12-30 20:00:00,47203.16,3724.78,173.1900,0.171100,19.8700,95.08000
2021-12-30 21:00:00,47300.00,3736.93,173.2000,0.171800,19.9900,95.53000
2021-12-30 22:00:00,47084.88,3703.83,171.6800,0.171000,19.8600,94.73000
2021-12-30 23:00:00,47120.87,3709.27,172.5200,0.171100,19.9300,95.00000


In [4]:
# Definizione delle funzioni di feature
def ma_pct_change(prices, window):
    """Rolling mean, over `window` rows, of the row-to-row percentage change."""
    returns = prices.pct_change()
    rolling_returns = returns.rolling(window=window)
    return rolling_returns.mean()

def mstd_pct_change(prices, window):
    """Rolling standard deviation, over `window` rows, of the percentage change."""
    returns = prices.pct_change()
    rolling_returns = returns.rolling(window=window)
    return rolling_returns.std()

def ma_prices(prices, window):
    """Rolling mean of the prices over `window` rows."""
    price_windows = prices.rolling(window=window)
    return price_windows.mean()

def mstd_prices(prices, window):
    """Rolling standard deviation of the prices over `window` rows."""
    price_windows = prices.rolling(window=window)
    return price_windows.std()

def ma_ma_prices(prices, window):
    """Rolling mean applied twice: the moving average of the moving average."""
    first_pass = prices.rolling(window=window).mean()
    second_pass = first_pass.rolling(window=window).mean()
    return second_pass

def mstd_mstd_prices(prices, window):
    """Rolling std applied twice: the moving std of the moving std of prices."""
    first_pass = prices.rolling(window=window).std()
    second_pass = first_pass.rolling(window=window).std()
    return second_pass

def ma_mstd_prices(prices, window):
    """Rolling mean of the rolling standard deviation of prices."""
    moving_std = prices.rolling(window=window).std()
    smoothed_std = moving_std.rolling(window=window).mean()
    return smoothed_std

def mstd_ma_prices(prices, window):
    """Rolling standard deviation of the rolling mean of prices."""
    moving_avg = prices.rolling(window=window).mean()
    avg_dispersion = moving_avg.rolling(window=window).std()
    return avg_dispersion

def ma_derivative(prices, window):
    """Rolling mean of the first difference of prices."""
    return prices.diff().rolling(window=window).mean()

def mstd_derivative(prices, window):
    """Rolling standard deviation of the first difference of prices."""
    return prices.diff().rolling(window=window).std()

def ma_second_derivative(prices, window):
    """Rolling mean of the second difference (difference of differences) of prices."""
    velocity = prices.diff()
    acceleration = velocity.diff()
    return acceleration.rolling(window=window).mean()

def mstd_second_derivative(prices, window):
    """Rolling standard deviation of the second difference of prices."""
    velocity = prices.diff()
    acceleration = velocity.diff()
    return acceleration.rolling(window=window).std()

def ma_third_derivative(prices, window):
    """Rolling mean of the third difference of prices."""
    differenced = prices
    # Apply the first difference three times in a row.
    for _ in range(3):
        differenced = differenced.diff()
    return differenced.rolling(window=window).mean()

def mstd_third_derivative(prices, window):
    """Rolling standard deviation of the third difference of prices."""
    differenced = prices
    # Apply the first difference three times in a row.
    for _ in range(3):
        differenced = differenced.diff()
    return differenced.rolling(window=window).std()

In [5]:
def analyze_crypto_patterns(crypto_prices, feature_func, window, F):
    """Score how often the sign of a feature matches the later price direction.

    For every column of `crypto_prices`, the feature `feature_func(prices, window)`
    is turned into a signal (+1 where the feature is > 0, otherwise -1) and
    compared with the realised direction `F` rows later (+1 where the price
    `F` rows ahead is strictly higher than the current one, otherwise -1).

    Returns a dict mapping each column name to the percentage of rows where
    signal and realised direction agree, computed only over rows where both
    the feature and the future price are not NaN; the value is NaN when no
    such row exists.
    """

    def _hit_rate(series):
        feature = feature_func(series, window)

        # Price observed F rows after each row (NaN for the last F rows).
        ahead = series.shift(-F)

        predicted = np.where(feature > 0, 1, -1)
        realized = np.where(ahead > series, 1, -1)

        # Keep only rows where both the feature and the future price exist.
        usable = ~(np.isnan(feature) | np.isnan(ahead))
        if usable.sum() > 0:
            return np.mean(predicted[usable] == realized[usable]) * 100
        return np.nan

    return {name: _hit_rate(crypto_prices[name]) for name in crypto_prices.columns}

In [6]:
def optimize_parameters(crypto_prices, feature_functions, window_list, F_list):
    """Grid-search feature, window and horizon for the best average accuracy.

    Parameters
    ----------
    crypto_prices : pandas.DataFrame
        One price column per asset.
    feature_functions : dict[str, callable]
        Maps a label to a function called as ``func(prices, window)``.
    window_list, F_list : iterable
        Candidate rolling windows and look-ahead horizons; every
        (window, F) pair is evaluated for every feature.

    Returns
    -------
    (best_params, results_df)
        ``best_params`` is ``(feature, window, F)`` of the row with the highest
        ``avg_accuracy``, or ``(None, None, None)`` when no combination
        produced a valid (non-NaN) average, including when any of the input
        collections is empty.  ``results_df`` has one row per combination with
        columns ``feature, window, F, avg_accuracy`` plus one accuracy column
        per asset.
    """
    results_data = []

    # Evaluate every combination of feature, window and F.
    for feature_name, feature_func in feature_functions.items():
        for window, F in product(window_list, F_list):
            accuracies = analyze_crypto_patterns(crypto_prices, feature_func, window, F)

            # Average only the assets with a valid (non-NaN) accuracy.
            accuracies_values = [val for val in accuracies.values() if not np.isnan(val)]
            avg_accuracy = np.mean(accuracies_values) if accuracies_values else np.nan

            results_data.append({
                'feature': feature_name,
                'window': window,
                'F': F,
                'avg_accuracy': avg_accuracy,
                **accuracies
            })

    if results_data:
        results_df = pd.DataFrame(results_data)
    else:
        # Nothing was evaluated: build an empty frame that still carries the
        # expected columns, so the lookup below (and callers filtering on
        # these columns) does not fail with a KeyError.
        results_df = pd.DataFrame(
            columns=['feature', 'window', 'F', 'avg_accuracy', *crypto_prices.columns]
        )

    # Ignore rows without a valid average and pick the highest one.
    valid_results_df = results_df.dropna(subset=['avg_accuracy'])
    if not valid_results_df.empty:
        best_row = valid_results_df.loc[valid_results_df['avg_accuracy'].idxmax()]
        best_params = (best_row['feature'], best_row['window'], best_row['F'])
    else:
        best_params = (None, None, None)

    return best_params, results_df

In [7]:
# Parameters for a single evaluation run
feature_func = ma_pct_change
window = 2*24  # Rolling-window length in rows (48 rows = 2 days with hourly candles)
F = 7*24       # Look-ahead horizon in rows (168 rows = 7 days with hourly candles)

# Pattern analysis: per-asset agreement between feature sign and future direction
results = analyze_crypto_patterns(crypto_prices, feature_func, window, F)

# Show the results
results

{'BTC': np.float64(47.590503055947345),
 'ETH': np.float64(50.2820874471086),
 'SOL': np.float64(53.11471556182417),
 'DOGE': np.float64(52.30371415138693),
 'LINK': np.float64(50.2350728725905),
 'AXS': np.float64(51.83356840620592)}

In [8]:
# Parameter grids to test
# Dictionary of the features to test (label -> feature function)
feature_functions = {
    'MA_PCT_CHANGE': ma_pct_change,
    'MSTD_PCT_CHANGE': mstd_pct_change,
    # 'MA_PRICES': ma_prices,
    # 'STD_PRICES': mstd_prices,
    'MA_MA_PRICES': ma_ma_prices,
    'MSTD_MSTD_PRICES': mstd_mstd_prices,
    'MA_STD_PRICES': ma_mstd_prices,
    'MSTD_MA_PRICES': mstd_ma_prices,
    'MA_DERIV': ma_derivative,
    'MSTD_DERIV': mstd_derivative,
    'MA_SEC_DERIV': ma_second_derivative,
    'MSTD_SEC_DERIV': mstd_second_derivative,
    'MA_THIRD_DERIV': ma_third_derivative,
    'MSTD_THIRD_DERIV': mstd_third_derivative
}
# Candidate windows and horizons: 24, 48, ..., 312 rows (multiples of 24).
# NOTE(review): the saved output of this cell lists window/F values such as
# 1200 and 1896, which these grids cannot produce; the output appears to come
# from an earlier run with different grids. Re-run the cell to refresh it.
window_list = np.arange(1, 14)*24
F_list = np.arange(1, 14)*24

# Parameter optimisation over the full grid
best_params, results_df = optimize_parameters(crypto_prices, feature_functions, window_list, F_list)

# Show the results
print(f"Migliore combinazione trovata: feature={best_params[0]}, window={best_params[1]}, F={best_params[2]}")
print("DataFrame con tutte le accuratezze:")
display(results_df)

Migliore combinazione trovata: feature=MA_MA_PRICES, window=1896, F=1560
DataFrame con tutte le accuratezze:


Unnamed: 0,feature,window,F,avg_accuracy,BTC,ETH,SOL,DOGE,LINK,AXS
0,MA_PCT_CHANGE,1200,1200,50.603521,46.457938,57.147375,64.500316,39.547755,36.796331,59.171410
1,MA_PCT_CHANGE,1200,1224,50.677249,45.984127,57.142857,64.380952,40.111111,36.761905,59.682540
2,MA_PCT_CHANGE,1200,1248,50.844487,45.140217,58.110261,64.690886,40.535373,37.444232,59.145953
3,MA_PCT_CHANGE,1200,1272,51.362231,45.489443,59.293026,65.802943,40.674984,37.779910,59.133077
4,MA_PCT_CHANGE,1200,1296,51.790302,45.825305,59.874759,67.068080,41.859345,36.881824,59.232498
...,...,...,...,...,...,...,...,...,...,...
10795,MSTD_THIRD_DERIV,1896,1800,64.295663,64.305611,79.267807,85.276562,33.207322,44.349383,79.367290
10796,MSTD_THIRD_DERIV,1896,1824,64.244302,63.954418,79.048381,85.425830,32.846861,43.842463,80.347861
10797,MSTD_THIRD_DERIV,1896,1848,64.480380,64.242668,79.670550,85.395741,32.322218,44.194456,81.056649
10798,MSTD_THIRD_DERIV,1896,1872,64.749024,65.401696,79.854663,84.618490,32.135648,44.428744,82.054905


In [9]:
# Select the row of the best combination found
best_result = results_df[
    (results_df['feature'] == best_params[0]) &
    (results_df['window'] == best_params[1]) &
    (results_df['F'] == best_params[2])
]

# Beware of accuracies that are too high (typically above 50%): they may simply
# be due to window and F being so large that very few non-NaN values remain,
# which makes a high accuracy easy to obtain.
display(best_result)
# 59.89716675200547  (NOTE(review): unexplained value, presumably an accuracy from an earlier run; confirm or remove)

Unnamed: 0,feature,window,F,avg_accuracy,BTC,ETH,SOL,DOGE,LINK,AXS
2685,MA_MA_PRICES,1896,1560,79.826121,84.499111,96.206283,96.354475,46.976882,72.673385,82.246592
