In [1]:
import ccxt
import pandas as pd
import random
import time
from datetime import datetime, timedelta

random.seed(8)

In [2]:
# Configurazione
symbol = 'BTC/USDT'
timeframe = '1m'
exchange_id = 'bybit'
start_date = datetime(2021, 1, 9)
end_date = datetime(2023, 12, 3)
period_minutes = 24 * 60
num_columns = 100
output_file = '100_datasets_1440_minutes_random_state_8_bybit_period_9_jan_2021_3_dic_2023.csv'

# Inizializza l'exchange
print("Inizializzazione dell'exchange...")
exchange = getattr(ccxt, exchange_id)({'enableRateLimit': True})
print("Exchange inizializzato con successo.")

# Funzione per scaricare i dati in blocchi
def fetch_data_blocks(symbol, timeframe, start, end):
    print(f"Scarico i dati da {start} a {end}...")
    data = []
    since = int(start.timestamp() * 1000)
    end_timestamp = int(end.timestamp() * 1000)

    while since < end_timestamp:
        try:
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, since)
            if not ohlcv:
                print("Nessun dato restituito. Uscita dal ciclo fetch.")
                break
            data += ohlcv
            since = ohlcv[-1][0] + 1  # Avanza al prossimo blocco
            time.sleep(exchange.rateLimit / 1000)  # Rispetta il rate limit
        except Exception as e:
            print(f"Errore durante il fetch: {e}")
            break

    return data

# Genera un periodo casuale valido senza duplicati
def generate_valid_period(existing_periods):
    max_start_minutes = int((end_date - start_date).total_seconds() // 60) - period_minutes
    while True:
        random_start = start_date + timedelta(minutes=random.randint(0, max_start_minutes))
        random_end = random_start + timedelta(minutes=period_minutes)
        if any(random_start == p[0] and random_end == p[1] for p in existing_periods):
            print(f"Periodo duplicato {random_start} - {random_end}. Genero un nuovo periodo...")
            continue
        raw_data = fetch_data_blocks(symbol, timeframe, random_start, random_end)
        if raw_data and len(raw_data) >= period_minutes:  # Verifica se i dati sono completi
            return random_start, random_end, raw_data
        print(f"Dati insufficienti o assenti per il periodo {random_start} - {random_end}. Genero un nuovo periodo...")

# Crea il DataFrame finale
print("Creazione del DataFrame finale...")
data = pd.DataFrame()
data['Timestamp'] = [start_date + timedelta(minutes=i) for i in range(period_minutes)]
data['Timestamp'] = data['Timestamp'].apply(lambda x: exchange.iso8601(int(x.timestamp() * 1000))) # Timestamp formato ccxt
print("Colonna Timestamp creata con successo.")

existing_periods = []
for i in range(num_columns):
    print(f"Elaborazione del periodo {i + 1}/{num_columns}...")
    start, end, raw_data = generate_valid_period(existing_periods)
    existing_periods.append((start, end))

    df = pd.DataFrame(raw_data, columns=['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='ms')
    df.set_index('Timestamp', inplace=True)

    data[f'Price_{i + 1}'] = df['Close'].values[:period_minutes]
    print(f"Colonna Price_{i + 1} aggiunta con successo.")

# Esporta in CSV
print("Esportazione dei dati in CSV...")
data.to_csv(output_file, index=False)
print(f"Dati salvati in {output_file}")

Inizializzazione dell'exchange...
Exchange inizializzato con successo.
Creazione del DataFrame finale...
Colonna Timestamp creata con successo.
Elaborazione del periodo 1/100...
Scarico i dati da 2021-12-05 03:56:00 a 2021-12-06 03:56:00...
Colonna Price_1 aggiunta con successo.
Elaborazione del periodo 2/100...
Scarico i dati da 2022-07-02 10:49:00 a 2022-07-03 10:49:00...
Colonna Price_2 aggiunta con successo.
Elaborazione del periodo 3/100...
Scarico i dati da 2022-07-09 16:06:00 a 2022-07-10 16:06:00...
Colonna Price_3 aggiunta con successo.
Elaborazione del periodo 4/100...
Scarico i dati da 2021-07-11 23:35:00 a 2021-07-12 23:35:00...
Colonna Price_4 aggiunta con successo.
Elaborazione del periodo 5/100...
Scarico i dati da 2021-10-17 05:54:00 a 2021-10-18 05:54:00...
Colonna Price_5 aggiunta con successo.
Elaborazione del periodo 6/100...
Scarico i dati da 2023-11-01 11:08:00 a 2023-11-02 11:08:00...
Colonna Price_6 aggiunta con successo.
Elaborazione del periodo 7/100...
Scaric

  data[f'Price_{i + 1}'] = df['Close'].values[:period_minutes]


Colonna Price_100 aggiunta con successo.
Esportazione dei dati in CSV...
Dati salvati in 100_datasets_1440_minutes_random_state_8_bybit_period_9_jan_2021_3_dic_2023.csv


In [3]:
data

Unnamed: 0,Timestamp,Price_1,Price_2,Price_3,Price_4,Price_5,Price_6,Price_7,Price_8,Price_9,...,Price_91,Price_92,Price_93,Price_94,Price_95,Price_96,Price_97,Price_98,Price_99,Price_100
0,2021-01-09T00:00:00.000Z,49016.13,19135.61,21577.71,34566.37,61164.99,34457.19,36937.77,46496.81,20823.47,...,41722.57,35139.92,19246.33,42915.43,26548.06,23818.69,46856.64,25451.90,62101.93,48415.29
1,2021-01-09T00:01:00.000Z,49048.53,19147.81,21604.71,34569.67,61165.00,34457.63,36950.13,46523.70,20819.22,...,41731.35,35161.26,19230.44,42903.83,26550.00,23827.29,46885.21,25468.00,62122.68,48423.32
2,2021-01-09T00:02:00.000Z,49079.37,19139.24,21676.39,34513.31,61229.56,34457.04,37012.31,46516.23,20822.29,...,41725.30,35167.76,19244.92,42872.96,26541.97,23828.70,46858.39,25434.00,62135.64,48440.89
3,2021-01-09T00:03:00.000Z,49079.98,19124.54,21681.90,34503.31,61114.69,34452.02,37030.83,46519.14,20816.98,...,41743.42,35164.62,19237.34,42804.03,26514.01,23828.13,46820.54,25443.98,62180.63,48418.10
4,2021-01-09T00:04:00.000Z,48970.00,19118.63,21663.20,34479.04,61127.43,34452.02,37024.73,46488.41,20815.02,...,41726.26,35173.27,19242.29,42751.64,26510.01,23824.24,46848.87,25437.97,62229.04,48389.94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,2021-01-09T23:55:00.000Z,48997.47,19057.34,21061.78,32839.67,62072.93,35359.55,39776.04,46388.09,21163.69,...,41800.15,34495.12,19364.92,42306.73,26092.25,23034.19,46638.21,25863.96,61653.58,47006.75
1436,2021-01-09T23:56:00.000Z,48917.63,19062.37,21043.39,32827.10,62041.41,35371.97,39753.76,46378.68,21171.62,...,41812.94,34487.84,19370.01,42347.31,26092.96,23046.40,46610.13,25863.96,61686.08,46995.51
1437,2021-01-09T23:57:00.000Z,48963.67,19068.67,21019.97,32832.04,62033.08,35386.36,39716.11,46368.96,21181.80,...,41788.30,34482.61,19366.18,42396.00,26100.96,23043.92,46559.24,25862.07,61727.99,46971.57
1438,2021-01-09T23:58:00.000Z,48960.65,19071.47,21008.94,32817.91,62034.96,35365.87,39682.29,46370.50,21172.60,...,41769.20,34492.00,19382.59,42326.52,26100.96,23040.15,46542.13,25862.08,61750.00,46918.44
