In [22]:
import calendar
import datetime
import random
import time
from datetime import date, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

import ccxt

In [23]:
# Client for the Binance exchange; the later cells use it to request market data.
exchange = ccxt.binance()

# List the candle sizes this exchange accepts, so a valid granularity can be chosen below.
print(
    f'Available time granularities on the chosen digital currency exchange: {exchange.timeframes}'
)

Available time granularities on the chosen digital currency exchange: {'1s': '1s', '1m': '1m', '3m': '3m', '5m': '5m', '15m': '15m', '30m': '30m', '1h': '1h', '2h': '2h', '4h': '4h', '6h': '6h', '8h': '8h', '12h': '12h', '1d': '1d', '3d': '3d', '1w': '1w', '1M': '1M'}


In [24]:
def _day_strings(first_day, end_day):
    """Return every calendar day in [first_day, end_day) as a 'YYYY-MM-DD' string.

    Parameters
    ----------
    first_day : datetime.date
        First day of the range (included).
    end_day : datetime.date
        Day right after the last day of the range (excluded).

    Returns
    -------
    list[str]
        One string per day, in chronological order; empty when the range is empty.
    """
    last_day = end_day - timedelta(days=1)  # date_range includes both ends, so step back one day
    # 'D' is the documented upper-case alias for a calendar-day frequency.
    return pd.date_range(first_day, last_day, freq='D').strftime("%Y-%m-%d").tolist()


# Both ranges are half-open: the start day is included, the end day is excluded.
sdate_training = date(2022, 1, 1)   # Start date of the training data -> included.
edate_training = date(2022, 3, 1)   # End date of the training data -> excluded.

sdate_test = date(2022, 3, 1)   # Start date of the test data -> included.
edate_test = date(2022, 4, 1)   # End date of the test data -> excluded.

trading_symbol = 'SOL/BUSD'  # Pair to be downloaded (BE CAREFUL).
granularity = '1h'  # Data granularity; the accepted values depend on the chosen exchange.

list_dates_training = _day_strings(sdate_training, edate_training)  # All days in the training set.
list_dates_test = _day_strings(sdate_test, edate_test)  # All days in the test set.

print(f'Training days: {list_dates_training}')
print(f'Test days: {list_dates_test}')

Training days: ['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08', '2022-01-09', '2022-01-10', '2022-01-11', '2022-01-12', '2022-01-13', '2022-01-14', '2022-01-15', '2022-01-16', '2022-01-17', '2022-01-18', '2022-01-19', '2022-01-20', '2022-01-21', '2022-01-22', '2022-01-23', '2022-01-24', '2022-01-25', '2022-01-26', '2022-01-27', '2022-01-28', '2022-01-29', '2022-01-30', '2022-01-31', '2022-02-01', '2022-02-02', '2022-02-03', '2022-02-04', '2022-02-05', '2022-02-06', '2022-02-07', '2022-02-08', '2022-02-09', '2022-02-10', '2022-02-11', '2022-02-12', '2022-02-13', '2022-02-14', '2022-02-15', '2022-02-16', '2022-02-17', '2022-02-18', '2022-02-19', '2022-02-20', '2022-02-21', '2022-02-22', '2022-02-23', '2022-02-24', '2022-02-25', '2022-02-26', '2022-02-27', '2022-02-28']
Test days: ['2022-03-01', '2022-03-02', '2022-03-03', '2022-03-04', '2022-03-05', '2022-03-06', '2022-03-07', '2022-03-08', '2022-03-09', '2022-03-10', '2022-

In [None]:
OHLCV_COLUMNS = ['Time', 'Open', 'High', 'Low', 'Close', 'Volume']
MS_PER_DAY = 24 * 60 * 60 * 1000  # Length of one day in milliseconds.


def fetch_day_candles(exchange, symbol, timeframe, day):
    """Fetch the candles whose opening time falls inside one UTC calendar day.

    Parameters
    ----------
    exchange : ccxt exchange instance
        Client used to query the market data.
    symbol : str
        Trading pair, e.g. 'SOL/BUSD'.
    timeframe : str
        Candle size, e.g. '1h'.
    day : str
        Day to download, formatted as 'YYYY-MM-DD'.

    Returns
    -------
    list[list]
        Raw rows as returned by the exchange (timestamp in ms first, then
        open, high, low, close, volume), restricted to the requested day.
    """
    # The day is interpreted as UTC (calendar.timegm) rather than local time
    # (time.mktime), so the result does not depend on the machine's timezone
    # and local DST changes cannot shift or duplicate candles.
    day_start_ms = calendar.timegm(time.strptime(day, "%Y-%m-%d")) * 1000
    day_end_ms = day_start_ms + MS_PER_DAY

    # NOTE: a single request per day is issued, as before. This assumes one
    # response covers the whole day (24 rows for '1h'); much finer
    # granularities may need pagination -- TODO confirm against the exchange limits.
    candles = exchange.fetch_ohlcv(symbol=symbol, timeframe=timeframe, since=day_start_ms)

    # Keep rows by timestamp instead of by position: a fixed "first 24 rows"
    # slice is only right for complete hourly data and otherwise leaks rows
    # from the following day.
    return [row for row in candles if day_start_ms <= row[0] < day_end_ms]


def download_ohlcv(exchange, symbol, timeframe, days):
    """Download OHLCV candles for a list of days and return them indexed by time.

    Parameters
    ----------
    exchange : ccxt exchange instance
        Client used to query the market data.
    symbol : str
        Trading pair, e.g. 'SOL/BUSD'.
    timeframe : str
        Candle size, e.g. '1h'.
    days : list[str]
        Days to download, each formatted as 'YYYY-MM-DD'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Open', 'High', 'Low', 'Close', 'Volume', indexed by 'Time'
        (timezone-naive timestamps expressed in UTC). Empty when no rows
        were downloaded.
    """
    rows = []
    for day in days:  # 'day', not 'date': the latter would rebind datetime.date imported above
        rows.extend(fetch_day_candles(exchange, symbol, timeframe, day))

    # Build the frame once from all rows instead of concatenating inside the loop.
    candles = pd.DataFrame(rows, columns=OHLCV_COLUMNS)
    candles['Time'] = pd.to_datetime(candles['Time'], unit='ms')  # ms since epoch -> timestamp
    return candles.set_index('Time')


# Download training data.
df_training = download_ohlcv(exchange, trading_symbol, granularity, list_dates_training)

# Download test data.
df_test = download_ohlcv(exchange, trading_symbol, granularity, list_dates_test)

In [None]:
# Preview the first five rows of the training dataset (5 is the default of head()).
df_training.head()

In [None]:
# Preview the first five rows of the test dataset (5 is the default of head()).
df_test.head()

In [None]:
# Save both datasets in .csv format; the 'Time' index is written as the first column.
output_dir = Path('./data')
output_dir.mkdir(parents=True, exist_ok=True)  # to_csv fails if the target folder is missing

df_training.to_csv(output_dir / 'training_data.csv')  # Save training data into .csv format.
df_test.to_csv(output_dir / 'test_data.csv')  # Save test data into .csv format.