In [1]:
import pandas as pd
import os

In [2]:
# Folder holding one CSV file per currency pair. Built with os.path.join so
# the path separator is correct on every platform (a hard-coded backslash
# only resolves on Windows).
DIRECTORY = os.path.join("Data", "crypto")

SEQ_LEN = 60  # minutes; not used by the data-preparation step below
FUTURE_PERIOD_PREDICT = 3  # minutes; how many rows ahead the 'future' price is taken from
RATIO_TO_PREDICT = "LTC-USD"  # column prefix (file name without extension) of the pair to label

Define a function that loads each currency's CSV file and assembles the combined, labelled DataFrame:

In [3]:
def prepare(directory, ratio_to_predict=None, future_period_predict=None):
    """Load per-currency CSV files and build one labelled DataFrame.

    Every entry in ``directory`` is read as a header-less CSV whose columns
    are, in order: 'time', 'low', 'high', 'open', 'close', 'volume'. The part
    of the file name before the first "." (e.g. "LTC-USD" for "LTC-USD.csv")
    becomes the prefix of that currency's columns.

    Parameters
    ----------
    directory : str
        Folder containing the CSV files.
    ratio_to_predict : str, optional
        Column prefix of the currency to label. Defaults to the notebook-level
        ``RATIO_TO_PREDICT`` constant.
    future_period_predict : int, optional
        Number of rows to look ahead when taking the 'future' price. Defaults
        to the notebook-level ``FUTURE_PERIOD_PREDICT`` constant.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'time', holding '<name>_close' and '<name>_volume' for each
        file, plus 'future' (the selected close price shifted back by
        ``future_period_predict`` rows) and 'target' (1 where 'future' is
        greater than the current close, otherwise 0).

    Raises
    ------
    KeyError
        If no file in ``directory`` produced a '<ratio_to_predict>_close'
        column (for example, the directory is empty).
    """
    # Resolve the defaults at call time so edits to the config cell are honoured.
    if ratio_to_predict is None:
        ratio_to_predict = RATIO_TO_PREDICT
    if future_period_predict is None:
        future_period_predict = FUTURE_PERIOD_PREDICT

    main_df = pd.DataFrame()
    # os.listdir returns entries in arbitrary order; sorting keeps the column
    # order (and which file anchors the join) reproducible across runs.
    for i, f_name in enumerate(sorted(os.listdir(directory))):
        path = os.path.join(directory, f_name)
        print("File", i+1, "|", f_name, "| Processing...")

        pair = f_name.split(".")[0]
        close_col, volume_col = f"{pair}_close", f"{pair}_volume"
        df = (
            pd.read_csv(path, names=['time', 'low', 'high', 'open', 'close', 'volume'])
            .rename(columns={"close": close_col, "volume": volume_col})
            .set_index("time")
            [[close_col, volume_col]]
        )

        # The first non-empty file defines the index; later files are
        # left-joined onto it (DataFrame.join defaults to how="left").
        main_df = df if main_df.empty else main_df.join(df)

    target_close = f'{ratio_to_predict}_close'
    if target_close not in main_df.columns:
        raise KeyError(f"No '{target_close}' column was loaded from {directory!r}")

    main_df['future'] = main_df[target_close].shift(-future_period_predict)
    # Vectorized label. The trailing rows have a NaN 'future'; NaN > x is
    # False, so they are labelled 0 and should be dropped before training.
    main_df['target'] = (main_df['future'] > main_df[target_close]).astype("int64")

    print("Process complete!")
    return main_df


# Build the combined frame from every file in the configured data folder.
df = prepare(DIRECTORY)

# Preview the first ten rows: current close, look-ahead close, and the label.
df.loc[:, [f'{RATIO_TO_PREDICT}_close', 'future', 'target']].head(10)

File 1 | BCH-USD.csv | Processing...
File 2 | BTC-USD.csv | Processing...
File 3 | ETH-USD.csv | Processing...
File 4 | LTC-USD.csv | Processing...
Process complete!


Unnamed: 0_level_0,LTC-USD_close,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1528968660,96.580002,96.5,0
1528968720,96.660004,96.389999,0
1528968780,96.57,96.519997,0
1528968840,96.5,96.440002,0
1528968900,96.389999,96.470001,1
1528968960,96.519997,96.400002,0
1528969020,96.440002,96.400002,0
1528969080,96.470001,96.400002,0
1528969140,96.400002,96.400002,0
1528969200,96.400002,96.400002,0
