In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn import preprocessing
from collections import deque
import random
import time

In [2]:
datas = pd.read_csv('crypto_data/crypto_data/LTC-USD.csv',names = ["times", "low", "high","open","close","volume" ])
#Lecture des données et noms des colonnes

In [3]:
datas.head()

Unnamed: 0,times,low,high,open,close,volume
0,1528968660,96.580002,96.589996,96.589996,96.580002,9.6472
1,1528968720,96.449997,96.669998,96.589996,96.660004,314.387024
2,1528968780,96.470001,96.57,96.57,96.57,77.129799
3,1528968840,96.449997,96.57,96.57,96.5,7.216067
4,1528968900,96.279999,96.540001,96.5,96.389999,524.539978


In [4]:
ratios = ["BTC-USD", "LTC-USD", "ETH-USD","BCH-USD"] #Liste des ratios qu'on peut prédire
SEQ_LEN = 60 #Taille de la séquence qui peut servir à prédire
FUTURE_PERIOD_PREDICT = 3  #taille de la séquence à prédire
RATIO_TO_PREDICT = "LTC-USD" #ratio choisi 

def classify(current, future):
    if  float(future) > float(current):
        return 1 
    else :
        return 0

main_df = pd.DataFrame()
for ratio in ratios : 
    dataset = f"crypto_data/crypto_data/{ratio}.csv" # on recup le nom des datasets de tous les ratios
    df = pd.read_csv(dataset, names = ["time", "low", "high", "open", "close", "volume"]) #on lit le dataset
    df.rename(columns={"close":f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True) #On renomme les colonnes pour les réintégrer
    df.set_index("time", inplace=True) #On set l'index à time
    df = df[[f"{ratio}_close",f"{ratio}_volume"]] #On récupère que les colonnes qui nous intéressent de df
    if len(main_df) == 0 :
        main_df = df
    else : 
        main_df = main_df.join(df)
main_df['future'] = main_df[[f"{RATIO_TO_PREDICT}_close"]].shift(-FUTURE_PERIOD_PREDICT) # On veut prédire 3 minutes plus loin
main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"])) 
print (main_df[[f"{RATIO_TO_PREDICT}_close","future", "target"]].head())


            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1


In [5]:
def preprocess_df(df):
    df = df.drop('future', 1)
    for col in df.columns : 
        if (col != "target"):
            df[col] = df[col].pct_change() #On analyse les variations et non les valeurs en elle même 
            df.dropna(inplace = True)
            df[col] = preprocessing.scale(df[col].values)
        df.dropna(inplace = True)
        sequential_data = []
        prev_days = deque(maxlen = SEQ_LEN) #On créé une file de taille 60 pour stocker des séquences de 60 valeures
        for i in df.values : #Retourne une liste des lignes du data frame
            prev_days.append([n for n in i[:-1]])
            if (len(prev_days) == SEQ_LEN):
                sequential_data.append([np.array(prev_days), i[-1]]) #Dès qu'on a une séquence de 60, on l'append
    random.shuffle(sequential_data) #puis on shuffle
    
    sells = []
    buys = [] 
    for seq, target in sequential_data : 
        if (target == 1) : 
            sells.append([seq, target])
        else:
            buys.append([seq, target]) # On compte les buys et les sells. Le but est d'en donner autant au modèle. 
    lower = min(len(sells), len(buys))
    sells = sells[:lower]
    buys = buys[:lower]
    sequential_data = buys + sells 
    random.shuffle(sequential_data)
    return sequential_data

In [6]:
def split(seq_data): # LLe but de cette fonction est maintenant de récupérer les données sous un vecteur (X, y)
    X = []
    y = []
 
    for el in seq_data : 
        X.append(el[0])
        y.append(el[1])
    X = np.array(X).reshape(len(X),60,8)
    y = np.array(y)
    return X, y 

In [7]:
x = split(seq_data)

NameError: name 'seq_data' is not defined

In [8]:
times = sorted(main_df.index.values) #values retournes les valeurs de la colone sous forme de list
last_5pct = times[int(0.05*len(times))]
validation_main_df = main_df[(main_df.index < last_5pct)]
main_df = main_df[(main_df.index >= last_5pct)]

train_data = preprocess_df(main_df)

test_data = preprocess_df(validation_main_df)
X_train, y_train = split(train_data)
X_test, y_test = split(test_data)

  df = df.drop('future', 1)
  df = df.drop('future', 1)


In [9]:
len(main_df)

92838

In [10]:
len(validation_main_df)

4886

In [11]:
len(train_data)

68476

In [12]:
len(test_data)

3740

In [13]:
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint 

In [17]:
model = Sequential()

model.add(LSTM(128,input_shape=(X_train.shape[1:]), return_sequences = True, activation = 'tanh'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences = True, activation = 'tanh'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, activation = 'tanh'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(128, activation = 'relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation = 'softmax'))

In [18]:
opt = tf.keras.optimizers.Adam(learning_rate = 0.001, decay = 1e-6)
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = opt, metrics = ['accuracy'])

In [19]:
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"
tensorboard = TensorBoard(log_dir=f'test/{NAME}')

History = model.fit(X_train, y_train, 
                    validation_data = (X_test, y_test),
                   batch_size = 32,
                   epochs = 10,
                   callbacks = tensorboard )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
