In [20]:
import pandas as pd
import os
from sklearn import preprocessing  # pip install sklearn ... if you don't have it!
from collections import deque
import random
import numpy as np

# Longitud de la secuencia precedente para recolectar para la RNN
SEQ_LEN = 60

# Cuánto tiempo en el futuro estamos tratando de predecir
FUTURE_PERIOD_PREDICT = 8

# El ratio que queremos predecir
RATIO_TO_PREDICT = "DOGE"

# Función para clasificar si el valor futuro es mayor que el valor actual
def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0

def preprocess_df(df):
    df = df.drop(columns=["future"])  # ya no la necesitamos.

    for col in df.columns:  # go through all of the columns
        if col != "target":  # normalize all ... except for the target itself!
            df[col] = df[col].pct_change()  # pct change "normalizes" the different currencies (each crypto coin has vastly diff values, we're really more interested in the other coin's movements)
            df.dropna(inplace=True)  # remove the nas created by pct_change
            df[col] = preprocessing.scale(df[col].values)  # scale between 0 and 1.

    df.dropna(inplace=True)  # cleanup again... jic. Those nasty NaNs love to creep in.
    sequential_data = []  # this is a list that will CONTAIN the sequences
    prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. They are made with deque, which keeps the maximum length by popping out older values as new ones come in

    for i in df.values:  # iterate over the values
        prev_days.append([n for n in i[:-1]])  # store all but the target
        if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!
            sequential_data.append([np.array(prev_days), i[-1]])  # append those bad boys!

    random.shuffle(sequential_data)  # shuffle for good measure.

# Uso de una cadena sin procesar para evitar errores de escape de unicode
file_path = r"C:\Users\CRISTIAN CHAVEZ\Documents\GitHub\AI_Project_Deep-Learning-Tensorflow-Keras\Modelo Para Predecir Criptomonedas\criptodata\DOGE.xlsx"

# Leer el archivo inicial
df = pd.read_excel(file_path, names=['time', 'open', 'high', 'low', 'close', 'volume', 'usdVolume'])

# DataFrame principal vacío para almacenar todos los datos
main_df = pd.DataFrame()

# Las 4 criptomonedas que queremos considerar
ratios = ["BTC", "DOGE", "ETH", "QTU"]

# Ruta base donde se encuentran los archivos de datos
base_path = r"C:\Users\CRISTIAN CHAVEZ\Documents\GitHub\AI_Project_Deep-Learning-Tensorflow-Keras\Modelo Para Predecir Criptomonedas\criptodata"

# Iterar sobre cada ratio
for ratio in ratios:
    print(ratio)
    
    # Obtener la ruta completa al archivo
    dataset = os.path.join(base_path, f"{ratio}.xlsx")
    
    # Leer el archivo específico
    df = pd.read_excel(dataset, names=['time', 'open', 'high', 'low', 'close', 'volume', 'usdVolume'])
    
    # Renombrar columnas de 'close' y 'volume' para incluir el ticker
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
    
    # Establecer 'time' como índice para poder unirlos por este tiempo compartido
    df.set_index("time", inplace=True)
    
    # Ignorar otras columnas además de 'close' y 'volume'
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]
    
    # Si el DataFrame principal está vacío, simplemente asignar el DataFrame actual
    if len(main_df) == 0:
        main_df = df
    else:
        # De lo contrario, unir estos datos con el DataFrame principal
        main_df = main_df.join(df)

main_df.fillna(method="ffill", inplace=True)  # if there are gaps in data, use previously known values
main_df.dropna(inplace=True)
print(main_df.head())  # how did we do??

# Crear una nueva columna 'future' que es el valor de cierre futuro desplazado por 'FUTURE_PERIOD_PREDICT'
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)

# Crear una columna 'target' que clasifica si el valor futuro es mayor que el valor actual
main_df['target'] = list(map(classify, main_df[f'{RATIO_TO_PREDICT}_close'], main_df['future']))

# Imprimir las primeras filas del DataFrame resultante
print(main_df[[f"{RATIO_TO_PREDICT}_close", "future"]].head())
#print(main_df.head())  # Imprimir las primeras filas del DataFrame resultante para verificar la estructura y los datos cargados correctamente.


BTC
DOGE
ETH
QTU
            BTC_close  BTC_volume  DOGE_close  DOGE_volume  ETH_close  \
time                                                                    
1715356800    60793.4      15.101    0.144841     169877.0    2914.04   
1715356860    60729.3       6.395    0.144513     245666.0    2907.25   
1715356920    60744.5       7.272    0.144437     417310.0    2906.65   
1715356980    60713.9       4.568    0.144207     311595.0    2904.65   
1715357040    60683.8      18.586    0.144067     153653.0    2901.04   

            ETH_volume  QTU_close  QTU_volume  
time                                           
1715356800        45.7      3.629      1283.8  
1715356860        23.8      3.620        44.7  
1715356920       997.9      3.611        20.8  
1715356980        82.0      3.606        46.6  
1715357040        96.0      3.609        32.8  
            DOGE_close    future
time                            
1715356800    0.144841  0.144547
1715356860    0.144513  0.144578
171

  main_df.fillna(method="ffill", inplace=True)  # if there are gaps in data, use previously known values


In [21]:
times = sorted(main_df.index.values)  # get the times
last_5pct = sorted(main_df.index.values)[-int(0.05*len(times))]  # get the last 5% of the times
print(last_5pct)

1715438880


In [22]:
validation_main_df = main_df[(main_df.index >= last_5pct)]  # make the validation data where the index is in the last 5%
main_df = main_df[(main_df.index < last_5pct)]  # now the main_df is all the data up to the last 5%
preprocess_df(main_df)
#train_x, train_y = preprocess_df(main_df) 
#validation_x, validation_y = preprocess_df(validation_main_df)