In [134]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras import Input
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [150]:
MAIN_DIR = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(MAIN_DIR, 'data')
MODEL_DIR = os.path.join(MAIN_DIR, 'models')

In [136]:
data = pd.read_csv(os.path.join(DATA_DIR,'merged_data.csv'))
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap,Source
0,2013-12-15,0.000559,0.000904,0.00029,0.000298,0.0,1960300.0,Dogecoin
1,2013-12-16,0.000299,0.000866,0.00015,0.000205,0.0,1509085.0,Dogecoin
2,2013-12-17,0.000207,0.000289,0.000116,0.000269,0.0,2169688.0,Dogecoin
3,2013-12-18,0.000267,0.000362,0.000205,0.000362,0.0,3188943.0,Dogecoin
4,2013-12-19,0.000395,0.00152,0.000328,0.001162,0.0,11150340.0,Dogecoin


In [137]:
data['Date'] = pd.to_datetime(data['Date'])
data['Timestamp'] = data['Date'].apply(lambda x: x.timestamp())
data['price_change'] = data.groupby('Source')['Close'].pct_change()
data['high_low_diff'] = data['High'] - data['Low']
data['open_close_diff'] = data['Close'] - data['Open']
data.dropna(inplace=True);
data['target'] = data['price_change'].apply(lambda x: 1 if x > 0 else 0)
data.drop(['Date', 'Source', 'price_change'], axis=1, inplace=True)
data.sort_values('Timestamp', inplace=True)
data.head()

Unnamed: 0,Open,High,Low,Close,Volume,Market Cap,Timestamp,high_low_diff,open_close_diff,target
1,0.000299,0.000866,0.00015,0.000205,0.0,1509085.0,1387152000.0,0.000716,-9.5e-05,0
2,0.000207,0.000289,0.000116,0.000269,0.0,2169688.0,1387238000.0,0.000173,6.2e-05,1
3,0.000267,0.000362,0.000205,0.000362,0.0,3188943.0,1387325000.0,0.000157,9.4e-05,1
4,0.000395,0.00152,0.000328,0.001162,0.0,11150340.0,1387411000.0,0.001191,0.000767,1
5,0.001143,0.001143,0.000662,0.000704,0.0,7284337.0,1387498000.0,0.000481,-0.000439,0


In [138]:
data.shape

(17658, 10)

In [139]:
features = data.drop('target', axis=1).columns
features = list(features)
features

['Open',
 'High',
 'Low',
 'Close',
 'Volume',
 'Market Cap',
 'Timestamp',
 'high_low_diff',
 'open_close_diff']

In [140]:
missing_values = data.isna().sum()
print(missing_values)

Open               0
High               0
Low                0
Close              0
Volume             0
Market Cap         0
Timestamp          0
high_low_diff      0
open_close_diff    0
target             0
dtype: int64


In [141]:
numeric_cols = data.select_dtypes(include=[np.number]).columns.drop('target')
data[numeric_cols] = data[numeric_cols].replace([np.inf, -np.inf], np.nan)
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].median())

In [142]:
scaler = StandardScaler()
X = scaler.fit_transform(data[features].values)
y = data['target'].values

In [143]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [144]:
model = Sequential([
    # Capa de entrada con la forma de X_train.
    Input(shape=(X_train.shape[1],)),
    # Capa densa con 128 unidades, activación ReLU y regularización L2 (0.01).
    Dense(128, activation='relu', kernel_regularizer=l2(0.01)),
    # Capa de normalización por lotes para normalizar las activaciones de la capa anterior.
    BatchNormalization(),
    # Capa de Dropout con una tasa de abandono del 0.3 para prevenir el sobreajuste.
    Dropout(0.3),
    # Capa densa con 64 unidades, activación ReLU y regularización L2 (0.01).
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    # Capa de normalización por lotes para normalizar las activaciones de la capa anterior.
    BatchNormalization(),
    # Capa de Dropout con una tasa de abandono del 0.3 para prevenir el sobreajuste.
    Dropout(0.3),
    # Capa densa con 32 unidades y activación ReLU.
    Dense(32, activation='relu'),
    # Capa densa con 1 unidad y activación sigmoide para la salida de clasificación binaria.
    Dense(1, activation='sigmoid')
])


In [145]:
optimizer = Adam(learning_rate=0.001)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [146]:
history = model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping, lr_scheduler],
    verbose=1
)

Epoch 1/200
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5158 - loss: 1.5052 - val_accuracy: 0.5071 - val_loss: 0.9912 - learning_rate: 0.0010
Epoch 2/200
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5204 - loss: 0.9354 - val_accuracy: 0.4947 - val_loss: 0.8174 - learning_rate: 0.0010
Epoch 3/200
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5378 - loss: 0.7745 - val_accuracy: 0.4961 - val_loss: 0.7446 - learning_rate: 0.0010
Epoch 4/200
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5635 - loss: 0.7142 - val_accuracy: 0.5042 - val_loss: 0.8507 - learning_rate: 0.0010
Epoch 5/200
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5879 - loss: 0.6854 - val_accuracy: 0.6277 - val_loss: 0.6647 - learning_rate: 0.0010
Epoch 6/200
[1m354/354[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [147]:
y_pred = (model.predict(X_test) > 0.5).astype(int)
print(classification_report(y_test, y_pred))

[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.88      0.71      0.79      1915
           1       0.72      0.89      0.80      1617

    accuracy                           0.79      3532
   macro avg       0.80      0.80      0.79      3532
weighted avg       0.81      0.79      0.79      3532



In [151]:
model.save(os.path.join(MODEL_DIR, 'model.keras'))