In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load the dataset
df = pd.read_csv('scaled_data.csv')

# Display basic info
print(df.head())
print(df.info())

# Ensure data is sorted by date
df = df.sort_values(by='Date', ascending=True)

print(df.columns)


                  Date      High       Low      Open        Close    Volume  \
0  2017-10-17 23:59:59  0.040998  0.038657  0.041556  5605.509766  0.001195   
1  2017-10-18 23:59:59  0.037807  0.033213  0.039271  5590.689941  0.002847   
2  2017-10-19 23:59:59  0.040089  0.039645  0.038938  5708.520020  0.001077   
3  2017-10-20 23:59:59  0.045216  0.041275  0.041001  6011.450195  0.002719   
4  2017-10-21 23:59:59  0.047404  0.046999  0.045789  6031.600098  0.002298   

   Marketcap  Daily Return  50-Day MA  200-Day MA  Price Diff  Volatility  \
0   0.032583      0.561905   0.013515    0.000000    0.024032    0.603767   
1   0.032374      0.591271   0.013893    0.000545    0.033743    0.600001   
2   0.034120      0.629273   0.014321    0.001103    0.015131    0.600419   
3   0.038591      0.680526   0.014810    0.001691    0.032225    0.604666   
4   0.038898      0.600876   0.015237    0.002284    0.016417    0.601647   

   Open-Close Diff  High-Low Range  
0         0.500598       

In [2]:
data = df['Close'].values.reshape(-1, 1)

In [3]:
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)


In [4]:
# Function to create sequences for LSTM
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length])
        y.append(data[i+sequence_length])
    return np.array(X), np.array(y)

sequence_length = 50

X, y = create_sequences(data_scaled, sequence_length)

# Check shapes
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")


Shape of X: (1309, 50, 1)
Shape of y: (1309, 1)


In [5]:
# Time-series split for cross-validation
n_splits = 5
tscv = TimeSeriesSplit(n_splits=n_splits)

for fold, (train_idx, test_idx) in enumerate(tscv.split(X)):
    print(f"Fold {fold+1}:")
    print(f"  Training indices: {train_idx[:5]}... to {train_idx[-5:]}")
    print(f"  Testing indices: {test_idx[:5]}... to {test_idx[-5:]}")

# Use the last split for training and testing
train_idx, test_idx = list(tscv.split(X))[-1]
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

print(f"Final Training Shape: X_train={X_train.shape}, y_train={y_train.shape}")
print(f"Final Testing Shape: X_test={X_test.shape}, y_test={y_test.shape}")


Fold 1:
  Training indices: [0 1 2 3 4]... to [214 215 216 217 218]
  Testing indices: [219 220 221 222 223]... to [432 433 434 435 436]
Fold 2:
  Training indices: [0 1 2 3 4]... to [432 433 434 435 436]
  Testing indices: [437 438 439 440 441]... to [650 651 652 653 654]
Fold 3:
  Training indices: [0 1 2 3 4]... to [650 651 652 653 654]
  Testing indices: [655 656 657 658 659]... to [868 869 870 871 872]
Fold 4:
  Training indices: [0 1 2 3 4]... to [868 869 870 871 872]
  Testing indices: [873 874 875 876 877]... to [1086 1087 1088 1089 1090]
Fold 5:
  Training indices: [0 1 2 3 4]... to [1086 1087 1088 1089 1090]
  Testing indices: [1091 1092 1093 1094 1095]... to [1304 1305 1306 1307 1308]
Final Training Shape: X_train=(1091, 50, 1), y_train=(1091, 1)
Final Testing Shape: X_test=(218, 50, 1), y_test=(218, 1)


In [6]:
# Build the LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Model summary
model.summary()


  super().__init__(**kwargs)


In [8]:
# Train the LSTM model
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
    verbose=1
)

# Plot training and validation loss
plt.figure(figsize=(8, 5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()


Epoch 1/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 1.0095e-04 - val_loss: 0.0016
Epoch 2/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 1.1282e-04 - val_loss: 0.0012
Epoch 3/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 9.3411e-05 - val_loss: 0.0012
Epoch 4/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 1.2245e-04 - val_loss: 0.0012
Epoch 5/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 9.2181e-05 - val_loss: 0.0026
Epoch 6/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 9.7861e-05 - val_loss: 0.0026
Epoch 7/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 1.1045e-04 - val_loss: 0.0030
Epoch 8/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 1.3259e-04 - val_loss: 0.0036
Epoch 9/50
[1m3

AttributeError: The layer sequential has never been called and thus has no defined input.

In [None]:
# Evaluate the model
test_loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")

# Predict future prices
y_pred = model.predict(X_test)

# Reverse scaling for predictions and actual values
y_pred_unscaled = scaler.inverse_transform(y_pred)
y_test_unscaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Plot predictions vs actual
plt.figure(figsize=(10, 6))
plt.plot(y_test_unscaled, label='Actual Prices', color='blue')
plt.plot(y_pred_unscaled, label='Predicted Prices', color='red')
plt.title('Bitcoin Price Prediction')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()


In [None]:
model.save('bitcoin_price_lstm_updated.keras')


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Calculate metrics
mae = mean_absolute_error(y_test_unscaled, y_pred_unscaled)
mse = mean_squared_error(y_test_unscaled, y_pred_unscaled)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_unscaled, y_pred_unscaled)
mape = np.mean(np.abs((y_test_unscaled - y_pred_unscaled) / y_test_unscaled)) * 100

# Print metrics
print(f"Metrics for Close Price Prediction:")
print(f"Mean Absolute Error (MAE): {mae:.6f}")
print(f"Mean Squared Error (MSE): {mse:.6f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.6f}")
print(f"R-squared (R²): {r2:.6f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")


In [None]:
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

# Define the objective function for Optuna
def objective(trial):
    # Hyperparameters to tune
    lstm_units = trial.suggest_categorical('lstm_units', [50, 100, 150, 200])
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5, step=0.1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    epochs = trial.suggest_int('epochs', 20, 100, step=10)
    
    # Build the model
    model = Sequential([
        LSTM(lstm_units, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout_rate),
        LSTM(lstm_units, return_sequences=False),
        Dropout(dropout_rate),
        Dense(25),
        Dense(1)
    ])
    
    # Compile the model
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    
    # Train the model
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stopping],
        verbose=0  # Suppress detailed training logs
    )
    
    # Evaluate the model
    val_loss = min(history.history['val_loss'])  # Use the minimum validation loss as the objective
    return val_loss

# Create an Optuna study
study = optuna.create_study(direction='minimize')  # Minimize validation loss
study.optimize(objective, n_trials=50)  # Number of trials to run

# Print the best hyperparameters
print("Best Hyperparameters:")
print(study.best_params)

# Access the best trial
best_trial = study.best_trial


In [None]:
# Train the model with the best hyperparameters
best_params = study.best_params

new_model = Sequential([
    LSTM(best_params['lstm_units'], return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(best_params['dropout_rate']),
    LSTM(best_params['lstm_units'], return_sequences=False),
    Dropout(best_params['dropout_rate']),
    Dense(25),
    Dense(1)
])

# Compile the model
optimizer = Adam(learning_rate=best_params['learning_rate'])
new_model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train the model
history = new_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=best_params['batch_size'],
    epochs=best_params['epochs'],
    verbose=1
)

# Evaluate on test data
test_loss = new_model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")


In [None]:
new_model.save('bitcoin_price_lstm_updated_tuned1.keras')


In [None]:

# Plot training and validation loss
plt.figure(figsize=(8, 5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()


In [None]:
# Evaluate the model
test_loss = new_model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")

# Predict future prices
y_pred = new_model.predict(X_test)

# Reverse scaling for predictions and actual values
y_pred_unscaled = scaler.inverse_transform(y_pred)
y_test_unscaled = scaler.inverse_transform(y_test.reshape(-1, 1))

# Plot predictions vs actual
plt.figure(figsize=(10, 6))
plt.plot(y_test_unscaled, label='Actual Prices', color='blue')
plt.plot(y_pred_unscaled, label='Predicted Prices', color='red')
plt.title('Bitcoin Price Prediction')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()


In [None]:
# Calculate metrics
mae = mean_absolute_error(y_test_unscaled, y_pred_unscaled)
mse = mean_squared_error(y_test_unscaled, y_pred_unscaled)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_unscaled, y_pred_unscaled)
mape = np.mean(np.abs((y_test_unscaled - y_pred_unscaled) / y_test_unscaled)) * 100

# Print metrics
print(f"Metrics for Close Price Prediction:")
print(f"Mean Absolute Error (MAE): {mae:.6f}")
print(f"Mean Squared Error (MSE): {mse:.6f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.6f}")
print(f"R-squared (R²): {r2:.6f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
