In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# crypto_df is loaded from train.csv in a later cell of this notebook.
# Its 'log_return' column is derived from 'Close' during feature engineering,
# so both columns must exist before any of the modelling cells are run.


In [None]:
# Folder holding the input CSV files. File names are appended to this string
# with plain `+` further down, so the trailing slash is required.
data_folder = "../input/g-research-crypto-forecasting/"
# IPython shell escape: list the folder contents as a quick sanity check.
!ls $data_folder

In [None]:
# Read the complete training table from the input folder into memory.
train_csv_path = data_folder + 'train.csv'
crypto_df = pd.read_csv(train_csv_path)

In [None]:
# Preview the first ten rows (rendered as the cell output).
crypto_df.iloc[:10]

In [None]:
# Load the asset lookup table and show it in full as the cell output.
asset_details_path = data_folder + 'asset_details.csv'
asset_details = pd.read_csv(asset_details_path)
asset_details

In [None]:
# Keep only the most recent 8% of rows (the tail of the table) for the rest of
# the notebook.
# .copy() detaches the result from the full table: without it, the columns
# added in the feature-engineering cell are assigned onto a slice of another
# frame (pandas SettingWithCopyWarning) and the slice can keep the full
# table's memory alive.
# NOTE: this cell overwrites crypto_df, so re-running it shrinks the data
# again; re-run the loading cell first.
recent_row_count = int(len(crypto_df) * 0.08)
crypto_df = crypto_df.tail(recent_row_count).copy()

In [None]:
# Convert the epoch-seconds timestamp to datetime for easier handling
crypto_df['datetime'] = pd.to_datetime(crypto_df['timestamp'], unit='s')

# The training table interleaves rows of several assets (identified by
# 'Asset_ID'), so the physically previous row usually belongs to a different
# coin. Lagged and rolling features are therefore computed inside each asset's
# own series; the results stay aligned with crypto_df's index.
close_by_asset = crypto_df.groupby('Asset_ID')['Close']

# Feature Engineering: log return of each asset versus its own previous row
crypto_df['log_return'] = np.log(crypto_df['Close'] / close_by_asset.shift(1))

# Additional features: per-asset moving averages of the close price.
# The first rows of every asset are NaN until the window is filled.
crypto_df['ma7'] = close_by_asset.transform(lambda s: s.rolling(window=7).mean())
crypto_df['ma21'] = close_by_asset.transform(lambda s: s.rolling(window=21).mean())

# NOTE(review): rows remain interleaved across assets after this cell, so any
# later step that treats consecutive rows as one time series should also work
# per asset (or filter to a single Asset_ID first).

In [None]:
# Drop only the rows whose engineered features are undefined (the warm-up rows
# of the shift / rolling windows). Limiting the check to these columns keeps
# rows that merely have gaps in unrelated raw columns, which a bare dropna()
# would also discard. Reassigning instead of inplace=True avoids mutating a
# frame that earlier cells may already have displayed.
engineered_cols = ['log_return', 'ma7', 'ma21']
crypto_df = crypto_df.dropna(subset=engineered_cols)

In [None]:
def create_sequences(data, sequence_length):
    """Cut a series into overlapping input windows and next-step targets.

    For every start index ``i`` the input window is
    ``data[i : i + sequence_length]`` and its target is the row immediately
    after the window, ``data[i + sequence_length]``. Every window that has a
    following row is produced, including the one whose target is the last row.

    Parameters
    ----------
    data : array-like
        Sequence of steps, indexed along its first axis. Converted with
        ``np.asarray``.
    sequence_length : int
        Number of consecutive steps per input window; must be >= 1.

    Returns
    -------
    xs : np.ndarray
        Shape ``(n_windows, sequence_length, *data.shape[1:])``.
    ys : np.ndarray
        Shape ``(n_windows, *data.shape[1:])``.
        ``n_windows = max(len(data) - sequence_length, 0)``; when it is zero
        both arrays are empty but keep their trailing dimensions.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    data = np.asarray(data)
    n_windows = max(len(data) - sequence_length, 0)

    # Row i of window_index holds positions i, i+1, ..., i+sequence_length-1,
    # so one fancy-indexing call gathers every window at once.
    window_index = np.arange(n_windows)[:, None] + np.arange(sequence_length)[None, :]
    xs = data[window_index]

    # Targets are the rows right after each window; copy so the result does
    # not alias the caller's array.
    ys = data[sequence_length:sequence_length + n_windows].copy()
    return xs, ys

# Selecting close prices and log returns as features (column 0 = Close,
# column 1 = log_return)
data = crypto_df[['Close', 'log_return']].values

# Normalize the features. The scaler statistics are estimated on the earliest
# 80% of rows only (mirroring the 80/20 train/test proportion used for the
# split further down), so the most recent rows do not leak into the mean and
# variance. The fitted transform is then applied to every row.
scaler_fit_rows = max(1, int(len(data) * 0.8))
scaler = StandardScaler()
scaler.fit(data[:scaler_fit_rows])
data_scaled = scaler.transform(data)



In [None]:
# Create sequences: X holds the input windows, y the row that follows each window
sequence_length = 5  # Use the past 5 timesteps to predict the next timestep
X, y = create_sequences(data_scaled, sequence_length)


In [None]:
# Split the data into training and test sets chronologically.
# The windows overlap and are ordered in time, so a shuffled split would place
# near-duplicates of training windows (and their future targets) in the test
# set and overstate performance. shuffle=False holds out the final 20% of
# windows instead, which also keeps the test set in time order for plotting.
# random_state is omitted because it has no effect without shuffling.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Network: a single LSTM layer, light dropout, then a linear head with two
# outputs (Predicting Close price and log return simultaneously).
# The input shape is taken from the training windows: (timesteps, features).
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
model = Sequential([
    LSTM(32, activation='relu', input_shape=(n_timesteps, n_features)),
    Dropout(0.1),
    Dense(2),
])

# NOTE(review): the Keras LSTM documentation lists the default 'tanh'
# activation as a requirement for the fused cuDNN kernel; confirm that 'relu'
# is an intentional choice here.
model.compile(loss='mse', optimizer='adam')


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 3 consecutive epochs.
# restore_best_weights=True asks Keras to roll the model back to the weights
# of its best validation epoch instead of keeping those of the last epoch run,
# so the evaluation and the file saved below are not taken from a worse,
# later epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    epochs=6,
    batch_size=8,
    validation_split=0.1,  # Keras holds out the last 10% of the training arrays
    verbose=1,
    callbacks=[early_stopping],
)

# Save the entire model to a HDF5 file.
model.save('my_model.h5')

In [None]:
# Score the fitted model on the held-out windows and report the loss.
test_performance = model.evaluate(X_test, y_test, verbose=1)
print(f'Test Loss: {test_performance}')

# Draw the per-epoch training and validation loss recorded by fit().
loss_curves = history.history
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'validation')):
    plt.plot(loss_curves[history_key], label=curve_label)
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


In [None]:
# Run the model over the held-out windows.
y_pred = model.predict(X_test)

# Overlay actual and predicted values of output column 0 (the scaled Close,
# since 'Close' is the first selected feature).
plt.figure(figsize=(15, 5))
for series, series_label in ((y_test, 'Actual'), (y_pred, 'Predicted')):
    plt.plot(series[:, 0], label=series_label)
plt.title('Prediction of Close Price')
plt.xlabel('Time')
plt.ylabel('Normalized Close Price')
plt.legend()
plt.show()

In [None]:
# Evaluate the model with MAE and RMSE
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt

# MAE and RMSE for output column 0 (Close), comparing the true labels (y_test)
# with the predictions (y_pred). Both arrays are in scaled units, so the
# errors are too.
close_true = y_test[:, 0]
close_pred = y_pred[:, 0]
mae = mean_absolute_error(close_true, close_pred)
mse = mean_squared_error(close_true, close_pred)
rmse = sqrt(mse)

# Report both metrics.
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
