# lstm_neural_network.ipynb

## Notebook Purpose
This notebook is designed to develop and train an LSTM (Long Short-Term Memory) neural network model for predicting cryptocurrency prices. The trained LSTM model will be used to generate future price predictions.

## Instructions
1. **Import Necessary Libraries**:
   - Import `pandas` and `numpy` for data manipulation.
   - Import `MinMaxScaler` from `sklearn.preprocessing` for feature scaling.
   - Import necessary modules from `tensorflow.keras` for building the LSTM model.

2. **Load Preprocessed Data**:
   - Load the preprocessed CSV file created in the first notebook.

3. **Prepare Data for LSTM**:
   - Scale the data using `MinMaxScaler`.
   - Create sequences of data for LSTM input and split them into training and test sets.

4. **Build and Train LSTM Model**:
   - Define the LSTM model architecture.
   - Compile and train the model using the prepared data.

5. **Save the Trained Model**:
   - Save the trained LSTM model, together with the fitted scaler, to files for later use.

6. **Evaluate Model Performance**:
   - Evaluate the model's performance using appropriate metrics.

## Example Code
```python
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import joblib

# Read the preprocessed price history, indexed by its parsed 'Date' column.
# Point `data_path` at the file for whichever cryptocurrency is being modelled.
data_path = 'data/historical_data/btc_usd_preprocessed.csv'
data = pd.read_csv(data_path, index_col='Date', parse_dates=['Date'])

# Scale the closing prices into [0, 1]; the scaler is handed a single-column
# 2-D array, hence the reshape.
close_column = data['Close'].values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(close_column)

def create_sequences(data, seq_length):
    """Slice a 2-D series into overlapping input windows and next-step targets.

    Only column 0 of ``data`` is used.  Sample ``i`` pairs the window
    ``data[i:i + seq_length, 0]`` with the value that immediately follows it,
    ``data[i + seq_length, 0]``.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays: ``xs`` holds one window of
        ``seq_length`` values per sample and ``ys`` the matching targets.
        Both are empty when ``data`` has ``seq_length`` rows or fewer.
    """
    # The last usable window starts at len(data) - seq_length - 1, which gives
    # len(data) - seq_length samples in total. Stopping one step earlier would
    # silently drop the most recent observation as a target.
    n_samples = max(len(data) - seq_length, 0)
    xs = [data[start:start + seq_length, 0] for start in range(n_samples)]
    ys = [data[start + seq_length, 0] for start in range(n_samples)]
    return np.array(xs), np.array(ys)

# Build supervised samples: each input is `seq_length` consecutive scaled
# closes and the target is the close that follows them.
seq_length = 60
X, y = create_sequences(scaled_data, seq_length)

# Keras LSTM layers consume input shaped [samples, time steps, features].
# Make the single feature axis explicit so X matches the model's
# input_shape=(seq_length, 1) instead of relying on implicit rank adjustment.
X = X.reshape(len(X), seq_length, 1)

# Chronological 80/20 split: the earliest samples train the model and the
# most recent ones are held out. The boundary is computed once so X and y are
# always cut at the same index.
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Two stacked 50-unit LSTM layers (the first returns the full sequence so the
# second can consume it), then a 25-unit dense layer and a one-unit output.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    LSTM(units=50, return_sequences=False),
    Dense(units=25),
    Dense(units=1),
])

# Adam optimizer minimising mean squared error.
model.compile(loss='mean_squared_error', optimizer='adam')

# A single pass over the training data, one sample per batch.
model.fit(X_train, y_train, epochs=1, batch_size=1)

# Persist the trained network and the fitted scaler next to each other.
model_path = 'models/lstm_model.h5'
scaler_path = 'models/scaler.pkl'
model.save(model_path)
joblib.dump(scaler, scaler_path)

# Compute the compiled loss (mean squared error) on the training split and on
# the held-out split, in that order, then report both to four decimals.
train_loss, test_loss = (
    model.evaluate(features, targets, verbose=0)
    for features, targets in ((X_train, y_train), (X_test, y_test))
)
print(f'Train Loss: {train_loss:.4f}')
print(f'Test Loss: {test_loss:.4f}')


```

In [None]:
# Cell 1: Initial imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [None]:
# Cell 2: Global variables
# NOTE: at the top level of a module/cell a `global` statement binds nothing
# and has no runtime effect; presumably it is here to flag the names other
# notebooks are expected to read.
global prices_df, nn_model, nn_future_predictions


In [None]:
# Cell 3: Progress marker - prints a fixed message when this cell runs
print("in neural network")


In [None]:
# Cell 4: Function to create sequences of data
def create_sequences(data, seq_length):
    """Slice a 2-D series into overlapping input windows and next-step targets.

    Only column 0 of ``data`` is used.  Sample ``i`` pairs the window
    ``data[i:i + seq_length, 0]`` with the value that immediately follows it,
    ``data[i + seq_length, 0]``.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays: ``xs`` holds one window of
        ``seq_length`` values per sample and ``ys`` the matching targets.
        Both are empty when ``data`` has ``seq_length`` rows or fewer.
    """
    # The last usable window starts at len(data) - seq_length - 1, which gives
    # len(data) - seq_length samples in total. Stopping one step earlier would
    # silently drop the most recent observation as a target.
    n_samples = max(len(data) - seq_length, 0)
    xs = [data[start:start + seq_length, 0] for start in range(n_samples)]
    ys = [data[start + seq_length, 0] for start in range(n_samples)]
    return np.array(xs), np.array(ys)


In [None]:
# Cell 5: Load data and extract closing prices
# Update the CSV path below to match the selected cryptocurrency.
prices_df = pd.read_csv(
    '../data/cleaned_data/BTC_cleaned.csv',
    parse_dates=True,
    index_col='Date',
)
# Closing prices as an (n, 1) column, then scaled into the range [0, 1].
close = prices_df[['Close']].values
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_close = scaler.fit_transform(close)
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split in Cell 7, so test-period min/max influence the scaling.


In [None]:
# Cell 6: Create sequences based on number of days
# Each sample uses `seq_length` consecutive scaled closes as its input and
# the scaled close that immediately follows them as its target.
seq_length = 60
X, y = create_sequences(scaled_close, seq_length)


In [None]:
# Cell 7: Split into training and testing data
# Order is preserved: the first 80% of samples train the model and the
# remaining samples are held out for testing.
split = int(0.80 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]


In [None]:
# Cell 8: Reshape data for LSTM input [samples, time steps, features]
# Append a trailing feature axis of size 1 to each 2-D sample matrix.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


In [None]:
# Cell 9: Define the LSTM model with 50 neurons
# One 50-unit LSTM layer that returns only its last output, feeding a
# single-unit dense layer that produces the prediction.
nn_model = Sequential([
    LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)),
    Dense(units=1),
])


In [None]:
# Cell 10: Compile the model
# Adam optimizer with mean squared error as the training loss.
nn_model.compile(optimizer='adam', loss='mean_squared_error')


In [None]:
# Cell 11: Print the model summary
# Model.summary() writes the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
nn_model.summary()


In [None]:
# Cell 12: Train the model
# 50 epochs over the training samples in batches of 32.
nn_model.fit(X_train, y_train, epochs=50, batch_size=32)


In [None]:
# Cell 13: Evaluate the model using the testing data
# Compute the compiled loss (mean squared error) on the training split and on
# the held-out split, in that order, then report both to four decimals.
nn_train_loss, nn_test_loss = (
    nn_model.evaluate(features, targets, verbose=0)
    for features, targets in ((X_train, y_train), (X_test, y_test))
)
print(f'Train Loss: {nn_train_loss:.4f}')
print(f'Test Loss: {nn_test_loss:.4f}')


In [None]:
# Cell 14: Predict using Historic Data (Backtest)
# Run the trained model on the held-out windows; the outputs are still in the
# scaler's [0, 1] range at this point.
y_pred = nn_model.predict(X_test)


In [None]:
# Cell 15: Inverse transform the predictions (to get actual prices)
# Undo the MinMax scaling with the same scaler that was fitted on the closes.
y_pred = scaler.inverse_transform(y_pred)


In [None]:
# Cell 16: Inverse transform the actual values for comparison
# y_test is 1-D, so it is reshaped to a single column before being passed to
# the scaler.
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))


In [None]:
# Cell 17: Compare predictions vs actual values
# Show the most recent test samples (up to 10), oldest first. Slicing instead
# of fixed negative indexing keeps this working when the test set has fewer
# than 10 rows.
for predicted, actual in zip(y_pred[-10:], y_test_inv[-10:]):
    print(f'Predicted: {predicted[0]:.2f}, Actual: {actual[0]:.2f}')


In [None]:
#recall seq_length = 60, set above
# predict for 30 days
# NOTE(review): Cell 18 below re-initialises X_future, so the list built in
# this cell is discarded.
# NOTE(review): window i covers scaled_close[i-seq_length:i] and, with
# seq_length = 60, i stops at len(scaled_close) - 31, so every window lies
# inside the historical series rather than extending past its end.
X_future = []
start_index = len(scaled_close) - seq_length
for i in range(start_index, start_index + 30):
    seq = scaled_close[i-seq_length:i,0]
    X_future.append(seq)

In [None]:
# Cell 18: Predict the Future
X_future = []
start_index = len(scaled_close) - seq_length
for i in range(start_index, start_index + 30):
    seq = scaled_close[i-seq_length:i,0]
    X_future.append(seq)


In [None]:
# Cell 19: Convert X_future to numpy array and reshape for LSTM input
X_future = np.array(X_future)
X_future = np.reshape(X_future, (X_future.shape[0], X_future.shape[1], 1))


In [None]:
# Cell 20: Predict future prices
nn_future_predictions = nn_model.predict(X_future)


In [None]:
# Cell 21: Inverse transform the predictions to get actual prices
nn_future_predictions = scaler.inverse_transform(nn_future_predictions)


In [None]:
# Cell 22: Derive future dates
# Thirty consecutive dates, beginning the day after the last row of prices_df.
last_date = prices_df.index[-1]
first_future_day = last_date + pd.Timedelta(days=1)
next_30_days = pd.date_range(start=first_future_day, periods=30)
nn_dates_future = next_30_days.values


In [None]:
# Cell 23: Print future predictions
# Heading on one line, the prediction array on the following lines.
print("nn_future_predictions:", nn_future_predictions, sep="\n")


In [None]:
# Cell 24: Plotting historical and predicted prices
# Draw the observed closes and the forecast on one set of axes, write the
# figure to nn_predict.png, then display it.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(prices_df['Close'], label='Historical Prices')
ax.plot(nn_dates_future, nn_future_predictions, label='Predicted Prices', linestyle='--')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
# The plotted series is a cryptocurrency close price, so the title says so.
ax.set_title('Historical and Predicted Cryptocurrency Prices using LSTM Neural Network')
ax.legend()
fig.savefig('nn_predict.png')
plt.show()


In [None]:
# Cell 25: Save future predictions to the output_predictions folder
# One row per forecast date; the prediction array is flattened to 1-D so it
# lines up with the date column.
output_path = '../results/output_predictions/BTC_future_predictions.csv'
forecast_columns = {
    'Date': nn_dates_future,
    'Predicted Price': nn_future_predictions.flatten(),
}
nn_future_predictions_df = pd.DataFrame(forecast_columns)
nn_future_predictions_df.to_csv(output_path, index=False)
print(f"Future predictions saved to {output_path}")
