# lstm_neural_network-checkpoint.ipynb

## Notebook Purpose
This notebook is designed to build, train, and evaluate an LSTM (Long Short-Term Memory) neural network model for predicting cryptocurrency prices. The notebook will preprocess the data, create sequences for the LSTM model, train the model, evaluate its performance, and make future predictions.

## Instructions
1. **Import Necessary Libraries**:
   - Import `pandas` for data manipulation.
   - Import `numpy` for numerical operations.
   - Import `MinMaxScaler` from `sklearn.preprocessing` for data normalization.
   - Import `Sequential`, `LSTM`, and `Dense` from `tensorflow.keras` for building the neural network.
   - Import `matplotlib` for plotting.

2. **Load Preprocessed Data**:
   - Load the preprocessed CSV file created in the first notebook.

3. **Preprocess Data**:
   - Scale the closing prices to the [0, 1] range, then create fixed-length sequences of data for training the LSTM model.
   - Split the data into training and testing sets.

4. **Build and Train LSTM Model**:
   - Define the architecture of the LSTM model.
   - Compile and train the model.

5. **Evaluate Model Performance**:
   - Evaluate the model using the testing data.
   - Print the training and testing loss.

6. **Make Predictions**:
   - Use the trained model to make predictions on the testing data.
   - Inverse transform the predictions and actual values for comparison.

7. **Future Predictions**:
   - Use the trained model to make future price predictions.
   - Plot the historical prices along with the predicted future prices.

## Example Code
```python
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import warnings
warnings.filterwarnings("ignore")

# Load preprocessed data
# Point data_path at a different cleaned CSV to model another cryptocurrency.
data_path = 'data/cleaned_data/btc_cleaned.csv'
# The 'Date' column becomes the index and is parsed as dates so prices plot against time.
prices_df = pd.read_csv(
    data_path,
    parse_dates=True,
    index_col='Date',
)

# Preprocess data
def create_sequences(data, seq_length):
    """Build sliding-window samples for one-step-ahead forecasting.

    Args:
        data: 2-D NumPy array indexed as ``data[row, 0]``; only column 0 is read.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(xs, ys)`` of NumPy arrays. ``xs[i]`` is
        ``data[i:i + seq_length, 0]`` and ``ys[i]`` is the value that follows
        it, ``data[i + seq_length, 0]``. There are ``len(data) - seq_length``
        pairs; both arrays are empty when ``data`` has ``seq_length`` rows or
        fewer.
    """
    # The last usable window starts at len(data) - seq_length - 1 (its target
    # is the final row). range() excludes its stop, so the stop must be
    # len(data) - seq_length; subtracting one more would drop that sample.
    n_samples = len(data) - seq_length
    xs = [data[i:i + seq_length, 0] for i in range(n_samples)]
    ys = [data[i + seq_length, 0] for i in range(n_samples)]
    return np.array(xs), np.array(ys)

# Scale closing prices into [0, 1]; the fitted scaler is kept so model output
# can be mapped back to prices later.
# NOTE(review): the scaler is fit on the whole series, including rows that end
# up in the test split — confirm this is acceptable for the evaluation.
scaler = MinMaxScaler(feature_range=(0, 1))
close = prices_df['Close'].values.reshape(-1, 1)
scaled_close = scaler.fit_transform(close)

# Each sample is a window of seq_length scaled closes plus the close that follows it.
seq_length = 60
X, y = create_sequences(scaled_close, seq_length)

# Chronological split: first 80% of the windows for training, the rest for testing.
split = int(len(X) * 0.80)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# LSTM layers expect [samples, time steps, features]; add a trailing feature axis of size 1.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Build LSTM model
# A single 50-unit LSTM layer that returns only its final output, followed by
# a one-unit Dense layer producing the next scaled close.
nn_model = Sequential()
nn_model.add(LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)))
nn_model.add(Dense(units=1))
nn_model.compile(optimizer='adam', loss='mean_squared_error')
# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() would emit a stray "None" after the table.
nn_model.summary()

# Fit the network on the training windows.
nn_model.fit(X_train, y_train, batch_size=32, epochs=50)

# Report the compiled loss on both splits; verbose=0 suppresses the progress bar.
nn_train_loss = nn_model.evaluate(X_train, y_train, verbose=0)
nn_test_loss = nn_model.evaluate(X_test, y_test, verbose=0)

print(f'Train Loss: {nn_train_loss:.4f}')
print(f'Test Loss: {nn_test_loss:.4f}')

# Predict using Historic Data (Backtest)
y_pred = nn_model.predict(X_test)
# Map both predictions and targets from scaled units back to prices.
y_pred = scaler.inverse_transform(y_pred)
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

# Compare predictions vs actual values
# Slicing with [-10:] yields at most the last 10 rows, so a test split with
# fewer than 10 samples is printed in full instead of raising IndexError.
for predicted, actual in zip(y_pred[-10:], y_test_inv[-10:]):
    print(f'Predicted: {predicted[0]:.2f}, Actual: {actual[0]:.2f}')

# Predict the Future
# Recursive 30-step forecast. The first window is the last seq_length observed
# (scaled) closes; after each step the prediction is appended and the oldest
# value dropped, so later windows extend past the end of the data. Windows taken
# only from scaled_close[i-seq_length:i] with i < len(scaled_close) would just
# re-predict days that are already in the history.
start_index = len(scaled_close) - seq_length
if start_index < 0:
    raise ValueError('Need at least seq_length rows of history to forecast')

X_future = []
future_scaled = []
window = scaled_close[start_index:, 0]
for _ in range(30):
    X_future.append(window)
    # predict() returns shape (1, 1) for a single window; take the scalar.
    next_scaled = nn_model.predict(window.reshape(1, seq_length, 1), verbose=0)[0, 0]
    future_scaled.append(next_scaled)
    # np.append allocates a new array, so windows already stored are not modified.
    window = np.append(window[1:], next_scaled)

# Keep X_future in the LSTM input layout [samples, time steps, features].
X_future = np.array(X_future)
X_future = np.reshape(X_future, (X_future.shape[0], X_future.shape[1], 1))

# Map the scaled forecasts back to prices; result has shape (30, 1).
nn_future_predictions = np.array(future_scaled).reshape(-1, 1)
nn_future_predictions = scaler.inverse_transform(nn_future_predictions)

# Dates for the forecast: 30 consecutive days starting the day after the last historical row.
last_date = prices_df.index[-1]
first_future_day = last_date + pd.Timedelta(days=1)
next_30_days = pd.date_range(start=first_future_day, periods=30)
nn_dates_future = next_30_days.values

# Draw history and forecast on one set of axes, save the figure, then display it.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(prices_df['Close'], label='Historical Prices')
ax.plot(nn_dates_future, nn_future_predictions, linestyle='--', label='Predicted Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('Historical and Predicted Stock Prices using LSTM Neural Network')
ax.legend()
fig.savefig('nn_predict.png')
plt.show()


```

In [None]:
# Cell 1: Initial imports
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

warnings.filterwarnings("ignore")

# Keras modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# NOTE(review): `global` at module (cell) top level has no effect in Python.
# These lines presumably just flag names shared with whatever runs this
# notebook — confirm. prices_df is read in Cell 4 but is not assigned in any
# cell shown here, so it must already exist when these cells run.
global prices_df
global nn_model
global nn_future_predictions


In [None]:
# Cell 2: Print message indicating start of neural network processing
# Progress marker only; nothing else is computed in this cell.
print("in neural network")


In [None]:
# Cell 3: Function to create sequences of data
def create_sequences(data, seq_length):
    """Build sliding-window samples for one-step-ahead forecasting.

    Args:
        data: 2-D NumPy array indexed as ``data[row, 0]``; only column 0 is read.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(xs, ys)`` of NumPy arrays. ``xs[i]`` is
        ``data[i:i + seq_length, 0]`` and ``ys[i]`` is the value that follows
        it, ``data[i + seq_length, 0]``. There are ``len(data) - seq_length``
        pairs; both arrays are empty when ``data`` has ``seq_length`` rows or
        fewer.
    """
    # The last usable window starts at len(data) - seq_length - 1 (its target
    # is the final row). range() excludes its stop, so the stop must be
    # len(data) - seq_length; subtracting one more would drop that sample.
    n_samples = len(data) - seq_length
    xs = [data[i:i + seq_length, 0] for i in range(n_samples)]
    ys = [data[i + seq_length, 0] for i in range(n_samples)]
    return np.array(xs), np.array(ys)


In [None]:
# Cell 4: Extract closing prices and scale them to [0, 1]
# The fitted scaler is kept so model output can be mapped back to prices later.
# NOTE(review): the scaler is fit on the whole series, including rows that
# later fall in the test split — confirm this is acceptable for the evaluation.
scaler = MinMaxScaler(feature_range=(0, 1))
# MinMaxScaler expects a 2-D input, hence the single-column reshape.
close = prices_df['Close'].values.reshape(-1, 1)
scaled_close = scaler.fit_transform(close)


In [None]:
# Cell 5: Create a sequence based on number of days
# Each sample is a window of seq_length consecutive scaled closes (X) paired
# with the scaled close that immediately follows it (y).
seq_length = 60
X, y = create_sequences(scaled_close, seq_length)


In [None]:
# Cell 6: Split into training and testing data (80% training, 20% testing)
# The split is positional (no shuffling): the earliest windows train, the latest test.
split = int(len(X) * 0.80)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


In [None]:
# Cell 7: Reshape data for LSTM input [samples, time steps, features]
# There is a single feature (the scaled close), so the last axis has size 1.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [None]:
# Cell 8: Define the LSTM model with 50 neurons
# The LSTM returns only its final output (return_sequences=False), which the
# one-unit Dense layer turns into a single predicted value per window.
nn_model = Sequential([
    LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)),
    Dense(units=1),
])


In [None]:
# Cell 9: Compile the model
# Adam optimizer with mean squared error as the loss; this is the value that
# evaluate() reports later.
nn_model.compile(optimizer='adam', loss='mean_squared_error')


In [None]:
# Cell 10: Print the model summary
# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() would emit a stray "None" after the table.
nn_model.summary()


In [None]:
# Cell 11: Train the model (evaluation happens in the next cell)
# 50 passes over the training windows in mini-batches of 32.
nn_model.fit(X_train, y_train, epochs=50, batch_size=32)


In [None]:
# Cell 12: Evaluate the model on the training and testing data
# evaluate() returns the loss the model was compiled with (mean squared error
# on the scaled values); verbose=0 suppresses the progress bar.
nn_train_loss = nn_model.evaluate(X_train, y_train, verbose=0)
nn_test_loss = nn_model.evaluate(X_test, y_test, verbose=0)
print(f'Train Loss: {nn_train_loss:.4f}')
print(f'Test Loss: {nn_test_loss:.4f}')


In [None]:
# Cell 13: Predict using Historic Data (Backtest)
# One prediction per test window; values are still in scaled units here.
y_pred = nn_model.predict(X_test)


In [None]:
# Cell 14: Inverse transform the predictions (to get actual prices)
# y_pred is overwritten, so re-running this cell without re-running the
# prediction cell would apply the inverse transform a second time.
y_pred = scaler.inverse_transform(y_pred)


In [None]:
# Cell 15: Inverse transform the actual values for comparison
# y_test is 1-D; the scaler needs a 2-D column, hence reshape(-1, 1).
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))


In [None]:
# Cell 16: Compare predictions vs actual values
# Example: print the last 10 predictions and actual values
# Slicing with [-10:] yields at most the last 10 rows, so a test split with
# fewer than 10 samples is printed in full instead of raising IndexError.
for predicted, actual in zip(y_pred[-10:], y_test_inv[-10:]):
    print(f'Predicted: {predicted[0]:.2f}, Actual: {actual[0]:.2f}')


In [None]:
# Cell 17: Predict the Future
# Recall seq_length = 60, set above
# Predict for 30 days
# The windows are built recursively. The first is the last seq_length observed
# (scaled) closes; after each step the model's prediction is appended and the
# oldest value dropped, so later windows extend past the end of the data.
# Windows taken only from scaled_close[i-seq_length:i] with
# i < len(scaled_close) would just re-predict days already in the history.
start_index = len(scaled_close) - seq_length
if start_index < 0:
    raise ValueError('Need at least seq_length rows of history to forecast')

X_future = []
window = scaled_close[start_index:, 0]
for _ in range(30):
    X_future.append(window)
    # predict() returns shape (1, 1) for a single window; take the scalar.
    next_scaled = nn_model.predict(window.reshape(1, seq_length, 1), verbose=0)[0, 0]
    # np.append allocates a new array, so windows already stored are not modified.
    window = np.append(window[1:], next_scaled)


In [None]:
# Cell 18: Convert X_future to numpy array and reshape for LSTM input [samples, time steps, features]
# Stack the list of windows into a 2-D array, then add the size-1 feature axis.
X_future = np.array(X_future)
X_future = X_future.reshape((X_future.shape[0], X_future.shape[1], 1))


In [None]:
# Cell 19: Predict future prices
# One prediction per window in X_future; values are still in scaled units here.
nn_future_predictions = nn_model.predict(X_future)


In [None]:
# Cell 20: Inverse transform the predictions to get actual prices
# nn_future_predictions is overwritten, so re-running this cell without
# re-running the prediction cell would apply the inverse transform twice.
nn_future_predictions = scaler.inverse_transform(nn_future_predictions)


In [None]:
# Cell 21: Derive future dates
# Start one day after the last historical row and take 30 consecutive dates.
last_date = prices_df.index[-1]
first_future_day = last_date + pd.Timedelta(days=1)
next_30_days = pd.date_range(start=first_future_day, periods=30)
# Plain array of the dates, used as the x-axis for the forecast line
nn_dates_future = next_30_days.values


In [None]:
# Cell 22: Print future predictions
# Prints a label line followed by the array of predicted prices.
print("nn_future_predictions:")
print(nn_future_predictions)


In [None]:
# Cell 23: Plotting historical and predicted prices
# Draw history and forecast on one set of axes, save the figure, then display it.
# NOTE(review): savefig does not create directories, so the output folder must already exist.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(prices_df['Close'], label='Historical Prices')
ax.plot(nn_dates_future, nn_future_predictions, linestyle='--', label='Predicted Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('Historical and Predicted Stock Prices using LSTM Neural Network')
ax.legend()
fig.savefig('../results/output_predictions/nn_predict.png')
plt.show()
