# lstm_nn_predict-checkpoint.ipynb

## Notebook Purpose
This notebook uses the trained LSTM model to predict cryptocurrency prices for future days. It loads the trained model and test data, generates predictions, and saves the predicted prices.

## Instructions
1. **Import Necessary Libraries**:
   - Import `pandas` for data manipulation.
   - Import `joblib` to load the trained model.
   - Import `MinMaxScaler` from `sklearn` for data normalization.
   - Import `matplotlib` for visualization.

2. **Load Test Data and Model**:
   - Load the test data and the trained LSTM model.

3. **Generate Predictions**:
   - Use the model to predict future prices for 1, 3, 5, 7, 14, 21, and 30 days.

4. **Save Predictions**:
   - Save the generated predictions to a CSV file.

5. **Review Predictions**:
   - Display the predictions to verify they are as expected.

## Example Code
```python
# Illustrative sketch of the workflow; the executable cells further down in
# this notebook are what actually runs.

# Import necessary libraries
import pandas as pd
import joblib
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Load test data and model
data_path = 'data/historical_data/btc_usd_test.csv'  # Update this path based on the selected cryptocurrency
data = pd.read_csv(data_path, parse_dates=['Date'], index_col='Date')
# NOTE(review): the notebook cells below load each model with Keras
# `load_model` from an .h5 file and use joblib only for the scaler — confirm
# which format the trained model is actually saved in.
model = joblib.load('models/trained_lstm_model.pkl')

# Generate predictions
# NOTE(review): fit_transform refits the scaler on this test data; the cells
# below instead reuse a saved scaler through `transform`.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[['Close']])
# NOTE(review): scaled_data is a 2D (rows, 1) array here, whereas the cells
# below feed the LSTM 3D windows of 60 time steps — this call presumably needs
# the same windowing; verify.
future_predictions = model.predict(scaled_data)

# Inverse transform the predictions
future_predictions = scaler.inverse_transform(future_predictions)

# Save predictions
predictions_df = pd.DataFrame(future_predictions, index=data.index, columns=['Predicted'])
predictions_df.to_csv('results/lstm_nn_predictions.csv')

# Display predictions
predictions_df.head()

# Plot predictions
plt.figure(figsize=(14, 7))
plt.plot(data.index, data['Close'], label='Actual Prices')
plt.plot(predictions_df.index, predictions_df['Predicted'], label='Predicted Prices')
plt.title('LSTM Model Predictions vs Actual Prices')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.show()
```


In [None]:
# Cell 1: Import necessary libraries and set working directory
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

# Notebook-wide state filled in by later cells: crypto_data, nn_models and
# nn_future_predictions. (A module-level `global` statement has no effect, so
# the names are only documented here.)

# The relative data/model paths used by the following cells are resolved
# against the project root, two levels above the notebook's starting folder.
PROJECT_DATA_DIR = os.path.join('data', 'cleaned_data')

# Print the current working directory
print("Current working directory:", os.getcwd())

# Change to the root directory of the project. Only move up when the project
# data folder is not already visible, so re-running this cell does not keep
# climbing the directory tree.
if not os.path.isdir(PROJECT_DATA_DIR):
    os.chdir('../../')
print("Changed working directory to:", os.getcwd())


In [None]:
# Cell 2: Load preprocessed data and models
# For every cryptocurrency, load the cleaned price CSV, the saved Keras model
# and the saved scaler. A cryptocurrency is skipped when any file is missing.
cryptos = ['BTC', 'ETH', 'SOL']
crypto_data = {}
nn_models = {}

data_dir = 'data/cleaned_data'
model_dir = 'models'

# Print files in the directories to verify their presence
print(f"Files in {data_dir} directory: {os.listdir(data_dir)}")
print(f"Files in {model_dir} directory: {os.listdir(model_dir)}")

for crypto in cryptos:
    # Paths to data, model, and scaler
    data_path = os.path.join(data_dir, f"{crypto}_cleaned.csv")
    model_path = os.path.join(model_dir, f"{crypto}_lstm_model.h5")
    scaler_path = os.path.join(model_dir, f"{crypto}_scaler.pkl")

    # Report exactly which files are absent instead of listing all three.
    missing_paths = [
        path for path in (data_path, model_path, scaler_path)
        if not os.path.exists(path)
    ]
    if missing_paths:
        print(f"File not found: {', '.join(missing_paths)}")
        continue

    price_frame = pd.read_csv(data_path, index_col='Date', parse_dates=True)
    nn_models[crypto] = load_model(model_path)

    # joblib.load unpickles the file, so only load scaler files you trust.
    scaler = joblib.load(scaler_path)

    # Verify the type on the loaded object itself. Checking the value read
    # back from the DataFrame would test a pandas Series, not the scaler.
    if isinstance(scaler, MinMaxScaler):
        print(f"Scaler for {crypto} is a MinMaxScaler object")
    else:
        print(f"Scaler for {crypto} is not a MinMaxScaler object")

    # The preprocessing cell looks the scaler up as
    # crypto_data[crypto]['scaler'], so it is attached to the frame under that
    # key. pandas broadcasts the object into every row of an object column, so
    # that lookup yields a Series of references to this one scaler.
    price_frame['scaler'] = scaler
    crypto_data[crypto] = price_frame

    print(f"{crypto} data and model loaded successfully")

if not crypto_data:
    raise FileNotFoundError("No cryptocurrency data or models found. Please ensure data preparation and model training steps are completed.")


In [None]:
# Cell 3: Preprocess data and generate predictions
def create_sequences(data, seq_length):
    """Build sliding-window samples from the first column of a 2D array.

    Each sample pairs `seq_length` consecutive values (the input window) with
    the value that immediately follows them (the target).

    Args:
        data: 2D array indexed as ``data[row, 0]``; only column 0 is used.
        seq_length: Number of consecutive rows in every input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays. ``xs`` has shape
        ``(n_samples, seq_length)`` and ``ys`` has shape ``(n_samples,)``,
        where ``n_samples = max(len(data) - seq_length, 0)``.
    """
    # The last valid window ends at index len(data) - 2 and its target is the
    # final row, so there are len(data) - seq_length samples in total.
    n_samples = max(len(data) - seq_length, 0)
    if n_samples == 0:
        # Keep the 2D shape so callers can still reshape an empty result.
        return np.empty((0, seq_length)), np.empty((0,))

    xs = [data[start:start + seq_length, 0] for start in range(n_samples)]
    ys = [data[start + seq_length, 0] for start in range(n_samples)]
    return np.array(xs), np.array(ys)

# Scale each closing-price series, cut it into 60-step windows and split the
# windows 80/20 (in time order) into train and test sets. Each entry of
# crypto_data is replaced by a dict holding the prepared arrays.
for crypto, df in list(crypto_data.items()):
    if isinstance(df, dict):
        # Already converted by an earlier run of this cell; leave it alone so
        # the cell can be re-run safely.
        continue

    # The scaler was stored as a DataFrame column, so the lookup returns a
    # Series holding the same scaler object in every row; unwrap one element.
    stored_scaler = df['scaler']
    if isinstance(stored_scaler, pd.Series):
        scaler = stored_scaler.iloc[0]
    else:
        scaler = stored_scaler

    close_values = df[['Close']].values  # Use double brackets to ensure 2D array
    scaled_data = scaler.transform(close_values)  # Reuse the fitted scaler; do not refit

    seq_length = 60
    X, y = create_sequences(scaled_data, seq_length)
    split = int(len(X) * .80)
    X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]

    # Add a trailing feature axis: (samples, time steps, 1).
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    crypto_data[crypto] = {
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
        'scaler': scaler,
        'scaled_close': scaled_data
    }

print("Data preprocessing and sequence generation completed for all cryptocurrencies.")


In [None]:
# Cell 4: Define and train LSTM models
# Builds one single-layer LSTM regressor per cryptocurrency and fits it on the
# training windows prepared in Cell 3.
# NOTE(review): resetting nn_models here discards the pre-trained models that
# Cell 2 loaded from disk, and no random seed is set — confirm that retraining
# inside the prediction notebook is intended.
nn_models = {}

for crypto, prepared in crypto_data.items():
    X_train = prepared['X_train']
    y_train = prepared['y_train']

    # Define the LSTM model with 50 neurons; the input is one feature per
    # time step, with the window length taken from the training data.
    nn_model = Sequential()
    nn_model.add(LSTM(units=50, return_sequences=False, input_shape=(X_train.shape[1], 1)))
    nn_model.add(Dense(units=1))

    # Compile the model
    nn_model.compile(optimizer='adam', loss='mean_squared_error')

    # Print the model summary. summary() prints the table itself and returns
    # None, so wrapping it in print() would emit a stray "None".
    print(f"{crypto} LSTM model summary:")
    nn_model.summary()

    # Train the model
    nn_model.fit(X_train, y_train, epochs=50, batch_size=32)

    nn_models[crypto] = nn_model

print("Models trained for all cryptocurrencies.")


In [None]:
# Cell 5: Evaluate the LSTM models
# Report every model's loss on its own training split and on its test split.
for crypto, model in nn_models.items():
    splits = crypto_data[crypto]
    X_train, y_train = splits['X_train'], splits['y_train']
    X_test, y_test = splits['X_test'], splits['y_test']

    # verbose=0 keeps Keras' progress bar out of the cell output.
    nn_train_loss = model.evaluate(X_train, y_train, verbose=0)
    nn_test_loss = model.evaluate(X_test, y_test, verbose=0)

    print(f'{crypto} - Train Loss: {nn_train_loss:.4f}, Test Loss: {nn_test_loss:.4f}')


In [None]:
# Cell 6: Predict using historical data (backtest)
# Predict the test windows, convert predictions and targets back to price
# units, and print the most recent pairs side by side.
for crypto, model in nn_models.items():
    X_test = crypto_data[crypto]['X_test']
    scaler = crypto_data[crypto]['scaler']

    y_pred = model.predict(X_test)
    y_pred = scaler.inverse_transform(y_pred)
    # inverse_transform expects a 2D column, hence the reshape.
    y_test_inv = scaler.inverse_transform(crypto_data[crypto]['y_test'].reshape(-1, 1))

    # Compare predictions vs actual values. Slicing the tail shows the last
    # ten pairs, or fewer when the test set is smaller, without an IndexError.
    print(f"{crypto} - Predictions vs Actual values:")
    for predicted, actual in zip(y_pred[-10:], y_test_inv[-10:]):
        print(f'Predicted: {predicted[0]:.2f}, Actual: {actual[0]:.2f}')


In [None]:
# Cell 7: Predict future prices
# Recursive multi-step forecast: start from the most recent window of scaled
# closing prices, predict the next value, append it to the window, drop the
# oldest value and repeat. Each result is a (forecast_horizon, 1) array in
# price units, stored in nn_future_predictions[crypto].
nn_future_predictions = {}

seq_length = 60        # window length; matches the preprocessing cell
forecast_horizon = 30  # number of future steps to predict

for crypto, model in nn_models.items():
    scaled_close = crypto_data[crypto]['scaled_close']
    scaler = crypto_data[crypto]['scaler']

    if len(scaled_close) < seq_length:
        print(f"{crypto}: need at least {seq_length} observations to forecast, found {len(scaled_close)}; skipping")
        continue

    # Seed the forecast with the LAST seq_length observations so the first
    # predicted value is the step right after the end of the data.
    window = np.asarray(scaled_close[-seq_length:, 0], dtype=float).copy()

    scaled_forecast = []
    for _ in range(forecast_horizon):
        next_scaled = float(model.predict(window.reshape(1, seq_length, 1), verbose=0)[0, 0])
        scaled_forecast.append(next_scaled)
        # Slide the window forward by one step, feeding the prediction back in.
        window = np.append(window[1:], next_scaled)

    # Back to price units; inverse_transform expects a 2D column.
    future_predictions = scaler.inverse_transform(np.array(scaled_forecast).reshape(-1, 1))

    nn_future_predictions[crypto] = future_predictions

print("Future predictions generated for all cryptocurrencies.")


In [None]:
# Cell 8: Plot historical and predicted prices
# For each cryptocurrency, draw the historical closing prices followed by the
# predicted prices on the days after the last historical date, then save the
# figure as a PNG and display it.
output_dir = 'results/output_predictions'
os.makedirs(output_dir, exist_ok=True)  # savefig fails if the folder is missing

for crypto, future_predictions in nn_future_predictions.items():
    # crypto_data no longer holds the raw frame, so re-read the cleaned CSV.
    original_df = pd.read_csv(f'data/cleaned_data/{crypto}_cleaned.csv', index_col='Date', parse_dates=True)

    last_date = original_df.index[-1]
    # One date per predicted value, so x and y always have matching lengths.
    next_30_days = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=len(future_predictions))
    nn_dates_future = next_30_days.values

    fig = plt.figure(figsize=(12, 6))
    plt.plot(original_df['Close'], label='Historical Prices')
    plt.plot(nn_dates_future, future_predictions, label='Predicted Prices', linestyle='--')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title(f'Historical and Predicted Prices for {crypto} using LSTM Neural Network')
    plt.legend()
    plt.savefig(f'{output_dir}/{crypto}_nn_predict.png')
    plt.show()
    plt.close(fig)  # release the figure; one is created per loop iteration
