# lstm_nn_predict-checkpoint.ipynb

## Notebook Purpose
This notebook uses the trained LSTM model to predict cryptocurrency prices for future days. It loads the trained model and test data, generates predictions, and saves the predicted prices.

## Instructions
1. **Import Necessary Libraries**:
   - Import `pandas` for data manipulation.
   - Import `joblib` to load the trained model.
   - Import `MinMaxScaler` from `sklearn` for data normalization.
   - Import `matplotlib` for visualization.

2. **Load Test Data and Model**:
   - Load the test data and the trained LSTM model.

3. **Generate Predictions**:
   - Use the model to predict future prices for 1, 3, 5, 7, 14, 21, and 30 days.

4. **Save Predictions**:
   - Save the generated predictions to a CSV file.

5. **Review Predictions**:
   - Display the predictions to verify they are as expected.

## Example Code
```python
# Import necessary libraries
import pandas as pd
import joblib
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Load test data and model
data_path = 'data/historical_data/btc_usd_test.csv'  # Update this path based on the selected cryptocurrency
data = pd.read_csv(data_path, parse_dates=['Date'], index_col='Date')
model = joblib.load('models/trained_lstm_model.pkl')

# Generate predictions
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[['Close']])
future_predictions = model.predict(scaled_data)

# Inverse transform the predictions
future_predictions = scaler.inverse_transform(future_predictions)

# Save predictions
predictions_df = pd.DataFrame(future_predictions, index=data.index, columns=['Predicted'])
predictions_df.to_csv('results/lstm_nn_predictions.csv')

# Display predictions
predictions_df.head()

# Plot predictions
plt.figure(figsize=(14, 7))
plt.plot(data.index, data['Close'], label='Actual Prices')
plt.plot(predictions_df.index, predictions_df['Predicted'], label='Predicted Prices')
plt.title('LSTM Model Predictions vs Actual Prices')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.show()


In [1]:
# Cell 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import joblib
import warnings
import os
warnings.filterwarnings("ignore")

global crypto_data
global nn_models


In [6]:
# Cell 2: Load preprocessed data and models
cryptos = ['BTC', 'ETH', 'SOL']
crypto_data = {}
nn_models = {}

data_dir = '../data/cleaned_data'
model_dir = '../models'

# Print files in the directories to verify their presence
print(f"Files in {data_dir} directory: {os.listdir(data_dir)}")
print(f"Files in {model_dir} directory: {os.listdir(model_dir)}")

for crypto in cryptos:
    data_path = f'{data_dir}/{crypto}_cleaned.csv'
    model_path = f'{model_dir}/{crypto}_lstm_model.h5'
    scaler_path = f'{model_dir}/{crypto}_scaler.pkl'
    
    if os.path.exists(data_path) and os.path.exists(model_path) and os.path.exists(scaler_path):
        crypto_data[crypto] = pd.read_csv(data_path, index_col='Date', parse_dates=True)
        nn_models[crypto] = joblib.load(model_path)
        crypto_data[crypto]['scaler'] = joblib.load(scaler_path)
        
        # Debugging: Verify scaler type
        scaler = crypto_data[crypto]['scaler']
        print(f"{crypto} scaler type: {type(scaler)}")
        
        print(f"{crypto} data and model loaded successfully")
    else:
        print(f"File not found: {data_path} or {model_path} or {scaler_path}")

if not crypto_data:
    raise FileNotFoundError("No cryptocurrency data or models found. Please ensure data preparation and model training steps are completed.")


FileNotFoundError: [Errno 2] No such file or directory: '../data/cleaned_data'

In [None]:
# Cell 3: Preprocess data and generate predictions
def create_sequences(data, seq_length):
    xs, ys = [], []
    for i in range(len(data) - seq_length - 1):
        x = data[i:(i + seq_length), 0]
        y = data[i + seq_length, 0]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

for crypto, df in crypto_data.items():
    scaler = crypto_data[crypto]['scaler']
    
    # Ensure the scaler is of the correct type
    if not isinstance(scaler, MinMaxScaler):
        raise TypeError(f"Scaler for {crypto} is not a MinMaxScaler object")
    
    close_values = df['Close'].values.reshape(-1, 1)  # Ensure it's a 2D array for the scaler
    scaled_data = scaler.transform(close_values)  # Using transform to ensure scaling
    
    seq_length = 60
    X, y = create_sequences(scaled_data, seq_length)
    split = int(len(X) * 0.80)
    X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]
    
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    
    crypto_data[crypto].update({
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
        'scaled_data': scaled_data
    })
    
    model = nn_models[crypto]
    future_predictions = model.predict(X_test)
    future_predictions = scaler.inverse_transform(future_predictions)
    crypto_data[crypto]['future_predictions'] = future_predictions

print("Predictions generated for all cryptocurrencies.")


In [None]:
# Cell 4: Load the trained LSTM model
model_path = 'models/trained_lstm_model.pkl'  # Update this path based on the saved model
nn_model = joblib.load(model_path)
print("Trained LSTM model loaded successfully")


In [None]:
# Cell 5: Prepare the data for prediction
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[['Close']])


In [None]:
# Cell 6: Generate predictions for 1, 3, 5, 7, 14, 21, and 30 days
prediction_days = [1, 3, 5, 7, 14, 21, 30]
predictions = {}

for days in prediction_days:
    X_future = []
    start_index = len(scaled_data) - days
    for i in range(start_index, start_index + days):
        seq = scaled_data[i - days:i, 0]
        X_future.append(seq)
    X_future = np.array(X_future)
    X_future = np.reshape(X_future, (X_future.shape[0], X_future.shape[1], 1))
    future_predictions = nn_model.predict(X_future)
    future_predictions = scaler.inverse_transform(future_predictions)
    predictions[days] = future_predictions
    print(f"Predictions for {days} days generated successfully")


In [None]:
# Cell 7: Save predictions to CSV files
for days, preds in predictions.items():
    predictions_df = pd.DataFrame(preds, index=data.index[-days:], columns=['Predicted'])
    predictions_df.to_csv(f'results/lstm_nn_predictions_{days}_days.csv')
    print(f"Predictions for {days} days saved to 'results/lstm_nn_predictions_{days}_days.csv'")


In [None]:
# Cell 8: Display a sample of predictions
sample_days = 5  # Number of sample days to display
sample_predictions = predictions[sample_days]
print(f"Sample predictions for {sample_days} days:")
print(sample_predictions[:sample_days])


In [None]:
# Cell 9: Plot predictions vs actual prices
plt.figure(figsize=(14, 7))
plt.plot(data.index, data['Close'], label='Actual Prices')
for days, preds in predictions.items():
    plt.plot(data.index[-days:], preds, label=f'Predicted Prices ({days} days)')
plt.title('LSTM Model Predictions vs Actual Prices')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.show()
