In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt

# Default location of the input data.
# NOTE(review): this path ends in ".ipynb", yet load_and_prepare_data() hands its
# argument to pd.read_csv — confirm it should point at a CSV file instead.
# The `if __name__ == "__main__":` block further down assigns file_path again
# before it is used there.
file_path ="./AAPL_daily_5years.ipynb"

# Assuming your data is in a CSV file with columns like: Date, Open, High, Low, Close, Volume, etc.
def load_and_prepare_data(file_path):
    """Read price data from a CSV and append technical-indicator columns.

    The file must contain a 'Close' column. When a 'Date' column is present
    it is parsed to datetimes and becomes the index.

    Columns added, all derived from 'Close':
        SMA_20, SMA_50        -- 20- and 50-row rolling means
        BB_middle/upper/lower -- 20-row rolling mean and that mean plus/minus
                                 two rolling standard deviations
        RSI                   -- 100 - 100 / (1 + rs), where rs is the 14-row
                                 rolling mean gain over the rolling mean loss

    Args:
        file_path: Anything accepted by ``pd.read_csv``.

    Returns:
        The DataFrame with the indicator columns appended and every row that
        contains a NaN in any column removed (this includes the warm-up rows
        of the rolling windows).
    """
    frame = pd.read_csv(file_path)

    # Use the dates as the index when the file provides them.
    if 'Date' in frame.columns:
        frame['Date'] = pd.to_datetime(frame['Date'])
        frame = frame.set_index('Date')

    close = frame['Close']

    # Simple moving averages.
    frame['SMA_20'] = close.rolling(window=20).mean()
    frame['SMA_50'] = close.rolling(window=50).mean()

    # Bollinger Bands: middle band plus/minus two rolling standard deviations.
    band_width = 2 * close.rolling(window=20).std()
    frame['BB_middle'] = close.rolling(window=20).mean()
    frame['BB_upper'] = frame['BB_middle'] + band_width
    frame['BB_lower'] = frame['BB_middle'] - band_width

    # Relative Strength Index from 14-row average gains and losses.
    change = close.diff()
    avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
    avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
    relative_strength = avg_gain / avg_loss
    frame['RSI'] = 100 - (100 / (1 + relative_strength))

    # Discard every row that still holds a NaN.
    return frame.dropna()

def create_sequences(data, seq_length, target_col=0):
    """Slice a 2-D feature matrix into overlapping windows and next-step targets.

    Sample ``i`` uses rows ``i .. i + seq_length - 1`` as input and the value
    of column ``target_col`` in row ``i + seq_length`` as its target.

    Args:
        data: Array-like of shape (n_rows, n_features).
        seq_length: Number of consecutive rows in each input window.
        target_col: Column index of the value to predict. Defaults to 0,
            which is the 'Close' column in this file's feature ordering.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        (n_rows - seq_length, seq_length, n_features) and ``y`` has shape
        (n_rows - seq_length,). When ``data`` has no more than ``seq_length``
        rows, both arrays are empty but keep their trailing dimensions, so
        callers can still inspect ``X.shape[1:]``.
    """
    data = np.asarray(data)
    n_samples = len(data) - seq_length

    if n_samples <= 0:
        # Not enough rows for a single window: return correctly shaped empties
        # instead of shape-(0,) arrays that lose the sequence/feature axes.
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + seq_length] for start in range(n_samples)])
    # Copy so the targets do not alias the caller's array.
    y = data[seq_length:, target_col].copy()
    return X, y

def build_lstm_model(input_shape):
    """Assemble and compile the two-layer LSTM regression network.

    Layer stack: LSTM(50, returning sequences) -> Dropout(0.2) ->
    LSTM(50) -> Dropout(0.2) -> Dense(25) -> Dense(1).

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM
            layer as ``input_shape``. The caller in this file passes
            (time steps, features).

    Returns:
        The Sequential model, compiled with the 'adam' optimizer and 'mse' loss.
    """
    network = Sequential()

    # First recurrent layer emits the full sequence so a second LSTM can follow.
    network.add(LSTM(50, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(0.2))

    # Second recurrent layer emits only its final output.
    network.add(LSTM(50, return_sequences=False))
    network.add(Dropout(0.2))

    # Dense head ending in a single output value.
    network.add(Dense(25))
    network.add(Dense(1))

    network.compile(optimizer='adam', loss='mse')
    return network

def train_and_evaluate_model(file_path):
    """Train the LSTM on indicator features and evaluate it on a hold-out set.

    The hold-out split is chronological: the earliest 80% of the sequences
    are used for training and the most recent 20% for testing. A shuffled
    split would scatter heavily overlapping windows across both sets, leaking
    future information into training. For the same reason the scaler is
    fitted only on the rows that the training sequences cover.

    Args:
        file_path: Passed to ``load_and_prepare_data``.

    Returns:
        Tuple ``(model, scaler, test_predictions, y_test)``.
        ``test_predictions`` and ``y_test`` are in the scaler's standardized
        units and ordered chronologically.

    Raises:
        ValueError: If the prepared data is too short to yield at least one
            training and one test sequence.
    """
    # Load and prepare data
    df = load_and_prepare_data(file_path)

    # Select features for the model; 'Close' must stay first because
    # create_sequences() takes the target from column 0.
    features = ['Close', 'SMA_20', 'SMA_50', 'BB_middle', 'BB_upper', 'BB_lower', 'RSI']
    data = df[features].values

    seq_length = 60  # Number of time steps to look back
    test_size = 0.2  # Fraction of sequences held out, taken from the end

    # Work out the chronological split sizes before scaling so the scaler can
    # be fitted without seeing the test period.
    n_sequences = len(data) - seq_length
    n_test = int(np.ceil(test_size * n_sequences))
    n_train = n_sequences - n_test
    if n_train < 1 or n_test < 1:
        raise ValueError(
            f"Not enough data: {len(data)} usable rows cannot be split into "
            f"train and test sequences of length {seq_length}."
        )

    # Training sequence i reads rows i .. i + seq_length (inputs plus target),
    # so the training set touches rows 0 .. n_train + seq_length - 1.
    scaler = StandardScaler()
    scaler.fit(data[:n_train + seq_length])
    data_scaled = scaler.transform(data)

    # Create sequences for LSTM
    X, y = create_sequences(data_scaled, seq_length)

    # Chronological split: no shuffling for time-series data.
    X_train, X_test = X[:n_train], X[n_train:]
    y_train, y_test = y[:n_train], y[n_train:]

    # Build and train the model
    model = build_lstm_model((X_train.shape[1], X_train.shape[2]))

    # Keras takes validation_split from the END of the training arrays, so
    # with the ordered split above validation is also the most recent data.
    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.1,
        verbose=1
    )

    # Make predictions
    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    # Evaluate the model (RMSE is in standardized units, not price units)
    train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
    test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))

    print(f'Train RMSE: {train_rmse:.2f}')
    print(f'Test RMSE: {test_rmse:.2f}')

    # Plot training history
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    return model, scaler, test_predictions, y_test

def predict_future_prices(model, scaler, last_sequence, num_days=30):
    """Roll the model forward ``num_days`` steps and return predicted Close values.

    Each step feeds the current window to ``model.predict``, records the
    prediction, then slides the window forward by one row. Only column 0
    (the predicted target) of the new row is replaced by the prediction; the
    remaining feature columns are carried forward from the previous last row,
    because they cannot be recomputed from the prediction alone. Writing the
    prediction into every column would put a Close value where the other
    indicators belong.

    Args:
        model: Object with a ``predict(batch)`` method that returns an
            array-like whose first element is the next-step prediction for a
            batch of shape (1, seq_length, n_features).
        scaler: Fitted scaler with an ``inverse_transform`` method over all
            ``n_features`` columns, used to convert predictions back to the
            original scale. Pass ``None`` to get predictions in scaled units.
        last_sequence: Array-like of shape (seq_length, n_features) holding
            the most recent scaled rows. It is not modified.
        num_days: Number of future steps to predict.

    Returns:
        Array of shape (num_days, 1) with the predicted values of column 0,
        in original units when ``scaler`` is given.
    """
    window = np.array(last_sequence, dtype=float)  # private copy
    n_features = window.shape[1]
    scaled_preds = []

    for _ in range(num_days):
        # Get prediction for next day
        batch = window.reshape(1, window.shape[0], n_features)
        next_value = float(np.asarray(model.predict(batch)).ravel()[0])
        scaled_preds.append(next_value)

        # Slide the window: drop the oldest row, append a new row whose
        # target column is the prediction and whose other features are
        # carried forward unchanged.
        next_row = window[-1].copy()
        next_row[0] = next_value
        window = np.vstack([window[1:], next_row])

    future = np.array(scaled_preds, dtype=float).reshape(-1, 1)
    if scaler is None or len(future) == 0:
        return future

    # The scaler works on full feature rows, so pad the predictions into
    # column 0 of a dummy matrix and keep only that column afterwards.
    padded = np.zeros((len(future), n_features))
    padded[:, 0] = future[:, 0]
    return np.asarray(scaler.inverse_transform(padded))[:, [0]]

# Usage example
# Script entry point: train the model, plot hold-out predictions against the
# actual targets, then forecast forward and plot the forecast.
if __name__ == "__main__":
    file_path = "your_stock_data.csv"  # Replace with your data file path

    # Train the model; this also prints the RMSE values and shows the loss curves
    model, scaler, test_predictions, y_test = train_and_evaluate_model(file_path)

    # Plot test predictions vs actual values
    plt.figure(figsize=(12, 6))
    plt.plot(y_test, label='Actual')
    plt.plot(test_predictions, label='Predicted')
    plt.title('Stock Price Prediction')
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.legend()
    plt.show()

    # Get the last sequence from your data to predict future values.
    # The data is loaded a second time because train_and_evaluate_model()
    # does not return its DataFrame; the feature list must stay identical to
    # the one used for training so the fitted scaler sees the same columns.
    df = load_and_prepare_data(file_path)
    features = ['Close', 'SMA_20', 'SMA_50', 'BB_middle', 'BB_upper', 'BB_lower', 'RSI']
    data = df[features].values
    data_scaled = scaler.transform(data)
    # 60 rows, matching the seq_length = 60 used in train_and_evaluate_model()
    last_sequence = data_scaled[-60:]  # Using last 60 days

    # Predict future prices (num_days defaults to 30)
    future_pred = predict_future_prices(model, scaler, last_sequence)

    # Plot future predictions
    plt.figure(figsize=(12, 6))
    plt.plot(range(len(future_pred)), future_pred, label='Future Predictions')
    plt.title('Future Stock Price Predictions')
    plt.xlabel('Days')
    plt.ylabel('Price')
    plt.legend()
    plt.show()

# Figures displayed during execution (shown with plt.show(); none are saved to disk):
# - Training history plot
# - Test predictions vs actual values plot
# - Future predictions plot

In [None]:
# IPython magic: installs the tensorflow package that the imports at the top
# of this file require.
%pip install tensorflow