# Model Comparison for Stock Market Prediction

This notebook evaluates various AI/ML models for stock market predictions using Microsoft stock data from Yahoo Finance. The models compared include:
- Linear Regression
- XGBoost
- Random Forest
- LSTM
- Transformer-based Time Series Model

Each model is trained and evaluated on five training-window lengths: 3, 6, 12, 24, and 36 months.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import custom modules
from src.data.data_loader import load_data
from src.data.data_preprocessor import preprocess_data
from src.models.linear_regression import LinearRegressionModel
from src.models.xgboost_model import XGBoostModel
from src.models.random_forest import RandomForestModel
from src.models.lstm_model import LSTMModel
from src.models.transformer_model import TransformerModel
from src.evaluation.metrics import calculate_metrics
from src.evaluation.visualizations import plot_results
from src.training.time_period_trainer import train_models_on_time_periods

# --- Data -------------------------------------------------------------------
# Fetch the raw MSFT series, then run it through the project's preprocessing
# step. Both helpers live in src.data; their return types are not visible here.
data = load_data('MSFT')
processed_data = preprocess_data(data)

# --- Experiment configuration -----------------------------------------------
# Training-window lengths to compare, expressed in months.
time_periods = [3, 6, 12, 24, 36]

# Registry of display label -> model instance. Each class is instantiated once,
# with default constructor arguments, in the order listed.
# NOTE(review): these same instances are handed to the trainer for every
# period below. Whether train_models_on_time_periods refits from scratch or
# continues from previously fitted state is not visible here -- confirm.
models = {
    label: model_cls()
    for label, model_cls in (
        ('Linear Regression', LinearRegressionModel),
        ('XGBoost', XGBoostModel),
        ('Random Forest', RandomForestModel),
        ('LSTM', LSTMModel),
        ('Transformer', TransformerModel),
    )
}

# --- Training ---------------------------------------------------------------
# One trainer call per window length; whatever it returns is stored under
# that window length.
results = {}
for months in time_periods:
    print(f'Training models for {months} months...')
    results[months] = train_models_on_time_periods(processed_data, models, months)

# --- Reporting --------------------------------------------------------------
# For every window length, print each model's result entry and plot its
# predictions against the actual values. Each entry is indexed with the
# 'predictions' and 'actual' keys.
for period in results:
    metrics = results[period]
    print(f'Performance metrics for {period} months:')
    for model_name, metric in metrics.items():
        print(f'{model_name}: {metric}')
        plot_results(
            metric['predictions'],
            metric['actual'],
            model_name,
            period,
        )
