# Stock Price Prediction Analysis

This notebook demonstrates stock price prediction using machine learning.

## Steps:
1. Data Collection
2. Exploratory Data Analysis
3. Feature Engineering
4. Model Training
5. Model Comparison (evaluation metrics)
6. Visualization of Predictions
7. Future Predictions

In [None]:
# Import libraries
import sys
# Put the parent directory on the import path so the `src` package imported
# below can be found.
# NOTE(review): '..' is resolved against the kernel's working directory, so this
# presumably expects the notebook to be run from its own folder — confirm.
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Installs a blanket "ignore" filter: every warning category is suppressed from
# this point on, including deprecation warnings from the libraries above.
warnings.filterwarnings('ignore')

# Project-local helpers; these imports only resolve after the sys.path tweak above.
from src.data_collector import StockDataCollector
from src.feature_engineering import FeatureEngineer
from src.models import LinearRegressionModel, RandomForestModel, XGBoostModel
from src.visualizer import StockVisualizer

# Set display options
# None removes the column limit, so wide frames are shown without truncation.
pd.set_option('display.max_columns', None)
# NOTE(review): the 'seaborn-v0_8-*' style names need a reasonably recent
# matplotlib (3.6+, as far as the docs indicate) — confirm the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

## 1. Data Collection

Fetch historical stock data from Yahoo Finance.

In [None]:
# Notebook-wide parameters: the symbol to analyse and the look-back window.
TICKER = 'AAPL'  # swap in any other ticker symbol
PERIOD = '5y'    # five years of history

# The collector is configured with the ../data directory.
collector = StockDataCollector(data_dir='../data')

# Fetch the price history, report its dimensions, and preview the first rows.
df = collector.fetch_stock_data(TICKER, period=PERIOD)
data_shape = df.shape
print(f"Data shape: {data_shape}")
df.head()

In [None]:
# Ask the collector for the ticker's info mapping and list it,
# one "field: value" pair per line.
info = collector.get_stock_info(TICKER)
for field, detail in info.items():
    print(f"{field}: {detail}")

## 2. Exploratory Data Analysis

In [None]:
# Summary statistics of the fetched price frame
price_summary = df.describe()
price_summary

In [None]:
# Initialize visualizer
# `viz` is reused by every plotting cell further down the notebook.
viz = StockVisualizer()

# Plot stock price
# Hands the fetched frame and the ticker symbol to the project's plotting helper.
viz.plot_stock_price(df, TICKER)

In [None]:
# Count the null entries in each column of the raw price frame
missing_per_column = df.isnull().sum()
print("Missing values:")
print(missing_per_column)

In [None]:
# Side-by-side histograms: closing price on the left, traded volume on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (column, title, x-axis label, extra hist keyword arguments) for each panel;
# the left panel keeps matplotlib's default colour.
panels = [
    ('Close', 'Close Price Distribution', 'Price ($)', {}),
    ('Volume', 'Volume Distribution', 'Volume', {'color': 'green'}),
]
for ax, (column, title, xlabel, extra_style) in zip(axes, panels):
    ax.hist(df[column], bins=50, edgecolor='black', alpha=0.7, **extra_style)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

## 3. Feature Engineering

Create technical indicators and features.

In [None]:
# Build the engineered feature table from the raw price frame
engineer = FeatureEngineer(df)
engineer.add_all_features()
features_df = engineer.get_feature_dataframe()

# Report the table's dimensions, then the feature names the engineer exposes
features_shape = features_df.shape
print(f"Features shape: {features_shape}")
engineered_names = engineer.get_feature_names()
print(f"\nFeature names: {engineered_names}")

In [None]:
# Peek at the last rows of the engineered table
latest_rows = features_df.tail()
latest_rows

In [None]:
# Visualize technical indicators
# Only the final 252 rows are passed to the plotting helper; the author labels
# that window "last year" (presumably one row per trading day — confirm).
viz.plot_technical_indicators(features_df.tail(252), TICKER)  # Last year

In [None]:
# Correlation matrix
# Passes the full engineered table to the project's correlation plotting helper.
viz.plot_correlation_matrix(features_df)

## 4. Model Training

Train multiple machine learning models.

In [None]:
# Column list handed to prepare_data by each training cell below
feature_names = engineer.get_feature_names()
feature_count = len(feature_names)
print(f"Number of features: {feature_count}")

### Linear Regression

In [None]:
# Linear Regression: prepare the train/test arrays, fit, predict on the test
# inputs, then score the predictions.
lr_model = LinearRegressionModel(model_dir='../models')
lr_split = lr_model.prepare_data(features_df, feature_names)
X_train, X_test, y_train, y_test = lr_split

lr_model.train(X_train, y_train)
lr_pred = lr_model.predict(X_test)
lr_metrics = lr_model.evaluate(y_test, lr_pred)

# One indented line per metric, four decimal places each
print("Linear Regression Metrics:")
for metric_name, metric_value in lr_metrics.items():
    print(f"  {metric_name}: {metric_value:.4f}")

### Random Forest

In [None]:
# Random Forest (100 estimators, max depth 15): prepare the train/test arrays,
# fit, predict on the test inputs, then score the predictions.
rf_model = RandomForestModel(n_estimators=100, max_depth=15, model_dir='../models')
rf_split = rf_model.prepare_data(features_df, feature_names)
X_train, X_test, y_train, y_test = rf_split

rf_model.train(X_train, y_train)
rf_pred = rf_model.predict(X_test)
rf_metrics = rf_model.evaluate(y_test, rf_pred)

# One indented line per metric, four decimal places each
print("Random Forest Metrics:")
for metric_name, metric_value in rf_metrics.items():
    print(f"  {metric_name}: {metric_value:.4f}")

In [None]:
# Pull the feature importances from the Random Forest wrapper and chart them,
# limited by the helper's top_n setting.
TOP_N_FEATURES = 20
importance = rf_model.get_feature_importance()
viz.plot_feature_importance(importance, top_n=TOP_N_FEATURES)

### XGBoost

In [None]:
# XGBoost (100 estimators, learning rate 0.1, max depth 6): prepare the
# train/test arrays, fit, predict on the test inputs, then score the predictions.
xgb_model = XGBoostModel(n_estimators=100, learning_rate=0.1, max_depth=6, model_dir='../models')
xgb_split = xgb_model.prepare_data(features_df, feature_names)
X_train, X_test, y_train, y_test = xgb_split

xgb_model.train(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)
xgb_metrics = xgb_model.evaluate(y_test, xgb_pred)

# One indented line per metric, four decimal places each
print("XGBoost Metrics:")
for metric_name, metric_value in xgb_metrics.items():
    print(f"  {metric_name}: {metric_value:.4f}")

## 5. Model Comparison

In [None]:
# Compare all models
# Each model's metric dict is stored under the label used in plots and printouts.
results = {
    'Linear Regression': lr_metrics,
    'Random Forest': rf_metrics,
    'XGBoost': xgb_metrics
}

# Transposed so that each row is a model and each column is a metric.
comparison_df = pd.DataFrame(results).T
print("\nModel Comparison:")
print(comparison_df)

# Best model: the row label with the largest value in the 'R2' column.
best_model = comparison_df['R2'].idxmax()
best_r2 = comparison_df.loc[best_model, 'R2']
# The trophy emoji and the superscript two are written as real Unicode
# characters; the previous text was a mis-decoded (mojibake) rendering of them.
print(f"\n🏆 Best Model: {best_model} (R² = {best_r2:.4f})")

In [None]:
# Visualize comparison
# Passes the label -> metrics mapping and the ticker symbol to the project's
# comparison plotting helper.
viz.plot_model_comparison(results, TICKER)

## 6. Visualization of Predictions

In [None]:
# Visualize predictions
# Linear Regression predictions plotted against `y_test`.
# NOTE(review): `y_test` is whatever the most recently executed prepare_data
# cell assigned (the XGBoost cell when run top to bottom). Confirm that every
# model's prepare_data yields the same test targets; otherwise `lr_pred` and
# `y_test` are misaligned here.
viz.plot_predictions(y_test, lr_pred, TICKER, 'Linear Regression')

In [None]:
# Random Forest predictions plotted against `y_test`.
# NOTE(review): `y_test` is whatever the most recently executed prepare_data
# cell assigned (the XGBoost cell when run top to bottom). Confirm that every
# model's prepare_data yields the same test targets; otherwise `rf_pred` and
# `y_test` are misaligned here.
viz.plot_predictions(y_test, rf_pred, TICKER, 'Random Forest')

In [None]:
# XGBoost predictions plotted against `y_test`, which the XGBoost training cell
# is the last to assign when the notebook is run top to bottom.
viz.plot_predictions(y_test, xgb_pred, TICKER, 'XGBoost')

## 7. Future Predictions

Predict the next trading day's price from the most recent row of features, using the best model.

In [None]:
# Use the model that actually won the comparison rather than assuming XGBoost.
# The keys mirror the labels under which `best_model` was selected.
trained_models = {
    'Linear Regression': lr_model,
    'Random Forest': rf_model,
    'XGBoost': xgb_model,
}
selected_model = trained_models[best_model]

# Get the latest data point; the -1: slice keeps it as a single-row 2-D array.
latest_features = features_df[feature_names].iloc[-1:].values
# NOTE(review): assumes every model wrapper exposes the same fitted `scaler`
# attribute that xgb_model does — confirm against src.models.
latest_scaled = selected_model.scaler.transform(latest_features)
next_day_prediction = selected_model.predict(latest_scaled)

# Read the reference price once instead of re-indexing the frame for each line.
current_price = features_df['Close'].iloc[-1]
predicted_price = next_day_prediction[0]
expected_change = predicted_price - current_price

print(f"Model: {best_model}")
print(f"Current Price: ${current_price:.2f}")
print(f"Predicted Next Day Price: ${predicted_price:.2f}")
print(f"Expected Change: ${expected_change:.2f}")
print(f"Expected Change %: {(expected_change / current_price * 100):.2f}%")

## Conclusion

This notebook demonstrated:
- Data collection from Yahoo Finance
- Feature engineering with technical indicators
- Training multiple ML models
- Model evaluation and comparison
- Making predictions

**Remember**: Stock prediction is inherently uncertain. Use this for educational purposes only!