<a href="https://colab.research.google.com/github/Keyur1028/Profnitt-Task/blob/main/ML_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance ta seaborn

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from ta.volatility import BollingerBands
from ta.trend import MACD, SMAIndicator, EMAIndicator
from ta.momentum import RSIIndicator
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import pickle


In [None]:
import yfinance as yf
import pandas as pd

# Data download: change `period` below and the ticker entered at the prompt
# to analyse a different window or stock.
Stock = input("Enter stock symbol (e.g., AAPL): ")
# Strip stray whitespace from the prompt so " AAPL " is not treated as a
# different (unknown) symbol.
ticker = Stock.strip()

df = yf.download(ticker, period="1y", interval="1d")

# Fail here with a clear message rather than letting an empty frame reach
# the indicator cells below.
if df.empty:
    raise ValueError(f"No price data was returned for ticker {ticker!r}.")

# dropna() returns a new frame; the result must be assigned back, otherwise
# rows with missing values are silently kept.
df = df.dropna()
df.head()

In [None]:
# Pull the closing prices out of the downloaded frame and flatten them to a
# 1-D object, in a single chained expression.
dfClose = df['Close'].transpose().squeeze()

This step was needed to convert the incoming data from a 2D table into a 1D vector.

In [None]:
# Indicators
#
# Every indicator below is computed from the 1-D closing-price series
# `dfClose` and stored as a new column of `df`.

# Simple and Exponential Moving Average (10-row window)
df['SMA_10'] = SMAIndicator(close=dfClose, window=10).sma_indicator()
df['EMA_10'] = EMAIndicator(close=dfClose, window=10).ema_indicator()

# RSI (14-row window)
df['RSI'] = RSIIndicator(close=dfClose, window=14).rsi()

# MACD (value returned by macd_diff(), using the library's default windows)
macd = MACD(close=dfClose)
df['MACD'] = macd.macd_diff()

# Bollinger Band Width (library default window)
bb = BollingerBands(close=dfClose)
df['BB_width'] = bb.bollinger_wband()

# Volume spike (as % change). A zero-volume row makes the following change
# +/-inf, which dropna() would keep and the model would reject, so infinities
# are turned into NaN here.
df['Volume_change'] = df['Volume'].pct_change().replace([np.inf, -np.inf], np.nan)

n = int(input("Enter prediction period:"))

# Number of rows on which every indicator is defined. Measuring it directly
# accounts for the longest warm-up among all indicators instead of assuming
# a fixed window size.
usable_rows = len(df.dropna())

# n must be at least 2 (the standard deviation of a single return is
# undefined) and must leave at least one row that has both a full set of
# indicators and n future returns. Stopping here avoids continuing with a
# frame that has no 'Future_volatility' column.
if n < 2 or n >= usable_rows:
    raise ValueError(
        f"Prediction period (n={n}) must be at least 2 and smaller than the "
        f"{usable_rows} rows available after calculating indicators. "
        "Please enter a different value for n."
    )

# Target Variable: Rolling volatility over next n days (standard deviation of
# returns). rolling(n).std() at row t covers returns t-n+1..t; shift(-n)
# moves it so that row t holds the volatility of returns t+1..t+n.
df['Future_volatility'] = dfClose.pct_change().rolling(window=n).std().shift(-n)

# Drops the indicator warm-up rows at the start and the last n rows, whose
# future window is incomplete.
df = df.dropna()

df.tail()

The indicators this model uses are moving averages (SMA and EMA), RSI, MACD, Bollinger Band width, and volume spikes.

In [None]:
# Correlation matrix: pairwise correlations between every column of df,
# drawn as an annotated heatmap.
plt.figure(figsize=(10, 6))
correlations = df.corr()
sns.heatmap(correlations, annot=True, cmap='coolwarm')
plt.gca().set_title("Feature Correlation Matrix")
plt.show()


In [None]:
# Feature map: one 30-bin histogram per engineered feature column.
df[[
    'SMA_10',
    'EMA_10',
    'RSI',
    'MACD',
    'BB_width',
    'Volume_change',
]].hist(figsize=(12, 8), bins=30)
plt.gcf().suptitle("Feature Distributions")
plt.show()


In [None]:
# Model
#
# Trains a RandomForestRegressor to predict 'Future_volatility' from the
# indicator columns and reports the mean squared error on a held-out set.
features = ['SMA_10', 'EMA_10', 'RSI', 'MACD', 'BB_width', 'Volume_change']

# Check that df has rows and a target column before splitting
if df.empty:
    print("DataFrame is empty. Cannot train the model.")
elif 'Future_volatility' not in df.columns:
    print("Column 'Future_volatility' is missing. Cannot train the model.")
else:
    X = df[features]
    y = df['Future_volatility']

    # Chronological split: the first 80% of rows train the model and the
    # last 20% evaluate it. The rows are time-ordered and each target looks
    # forward over several rows, so a shuffled split would put test rows
    # between training rows whose target windows overlap theirs, making the
    # test error look better than it is.
    # NOTE(review): targets of the final training rows still overlap the
    # start of the test period; consider leaving a gap between the two sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # random_state fixes the forest's internal randomness for reproducibility
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    print(f'Mean Squared Error on Test Set: {mse:.6f}')

In [None]:
# Persist the fitted model to 'volatility_model.pkl' in the working directory.
with open('volatility_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)


In [None]:
import pickle

# Read the pickled model back from disk into `model`.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open("volatility_model.pkl", mode="rb") as model_file:
    model = pickle.load(model_file)

In [None]:
# Columns fed to the model, in the same order used for training.
features = ['SMA_10', 'EMA_10', 'RSI', 'MACD', 'BB_width', 'Volume_change']

# Keep the last row as a one-row frame (not a Series) so that predict()
# receives a 2-D input of shape (1, n_features).
# NOTE(review): if df is the frame left by the indicator cell, its last n rows
# were removed by dropna() on the target, so this is the most recent row with
# a known target rather than the most recent trading day. Confirm intent.
latest_input = df[features].tail(1)


In [None]:
# Re-derive the 1-D closing-price series from the current df.
dfClose = df['Close'].transpose().squeeze()

# (Recalculate the indicators just before prediction)
# Only SMA_10 is recomputed here; EMA_10, RSI, MACD, etc. would follow the
# same pattern.
# NOTE(review): in notebook order this cell runs after `latest_input` has
# already been built, so it does not appear to affect the prediction. Confirm
# whether it should come first.
sma_10 = SMAIndicator(close=dfClose, window=10)
df['SMA_10'] = sma_10.sma_indicator()

# Remove rows with NaNs, modifying df in place.
df.dropna(inplace=True)


In [None]:
# Run the model on the single-row input and take its only prediction.
predictions = model.predict(latest_input)
predicted_volatility = predictions[0]

# Report the result to six decimal places.
print(f"Predicted {n} -day volatility for {ticker}: {predicted_volatility:.6f}")
