In [4]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import os
import sys
import json
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from datetime import date
from ta.trend import MACD
from ta.momentum import  RSIIndicator
import yfinance as yf

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
def get_symbols():
    """
    Get stock symbols from a file.
    Returns:
    list: List of stock symbols.
    """
    path = r"E:\stock_price\data\NIFTY50_23April2025\MW-NIFTY-50-23-Apr-2025.csv"
    data_NIFTY = pd.read_csv(path)
    data_NIFTY.columns = data_NIFTY.columns.str.replace('\n', '')
    data_NIFTY.columns = data_NIFTY.columns.str.strip()
    symbols = data_NIFTY['SYMBOL'].unique().tolist()
    return symbols
symbols = get_symbols()

In [7]:
symbols

['NIFTY 50',
 'ITC',
 'HINDUNILVR',
 'HDFCBANK',
 'JIOFIN',
 'M&M',
 'TATACONSUM',
 'BEL',
 'KOTAKBANK',
 'JSWSTEEL',
 'ETERNAL',
 'CIPLA',
 'SBIN',
 'NESTLEIND',
 'ICICIBANK',
 'HCLTECH',
 'SUNPHARMA',
 'TITAN',
 'TATAMOTORS',
 'ADANIENT',
 'MARUTI',
 'ULTRACEMCO',
 'DRREDDY',
 'TECHM',
 'TCS',
 'COALINDIA',
 'EICHERMOT',
 'BAJFINANCE',
 'SHRIRAMFIN',
 'HINDALCO',
 'RELIANCE',
 'HDFCLIFE',
 'SBILIFE',
 'AXISBANK',
 'ASIANPAINT',
 'GRASIM',
 'ONGC',
 'LT',
 'TATASTEEL',
 'TRENT',
 'APOLLOHOSP',
 'NTPC',
 'ADANIPORTS',
 'BAJAJ-AUTO',
 'BAJAJFINSV',
 'BHARTIARTL',
 'WIPRO',
 'INFY',
 'HEROMOTOCO',
 'POWERGRID',
 'INDUSINDBK']

In [6]:
(datetime.now() - timedelta(days=364*3)).strftime('%Y-%m-%d')

'2022-05-02'

In [8]:
def get_stock_data(symbol : str, period = '1y', interval = '1d ', start=(datetime.now() - timedelta(days=364*2)).strftime('%Y-%m-%d'), end = datetime.now().strftime("%Y-%m-%d") ):
    """
    Fetch stock data from Yahoo Finance using yfinance library.
    Parameters:
    symbol (str): Stock symbol to fetch data for.
    """
    symbol = symbol + ".NS"
    stock = yf.Ticker(symbol)
    df = pd.DataFrame(stock.history(period=period, interval=interval, start=start, end=end))
    return df,stock


In [17]:
data, stock = get_stock_data(symbol=symbols[7])

In [15]:
stock.info.get('shortName', 'N/A')

'BHARAT ELECTRONICS LTD'

In [14]:
stock.info['debtToEquity']

0.344

In [8]:
data = get_stock_data(symbols[4])
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-04-21 00:00:00+05:30,250.0,250.0,244.5,246.380005,31786610,0.0,0.0
2025-04-22 00:00:00+05:30,245.5,251.339996,243.699997,250.559998,30235260,0.0,0.0
2025-04-23 00:00:00+05:30,252.0,258.549988,251.389999,258.140015,37568330,0.0,0.0
2025-04-24 00:00:00+05:30,258.0,259.799988,256.5,258.779999,17152000,0.0,0.0
2025-04-25 00:00:00+05:30,258.779999,261.619995,249.100006,253.050003,33348710,0.0,0.0


In [9]:
data.shape

(409, 7)

In [None]:
def plot_stock_data(df,):
    """
    Plot stock data using Plotly.
    Parameters:
    df (DataFrame): DataFrame containing stock data.
    """
    fig = px.line(df, x=df.index, y="Close Price", title=f"{symbols[4]} Stock Price")
    fig.update_layout(
        xaxis_rangeslider_thickness=0.1,
        xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
        xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
    )
    return fig


plot_stock_data(data)

In [None]:
def plot_stock_high_low(df,
                        
                        ):
    """
    Plot high and low stock prices using Plotly.
    Parameters:
    df (DataFrame): DataFrame containing stock data.
    """
    fig = px.line(df, x=df.index, y=["High", "Low"], title=f"{symbols[4]} High and Low Prices")
    fig.update_layout(
        xaxis_rangeslider_thickness=0.1,
        xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
        xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
    )
    return fig
    return fig

plot_stock_high_low(data)


In [None]:
def plot_stock_volume(df):
    """
    Plot stock volume using Plotly.
    Parameters:
    df (DataFrame): DataFrame containing stock data.
    """
    fig = px.line(df, x=df.index, y="Volume", title=f"{symbols[4]} Volume TraDED")
    fig.update_layout(
        xaxis_rangeslider_thickness=0.1,
        xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
        xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
    )
    return fig
    return fig
plot_stock_volume(data)


## Moving Averages

In [20]:
def calculate_mathematical_term(df):
    """
    Calculate SMA, MACDa dn RSI for stock data.
    Parameters:
    df (DataFrame): DataFrame containing stock data.
    Returns:
    DataFrame: DataFrame with SMA(50 days and 200days), MACD and RSI columns added.
    """

    #SMA(50 days and 200days)
    sma_50 = df['Close'].rolling(window=50).mean()
    sma_200 = df['Close'].rolling(window=200).mean()
    sma_10 = df['Close'].rolling(window=10).mean()
    df['sma_50'] = sma_50
    df['sma_200'] = sma_200
    df['sma_10'] = sma_10

    #MACD
    macd = MACD(close=df['Close'])
    df['MACD'] = macd.macd()
    df['Signal_Line'] = macd.macd_signal()
    df['MACD_Diff'] = macd.macd_diff()

    #RSI
    rsi = RSIIndicator(close=df['Close'], window=14)
    df['RSI'] = rsi.rsi()

    return df

calculate_mathematical_term(data)

TypeError: tuple indices must be integers or slices, not str

In [14]:
data.isnull().sum().to_frame()

Unnamed: 0,0
Open,0
High,0
Low,0
Close,0
Volume,0
Dividends,0
Stock Splits,0
sma_50,49
sma_200,199
sma_10,9


In [None]:
def plot_sma(df):
    """
    Plot SMA using Plotly.
    Parameters:
    df (DataFrame): DataFrame containing stock data.
    """
    fig = px.line(df, x=df.index, y=["Close", "sma_50", "sma_200"], title=f"{symbols[4]} Stock Price with SMA")
    fig.update_layout(
        xaxis_rangeslider_thickness=0.1,
        xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
        xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
    )
    return fig
plot_sma(data)

In [16]:
def plot_MACD(df):
    """
    Plot MACD using Plotly.
    Parameters:
    df (DataFrame): DataFrame containing stock data.
    """
    colors = np.where(df['MACD_Diff'] > 0, 'green', 'red')
    fig = go.Figure()

    fig.add_bar(
        x=df.index,
        y=df.MACD_Diff,
        name='MACD Difference',
        marker=dict(color=colors),
    )

    fig.add_trace(go.Scatter(
                x=df.index,
                y=df['MACD'],
                name='MACD',
                opacity=0.8,
                line=dict(color='blue') 
                ))

    fig.add_trace(go.Scatter(
        x=df.index,
        y=df['Signal_Line'],
        name='Signal Line',
        opacity=0.8,
        line=dict(color='grey')  # <-- Use this for line color
    ))

    fig.update_layout(
        title=f"{symbols[4]} MACD",
        xaxis_title="Date",
        yaxis_title="MACD Difference",
        xaxis_rangeslider_visible=True,
        xaxis_rangeslider_thickness=0.05,
        xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
        xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
    )

    return fig


plot_MACD(data)

In [17]:
def plot_rsi(df):
    """
    Plot RSI using Plotly.
    Parameters:
    df (DataFrame): DataFrame containing stock data.
    """
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df.index,
        y=df['RSI'],
        name='RSI',
        line=dict(color='blue')
    ))

    fig.add_hline(y=70, line_color='red', line_dash='dash', annotation_text="Overbought", annotation_position="bottom right")
    fig.add_hline(y=30, line_color='green', line_dash='dash', annotation_text="Oversold", annotation_position="top right")

    fig.update_layout(
        title=f"{symbols[4]} RSI",
        xaxis_title="Date",
        yaxis_title="RSI",
        xaxis_rangeslider_visible=True,
        xaxis_rangeslider_thickness=0.05,
        xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
        xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
    )

    return fig

plot_rsi(data)

In [18]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,sma_50,sma_200,sma_10,MACD,Signal_Line,MACD_Diff,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-08-21 00:00:00+05:30,262.0,262.049988,248.899994,248.899994,74715306,0.0,0.0,,,,,,,
2023-08-22 00:00:00+05:30,236.449997,236.449997,236.449997,236.449997,7281398,0.0,0.0,,,,,,,
2023-08-23 00:00:00+05:30,224.649994,224.649994,224.649994,224.649994,4289046,0.0,0.0,,,,,,,
2023-08-24 00:00:00+05:30,213.449997,213.449997,213.449997,213.449997,27673350,0.0,0.0,,,,,,,
2023-08-25 00:00:00+05:30,202.800003,224.100006,202.800003,214.5,281040100,0.0,0.0,,,,,,,


## Training

In [19]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR

In [20]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,sma_50,sma_200,sma_10,MACD,Signal_Line,MACD_Diff,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-08-21 00:00:00+05:30,262.0,262.049988,248.899994,248.899994,74715306,0.0,0.0,,,,,,,
2023-08-22 00:00:00+05:30,236.449997,236.449997,236.449997,236.449997,7281398,0.0,0.0,,,,,,,
2023-08-23 00:00:00+05:30,224.649994,224.649994,224.649994,224.649994,4289046,0.0,0.0,,,,,,,
2023-08-24 00:00:00+05:30,213.449997,213.449997,213.449997,213.449997,27673350,0.0,0.0,,,,,,,
2023-08-25 00:00:00+05:30,202.800003,224.100006,202.800003,214.5,281040100,0.0,0.0,,,,,,,


In [21]:
data.isnull().sum()

Open              0
High              0
Low               0
Close             0
Volume            0
Dividends         0
Stock Splits      0
sma_50           49
sma_200         199
sma_10            9
MACD             25
Signal_Line      33
MACD_Diff        33
RSI              13
dtype: int64

In [22]:
def  time_train_test_split(data, test_size=0.2):
    """
    Split the data into training and testing sets.

    Returns:
    DataFrame: Training set.
    DataFrame: Testing set.
    """
    
    train_size = int(len(data) * (1 - test_size))
    train_data = data[:train_size]
    test_data = data[train_size:]
    
    return train_data, test_data


#

## PROPHET MODEL FORECASTING'

In [23]:
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


data_prophet = data.reset_index()
data_prophet = data_prophet.rename(columns={'Date': 'ds', 'Close': 'y'})
data_prophet = data_prophet[['ds', 'y']]
data_prophet.head()

Unnamed: 0,ds,y
0,2023-08-21 00:00:00+05:30,248.899994
1,2023-08-22 00:00:00+05:30,236.449997
2,2023-08-23 00:00:00+05:30,224.649994
3,2023-08-24 00:00:00+05:30,213.449997
4,2023-08-25 00:00:00+05:30,214.5


In [24]:
data_prophet.dtypes

ds    datetime64[ns, Asia/Kolkata]
y                          float64
dtype: object

In [25]:
data_prophet['ds'] = data_prophet['ds'].dt.tz_convert(None)

In [26]:
data_prophet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 409 entries, 0 to 408
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      409 non-null    datetime64[ns]
 1   y       409 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 6.5 KB


In [27]:
data_prophet.describe()

Unnamed: 0,ds,y
count,409,409.0
mean,2024-06-22 01:32:29.633251840,290.723717
min,2023-08-20 18:30:00,200.919998
25%,2024-01-18 18:30:00,234.0
50%,2024-06-24 18:30:00,304.700012
75%,2024-11-25 18:30:00,343.799988
max,2025-04-24 18:30:00,387.950012
std,,54.905485


In [28]:
train_data, test_data = time_train_test_split(data_prophet, test_size=0.2)

In [29]:

model_prophet = Prophet()
model_prophet.fit(train_data)

12:20:04 - cmdstanpy - INFO - Chain [1] start processing
12:20:04 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x20c34dad150>

In [30]:
  # daily frequency
future = model_prophet.make_future_dataframe(periods=len(test_data))
forecast = model_prophet.predict(future)
forecast.tail()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
404,2025-03-11 18:30:00,317.04223,286.804791,340.75773,294.896313,339.074677,-3.58794,-3.58794,-3.58794,-3.58794,-3.58794,-3.58794,0.0,0.0,0.0,313.45429
405,2025-03-12 18:30:00,316.929271,286.937599,339.953002,294.473437,339.460278,-2.438835,-2.438835,-2.438835,-2.438835,-2.438835,-2.438835,0.0,0.0,0.0,314.490436
406,2025-03-13 18:30:00,316.816312,288.160839,343.183404,293.873203,339.827715,-1.414675,-1.414675,-1.414675,-1.414675,-1.414675,-1.414675,0.0,0.0,0.0,315.401636
407,2025-03-14 18:30:00,316.703352,294.667281,348.590491,293.319734,340.210052,5.060726,5.060726,5.060726,5.060726,5.060726,5.060726,0.0,0.0,0.0,321.764079
408,2025-03-15 18:30:00,316.590393,292.179464,347.308249,292.749638,340.363334,5.060728,5.060728,5.060728,5.060728,5.060728,5.060728,0.0,0.0,0.0,321.651121


In [31]:

future

Unnamed: 0,ds
0,2023-08-20 18:30:00
1,2023-08-21 18:30:00
2,2023-08-22 18:30:00
3,2023-08-23 18:30:00
4,2023-08-24 18:30:00
...,...
404,2025-03-11 18:30:00
405,2025-03-12 18:30:00
406,2025-03-13 18:30:00
407,2025-03-14 18:30:00


In [32]:
forecast.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2023-08-20 18:30:00,237.813357,223.067243,250.763633,237.813357,237.813357,-1.043256,-1.043256,-1.043256,-1.043256,-1.043256,-1.043256,0.0,0.0,0.0,236.770102
1,2023-08-21 18:30:00,237.616138,220.719771,250.262784,237.616138,237.616138,-1.636749,-1.636749,-1.636749,-1.636749,-1.636749,-1.636749,0.0,0.0,0.0,235.979389
2,2023-08-22 18:30:00,237.418919,218.974473,248.620997,237.418919,237.418919,-3.58794,-3.58794,-3.58794,-3.58794,-3.58794,-3.58794,0.0,0.0,0.0,233.830979
3,2023-08-23 18:30:00,237.2217,219.841972,249.340305,237.2217,237.2217,-2.438835,-2.438835,-2.438835,-2.438835,-2.438835,-2.438835,0.0,0.0,0.0,234.782865
4,2023-08-24 18:30:00,237.02448,221.082156,251.083266,237.02448,237.02448,-1.414675,-1.414675,-1.414675,-1.414675,-1.414675,-1.414675,0.0,0.0,0.0,235.609805


In [33]:
test_data.tail()

Unnamed: 0,ds,y
404,2025-04-20 18:30:00,246.380005
405,2025-04-21 18:30:00,250.559998
406,2025-04-22 18:30:00,258.140015
407,2025-04-23 18:30:00,258.779999
408,2025-04-24 18:30:00,253.050003


In [34]:
from prophet.plot import plot_plotly, plot_components_plotly

In [35]:
plot_plotly(model_prophet,forecast,)

In [36]:
data_prophet.shape

(409, 2)

In [37]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

mae = mean_absolute_error(data_prophet['y'], forecast['yhat'])
rmse = np.sqrt(mean_squared_error(data_prophet['y'], forecast['yhat']))
r2 = r2_score(data_prophet['y'], forecast['yhat'])
print(f"R^2: {r2:.2f}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")


R^2: 0.54
MAE: 22.52
RMSE: 37.37


In [38]:
import plotly.graph_objs as go

# Create the base figure
fig = go.Figure()

# Historical data
fig.add_trace(go.Scatter(
    x=data_prophet['ds'],
    y=data_prophet['y'],
    name='Actual',
    mode='lines',
    line=dict(color='black')
))

# Forecast line
fig.add_trace(go.Scatter(
    x=data_prophet['ds'],
    y=forecast['yhat'],
    name='Forecast',
    line=dict(color='royalblue')
))



# Data For Training ML Models

In [27]:
def prepare_data_for_model(symbol):

    data,stock = get_stock_data(symbol)

    data = calculate_mathematical_term(data)

    data.drop(data[[ 'sma_50', 'sma_200', 'Dividends', 'Stock Splits','Open', 'High', 'Low', 'Signal_Line', 'Volume']], axis=1, inplace=True)

    data.reset_index( inplace=True)
    
    data['Close_prev'] = data['Close'].shift(1)
    
    data.dropna(axis=0, inplace=True)
   
    data['dayofweek'] = data['Date'].dt.dayofweek          # 0=Monday
    data['day'] = data['Date'].dt.day
    data['month'] = data['Date'].dt.month
    data['year'] = data['Date'].dt.year
    
    data.set_index('Date', inplace=True)
    

    return data

## Machine Learning(XGBoost, LR)

In [40]:
# df_xgboost = get_stock_data(symbols[4])
# df_xgboost = calculate_mathematical_term(df_xgboost)
df_xgboost = prepare_data_for_model(symbols[4])
df_xgboost.isnull().sum()

Close         0
sma_10        0
MACD          0
MACD_Diff     0
RSI           0
Close_prev    0
dayofweek     0
day           0
month         0
year          0
dtype: int64

In [41]:
df_xgboost.columns

Index(['Close', 'sma_10', 'MACD', 'MACD_Diff', 'RSI', 'Close_prev',
       'dayofweek', 'day', 'month', 'year'],
      dtype='object')

In [42]:
df_xgboost.drop(df_xgboost[[ 'sma_50', 'sma_200', 'Dividends', 'Stock Splits','Open', 'High', 'Low', 'Signal_Line', 'Volume']], axis=1, inplace=True)

KeyError: "None of [Index(['sma_50', 'sma_200', 'Dividends', 'Stock Splits', 'Open', 'High', 'Low',\n       'Signal_Line', 'Volume'],\n      dtype='object')] are in the [columns]"

In [None]:
df_xgboost.columns

Index(['Close', 'sma_10', 'MACD', 'MACD_Diff', 'RSI'], dtype='object')

In [None]:
df_xgboost.isnull().sum()

Close         0
sma_10        9
MACD         25
MACD_Diff    33
RSI          13
dtype: int64

In [None]:
df_xgboost.dropna(axis=0, how='any', inplace=True)
df_xgboost.isnull().sum()

Close        0
sma_10       0
MACD         0
MACD_Diff    0
RSI          0
dtype: int64

In [None]:
df_xgboost.shape

(376, 5)

In [None]:
df_xgboost.head()

Unnamed: 0_level_0,Close,sma_10,MACD,MACD_Diff,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-10-09 00:00:00+05:30,219.649994,226.994998,-4.598435,-0.639077,33.890669
2023-10-10 00:00:00+05:30,220.149994,226.034998,-4.865632,-0.725019,34.702057
2023-10-11 00:00:00+05:30,225.100006,225.504999,-4.624653,-0.387232,42.257846
2023-10-12 00:00:00+05:30,225.149994,225.284998,-4.379161,-0.113392,42.330418
2023-10-13 00:00:00+05:30,224.550003,225.234998,-4.184782,0.064789,41.653719


In [None]:
df_xgboost.reset_index( inplace=True)

In [None]:
df_xgboost

Unnamed: 0,Date,Close,sma_10,MACD,MACD_Diff,RSI,Close_prev
0,2023-10-10 00:00:00+05:30,220.149994,226.034998,-4.865632,-0.725019,34.702057,219.649994
1,2023-10-11 00:00:00+05:30,225.100006,225.504999,-4.624653,-0.387232,42.257846,220.149994
2,2023-10-12 00:00:00+05:30,225.149994,225.284998,-4.379161,-0.113392,42.330418,225.100006
3,2023-10-13 00:00:00+05:30,224.550003,225.234998,-4.184782,0.064789,41.653719,225.149994
4,2023-10-16 00:00:00+05:30,224.800003,224.594998,-3.964858,0.227771,42.069277,224.550003
...,...,...,...,...,...,...,...
370,2025-04-21 00:00:00+05:30,246.380005,231.570003,3.386917,2.849986,62.465563,246.470001
371,2025-04-22 00:00:00+05:30,250.559998,233.585002,4.434417,3.117989,64.834279,246.380005
372,2025-04-23 00:00:00+05:30,258.140015,237.167003,5.809246,3.594254,68.692688,250.559998
373,2025-04-24 00:00:00+05:30,258.779999,241.693002,6.871242,3.725001,69.001936,258.140015


In [None]:
df_xgboost['Close_prev'] = df_xgboost['Close'].shift(1)

In [None]:
df_xgboost

Unnamed: 0,Date,Close,sma_10,MACD,RSI,Close_prev
0,2023-09-26 00:00:00+05:30,230.399994,234.484998,-3.288582,40.406910,
1,2023-09-27 00:00:00+05:30,227.350006,232.794998,-3.626918,37.550776,230.399994
2,2023-09-28 00:00:00+05:30,225.050003,231.279999,-4.034140,35.512250,227.350006
3,2023-09-29 00:00:00+05:30,231.199997,230.144998,-3.816618,44.230424,225.050003
4,2023-10-03 00:00:00+05:30,231.399994,229.344998,-3.586746,44.493226,231.199997
...,...,...,...,...,...,...
379,2025-04-21 00:00:00+05:30,246.380005,231.570003,3.386917,62.465563,246.470001
380,2025-04-22 00:00:00+05:30,250.559998,233.585002,4.434417,64.834279,246.380005
381,2025-04-23 00:00:00+05:30,258.140015,237.167003,5.809246,68.692688,250.559998
382,2025-04-24 00:00:00+05:30,258.779999,241.693002,6.871242,69.001936,258.140015


In [None]:
df_xgboost.dropna(axis=0, how='any', inplace=True)
df_xgboost.isnull().sum()

Close         0
sma_10        0
MACD          0
MACD_Diff     0
RSI           0
Close_prev    0
dtype: int64

In [None]:
df_xgboost.shape

(383, 6)

In [None]:
df_xgboost['dayofweek'] = df_xgboost['Date'].dt.dayofweek          # 0=Monday
df_xgboost['day'] = df_xgboost['Date'].dt.day
df_xgboost['month'] = df_xgboost['Date'].dt.month
df_xgboost['year'] = df_xgboost['Date'].dt.year

In [None]:
df_xgboost.set_index('Date', inplace=True)

In [None]:
df_xgboost

Unnamed: 0_level_0,Close,sma_10,MACD,MACD_Diff,RSI,Close_prev,dayofweek,day,month,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-10-10 00:00:00+05:30,220.149994,226.034998,-4.865632,-0.725019,34.702057,219.649994,1,10,10,2023
2023-10-11 00:00:00+05:30,225.100006,225.504999,-4.624653,-0.387232,42.257846,220.149994,2,11,10,2023
2023-10-12 00:00:00+05:30,225.149994,225.284998,-4.379161,-0.113392,42.330418,225.100006,3,12,10,2023
2023-10-13 00:00:00+05:30,224.550003,225.234998,-4.184782,0.064789,41.653719,225.149994,4,13,10,2023
2023-10-16 00:00:00+05:30,224.800003,224.594998,-3.964858,0.227771,42.069277,224.550003,0,16,10,2023
...,...,...,...,...,...,...,...,...,...,...
2025-04-21 00:00:00+05:30,246.380005,231.570003,3.386917,2.849986,62.465563,246.470001,0,21,4,2025
2025-04-22 00:00:00+05:30,250.559998,233.585002,4.434417,3.117989,64.834279,246.380005,1,22,4,2025
2025-04-23 00:00:00+05:30,258.140015,237.167003,5.809246,3.594254,68.692688,250.559998,2,23,4,2025
2025-04-24 00:00:00+05:30,258.779999,241.693002,6.871242,3.725001,69.001936,258.140015,3,24,4,2025


In [None]:
train_data, test_data = time_train_test_split(df_xgboost, test_size=0.2)

In [None]:
train_data.shape, test_data.shape

((300, 10), (76, 10))

In [None]:
X_train = train_data.drop(['Close'], axis=1)
X_test = test_data.drop(['Close'], axis=1)
y_train = train_data['Close']
y_test = test_data['Close']
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((300, 9), (76, 9), (300,), (76,))

In [None]:

from xgboost import XGBRegressor

xgb_model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    verbose=1  # Will print warning/info messages during training
)

In [None]:
xgb_model.fit(X_train, y_train)

In [None]:
y_pred = xgb_model.predict(X_test)

In [None]:
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)   
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"R^2: {r2:.2f}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

R^2: 0.82
MAE: 7.53
RMSE: 9.91


In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(x= X_train.index, y=y_train, mode='lines', name='Train Data'))
fig.add_trace(go.Scatter(x= X_test.index, y=y_test, mode='lines', name='Test Data'))
fig.add_trace(go.Scatter(x= X_test.index, y=y_pred, mode='lines', name='Predictions'))

fig.update_layout(
    title="XGBoost Predictions vs Actual",
    xaxis_title="Date",
    yaxis_title="Close Price",
    xaxis_rangeslider_visible=True,
    xaxis_rangeslider_thickness=0.05,
    xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
    xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
)

fig.show()

In [None]:
from sklearn.model_selection import RandomizedSearchCV

xgb = XGBRegressor(eval_metric = "rmse", random_state=42)

param_grid = {
    'n_estimators': [100, 200, 300],              # Number of trees
    'learning_rate': [0.01, 0.05, 0.1],           # Learning rate
    'max_depth': [3, 5, 7, 10],                    # Maximum depth of each tree
    'min_child_weight': [1, 3, 5],                  # Minimum sum of instance weight (leaf node)
    'subsample': [0.7, 0.8, 0.9],                  # Fraction of samples used for training
    'colsample_bytree': [0.7, 0.8, 0.9],          # Fraction of features used for each tree
    'gamma': [0, 0.1, 0.2],                       # Regularization parameter
    'reg_alpha': [0, 0.1, 0.2],                   # L1 regularization
    'reg_lambda': [0, 0.1, 0.2]                   # L2 regularization
}

grid_xg_model = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_grid,
    scoring='neg_root_mean_squared_error',
    verbose=1,
    n_jobs=None,
    cv=3  # Use all available cores
)

In [None]:
grid_xg_model.fit(X_train, y_train)


Fitting 3 folds for each of 10 candidates, totalling 30 fits


In [None]:
print("Best_Parmas: ", grid_xg_model.best_params_)
print("Best_Score: ", grid_xg_model.best_score_)
print("Best Estimator", grid_xg_model.best_estimator_)

Best_Parmas:  {'subsample': 0.7, 'reg_lambda': 0.1, 'reg_alpha': 0, 'n_estimators': 300, 'min_child_weight': 1, 'max_depth': 3, 'learning_rate': 0.1, 'gamma': 0.1, 'colsample_bytree': 0.7}
Best_Score:  -31.502945316608873
Best Estimator XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=0.7, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric='rmse', feature_types=None,
             feature_weights=None, gamma=0.1, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.1, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=3,
             max_leaves=None, min_child_weight=1, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=300,
             n_jobs=None, num_parallel_tree=None, ...)


In [None]:
xgb_best = grid_xg_model.best_estimator_
ypred_xgb= xgb_best.predict(X_test)
grid_xg_model.score(X_test, y_test)

-6.9921539470066

In [None]:
r2 = r2_score(y_test, ypred_xgb)
mae = mean_absolute_error(y_test, ypred_xgb)   
rmse = np.sqrt(mean_squared_error(y_test, ypred_xgb))
print(f"R^2: {r2:.2f}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

R^2: 0.91
MAE: 5.16
RMSE: 6.99


In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(x= X_train.index, y=y_train, mode='lines', name='Train Data'))
fig.add_trace(go.Scatter(x= X_test.index, y=y_test, mode='lines', name='Test Data'))
fig.add_trace(go.Scatter(x= X_test.index, y=ypred_xgb, mode='lines', name='Predictions'))

fig.update_layout(
    title="XGBoost Predictions vs Actual",
    xaxis_title="Date",
    yaxis_title="Close Price",
    xaxis_rangeslider_visible=True,
    xaxis_rangeslider_thickness=0.05,
    xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
    xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
)

fig.show()

In [None]:
def prepare_x_y(train_data, test_data):
    """
    Prepare X and y for training and testing.
    Parameters:
    train_data (DataFrame): Training data.
    test_data (DataFrame): Testing data.
    Returns:
    DataFrame: X_train.
    DataFrame: X_test.
    Series: y_train.
    Series: y_test.
    """
    X_train = train_data.drop(['Close'], axis=1)
    X_test = test_data.drop(['Close'], axis=1)
    y_train = train_data['Close']
    y_test = test_data['Close']
    
    return X_train, X_test, y_train, y_test

# LR

In [None]:
data_lr = prepare_data_for_model(symbols[4])
train_data_lr, test_data_lr = time_train_test_split(data_lr, test_size=0.2)
X_train_lr, X_test_lr, y_train_lr, y_test_lr = prepare_x_y(train_data_lr, test_data_lr)
X_train_lr.shape, X_test_lr.shape, y_train_lr.shape, y_test_lr.shape



((300, 9), (76, 9), (300,), (76,))

In [None]:
X_train_lr

Unnamed: 0_level_0,sma_10,MACD,MACD_Diff,RSI,Close_prev,dayofweek,day,month,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-10-09 00:00:00+05:30,226.994998,-4.598435,-0.639077,33.890669,225.100006,0,9,10,2023
2023-10-10 00:00:00+05:30,226.034998,-4.865632,-0.725019,34.702057,219.649994,1,10,10,2023
2023-10-11 00:00:00+05:30,225.504999,-4.624653,-0.387232,42.257846,220.149994,2,11,10,2023
2023-10-12 00:00:00+05:30,225.284998,-4.379161,-0.113392,42.330418,225.100006,3,12,10,2023
2023-10-13 00:00:00+05:30,225.234998,-4.184782,0.064789,41.653719,225.149994,4,13,10,2023
...,...,...,...,...,...,...,...,...,...
2024-12-27 00:00:00+05:30,317.900003,-5.958869,-3.870558,33.385221,305.549988,4,27,12,2024
2024-12-30 00:00:00+05:30,314.550003,-6.329586,-3.393020,34.976493,304.950012,0,30,12,2024
2024-12-31 00:00:00+05:30,310.385004,-7.150179,-3.370891,30.430083,306.250000,1,31,12,2024
2025-01-01 00:00:00+05:30,307.120004,-7.252922,-2.778907,37.132408,298.700012,2,1,1,2025


In [None]:
from sklearn.linear_model import LinearRegression

model_lr = LinearRegression()
model_lr.fit(X_train_lr, y_train_lr)

In [None]:
X_train_lr

Unnamed: 0_level_0,sma_10,MACD,MACD_Diff,RSI,Close_prev,dayofweek,day,month,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-10-09 00:00:00+05:30,226.994998,-4.598435,-0.639077,33.890669,225.100006,0,9,10,2023
2023-10-10 00:00:00+05:30,226.034998,-4.865632,-0.725019,34.702057,219.649994,1,10,10,2023
2023-10-11 00:00:00+05:30,225.504999,-4.624653,-0.387232,42.257846,220.149994,2,11,10,2023
2023-10-12 00:00:00+05:30,225.284998,-4.379161,-0.113392,42.330418,225.100006,3,12,10,2023
2023-10-13 00:00:00+05:30,225.234998,-4.184782,0.064789,41.653719,225.149994,4,13,10,2023
...,...,...,...,...,...,...,...,...,...
2024-12-27 00:00:00+05:30,317.900003,-5.958869,-3.870558,33.385221,305.549988,4,27,12,2024
2024-12-30 00:00:00+05:30,314.550003,-6.329586,-3.393020,34.976493,304.950012,0,30,12,2024
2024-12-31 00:00:00+05:30,310.385004,-7.150179,-3.370891,30.430083,306.250000,1,31,12,2024
2025-01-01 00:00:00+05:30,307.120004,-7.252922,-2.778907,37.132408,298.700012,2,1,1,2025


In [None]:
y_pred_lr = model_lr.predict(X_test_lr)

In [None]:
print("Intercept: ", model_lr.intercept_)
print("Coefficients: ", model_lr.coef_)

Intercept:  3051.82572077107
Coefficients:  [ 0.7750991  -0.67150648  1.10350827  0.79729929  0.23833102 -0.03926416
  0.01705606 -0.39631812 -1.52775598]


In [None]:
model_lr.score(X_test_lr, y_test_lr)

0.9788559872379928

In [None]:
r2 = r2_score(y_test_lr, y_pred_lr)
mae = mean_absolute_error(y_test_lr, y_pred_lr)   
rmse = np.sqrt(mean_squared_error(y_test_lr, y_pred_lr))
print(f"R^2: {r2:.2f}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")


R^2: 0.98
MAE: 2.62
RMSE: 3.37


In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(x= X_train_lr.index, y=y_train_lr, mode='lines', name='Train Data'))
fig.add_trace(go.Scatter(x= X_test_lr.index, y=y_test_lr, mode='lines', name='Test Data'))
fig.add_trace(go.Scatter(x= X_test_lr.index, y=y_pred_lr, mode='lines', name='Predictions'))

fig.update_layout(
    title="Linear Regression Predictions vs Actual",
    xaxis_title="Date",
    yaxis_title="Close Price",
    xaxis_rangeslider_visible=True,
    xaxis_rangeslider_thickness=0.05,
    xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
    xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
)


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet

# Initialize the model
elastic = ElasticNet()

# Define parameter grid
param_grid = {
    'alpha': [0.01, 0.1, 1, 10],           # regularization strength
    'l1_ratio': [0.1, 0.5, 0.7, 0.9, 1.0], # 0 = Ridge, 1 = Lasso, in between = ElasticNet
    'fit_intercept': [True, False],
    'max_iter': [1000, 5000]               # ensure convergence
}

# GridSearchCV setup
grid_search = GridSearchCV(
    estimator=elastic,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=5,
    verbose=1,
    n_jobs=-1
)

# Fit on training data
grid_search.fit(X_train_lr, y_train_lr)

# Best parameters and estimator
print("Best Parameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_

# Predict and evaluate
y_pred_elastic = best_model.predict(X_test_lr)
rmse = np.sqrt(mean_squared_error(y_test_lr, y_pred_elastic))
print("ElasticNet RMSE:", rmse)

Fitting 5 folds for each of 80 candidates, totalling 400 fits
Best Parameters: {'alpha': 1, 'fit_intercept': True, 'l1_ratio': 0.1, 'max_iter': 1000}
ElasticNet RMSE: 3.4971157469634324


In [None]:
fig = go.Figure()

fig.add_trace(go.Scatter(x= X_train_lr.index, y=y_train_lr, mode='lines', name='Train Data'))
fig.add_trace(go.Scatter(x= X_test_lr.index, y=y_test_lr, mode='lines', name='Test Data'))
fig.add_trace(go.Scatter(x= X_test_lr.index, y=y_pred_elastic, mode='lines', name='Predictions'))

fig.update_layout(
    title="Linear Regression Predictions vs Actual",
    xaxis_title="Date",
    yaxis_title="Close Price",
    xaxis_rangeslider_visible=True,
    xaxis_rangeslider_thickness=0.05,
    xaxis_rangeslider_bgcolor='rgba(0, 0, 0, 0.1)',
    xaxis_rangeslider_bordercolor='rgba(0, 0, 0, 0.1)',   
)

## LSTM

In [43]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler


In [44]:
df_lstm=prepare_data_for_model(symbols[4])


In [45]:
df_lstm.head()

Unnamed: 0_level_0,Close,sma_10,MACD,MACD_Diff,RSI,Close_prev,dayofweek,day,month,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-10-09 00:00:00+05:30,219.649994,226.994998,-4.598435,-0.639077,33.890669,225.100006,0,9,10,2023
2023-10-10 00:00:00+05:30,220.149994,226.034998,-4.865632,-0.725019,34.702057,219.649994,1,10,10,2023
2023-10-11 00:00:00+05:30,225.100006,225.504999,-4.624653,-0.387232,42.257846,220.149994,2,11,10,2023
2023-10-12 00:00:00+05:30,225.149994,225.284998,-4.379161,-0.113392,42.330418,225.100006,3,12,10,2023
2023-10-13 00:00:00+05:30,224.550003,225.234998,-4.184782,0.064789,41.653719,225.149994,4,13,10,2023


In [47]:
X = df_lstm.drop(['Close'], axis=1)
y = df_lstm['Close']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((300, 9), (76, 9), (300,), (76,))

In [None]:
X_test.index

DatetimeIndex(['2025-01-03 00:00:00+05:30', '2025-01-06 00:00:00+05:30',
               '2025-01-07 00:00:00+05:30', '2025-01-08 00:00:00+05:30',
               '2025-01-09 00:00:00+05:30', '2025-01-10 00:00:00+05:30',
               '2025-01-13 00:00:00+05:30', '2025-01-14 00:00:00+05:30',
               '2025-01-15 00:00:00+05:30', '2025-01-16 00:00:00+05:30',
               '2025-01-17 00:00:00+05:30', '2025-01-20 00:00:00+05:30',
               '2025-01-21 00:00:00+05:30', '2025-01-22 00:00:00+05:30',
               '2025-01-23 00:00:00+05:30', '2025-01-24 00:00:00+05:30',
               '2025-01-27 00:00:00+05:30', '2025-01-28 00:00:00+05:30',
               '2025-01-29 00:00:00+05:30', '2025-01-30 00:00:00+05:30',
               '2025-01-31 00:00:00+05:30', '2025-02-01 00:00:00+05:30',
               '2025-02-03 00:00:00+05:30', '2025-02-04 00:00:00+05:30',
               '2025-02-05 00:00:00+05:30', '2025-02-06 00:00:00+05:30',
               '2025-02-07 00:00:00+05:30', '2025-0

In [48]:
def scaled_features(features,pred,X_train, X_test, y_train, y_test):
    """
    Scale features using MinMaxScaler.
    Parameters:
    features (list): List of feature names to scale.
    data (DataFrame): DataFrame containing the data.
    Returns:
    DataFrame: Scaled DataFrame.
    """
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_train_scaled = scaler_x.fit_transform(X_train[features])
    X_test_scaled = scaler_x.fit_transform(X_test[features])
    y_train_scaled = scaler_y.fit_transform(y_train.to_frame())
    y_test_scaled = scaler_y.fit_transform(y_test.to_frame())
    return X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, scaler_x, scaler_y

In [50]:
X_train.columns

Index(['sma_10', 'MACD', 'MACD_Diff', 'RSI', 'Close_prev', 'dayofweek', 'day',
       'month', 'year'],
      dtype='object')

In [51]:
features = ['sma_10', 'MACD', 'MACD_Diff', 'RSI','Close_prev', 'dayofweek', 'day', 'month', 'year']
pred = ['Close']
X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, scaler_x, scaler_y = scaled_features(features, pred, X_train, X_test, y_train, y_test)
X_train_scaled.shape, X_test_scaled.shape, y_train_scaled.shape, y_test_scaled.shape

((300, 9), (76, 9), (300, 1), (76, 1))

In [52]:
X_train_scaled

array([[0.06947438, 0.12977488, 0.46284429, ..., 0.26666667, 0.81818182,
        0.        ],
       [0.06364436, 0.12091359, 0.45539422, ..., 0.3       , 0.81818182,
        0.        ],
       [0.06042571, 0.12890539, 0.48467599, ..., 0.33333333, 0.81818182,
        0.        ],
       ...,
       [0.57589654, 0.04514913, 0.22603144, ..., 1.        , 1.        ,
        0.5       ],
       [0.55606841, 0.04174179, 0.2773488 , ..., 0.        , 0.        ,
        1.        ],
       [0.54398327, 0.04247475, 0.32706054, ..., 0.03333333, 0.        ,
        1.        ]])

In [None]:
X_train_scaled.shape

(300, 5)

In [31]:

def create_seq_for_lstm(X,y, timestep=10):
    X = X
    y = y
    Xs, ys = [], []
    for i in range(timestep, X.shape[0]):
        Xs.append(X[i - timestep:i])  
        ys.append(y[i])

    return np.array(Xs), np.array(ys)
X_train_seq, y_train_seq = create_seq_for_lstm(X_train_scaled, y_train_scaled)
X_test_seq, y_test_seq = create_seq_for_lstm(X_test_scaled, y_test_scaled)

NameError: name 'X_train_scaled' is not defined

In [54]:
X_train_seq.shape, y_train_seq.shape, X_test_seq.shape, y_test_seq.shape

((290, 10, 9), (290, 1), (66, 10, 9), (66, 1))

In [55]:
input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])
def create_lstm_model(input_shape):
    """
    Create and compile an LSTM model.
    Parameters:
    input_shape (tuple): Shape of the input data.
    Returns:
    Model: Compiled LSTM model.
    """
    model = Sequential()

    # 1st Layer: Bidirectional LSTM
    model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=input_shape))
    model.add(Dropout(0.3))

    # 2nd Layer: LSTM
    model.add(LSTM(32, return_sequences=False))
    model.add(Dropout(0.3))

    # Output Layer
    model.add(Dense(1))  # Predict single value (like next Close Price)

    model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mse'])
    
    return model

In [56]:
model_lstm = create_lstm_model(input_shape)
model_lstm.summary()

In [57]:
def train_lstm_model(model, X, y, epochs=50, batch_size=32):
    callback_es = tf.keras.callbacks.EarlyStopping(monitor='mse', patience=15, restore_best_weights=True, mode='min')
    callback_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='mse', factor=0.2, patience=15, min_lr=0.00001)
    history = model.fit(X, y, epochs=epochs, batch_size=batch_size,callbacks=[callback_es, callback_lr], verbose=1)
    
    return history
model_lstm_history = train_lstm_model(model_lstm, X_train_seq, y_train_seq, epochs=200, batch_size=32 )

Epoch 1/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - loss: 0.2237 - mae: 0.3961 - mse: 0.2237 - learning_rate: 0.0010
Epoch 2/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0414 - mae: 0.1697 - mse: 0.0414 - learning_rate: 0.0010
Epoch 3/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0283 - mae: 0.1331 - mse: 0.0283 - learning_rate: 0.0010
Epoch 4/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0183 - mae: 0.1096 - mse: 0.0183 - learning_rate: 0.0010
Epoch 5/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0197 - mae: 0.1148 - mse: 0.0197 - learning_rate: 0.0010
Epoch 6/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0143 - mae: 0.0927 - mse: 0.0143 - learning_rate: 0.0010
Epoch 7/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/st

In [58]:
y_pred_lstm_scaled = model_lstm.predict(X_test_seq)

y_pred_lstm = scaler_y.inverse_transform(y_pred_lstm_scaled)
print('Inverse Shape', y_pred_lstm.shape)
y_pred_lstm = y_pred_lstm.reshape(-1,)
print("Predicted Shape", y_pred_lstm.shape)

y_lstm = scaler_y.inverse_transform(y_test_seq)
y_lstm = np.squeeze(y_lstm)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 233ms/step
Inverse Shape (66, 1)
Predicted Shape (66,)


In [59]:
mse = mean_squared_error(y_lstm, y_pred_lstm)
rmse = np.sqrt(mse)

print(f"RMSE: {rmse}")

RMSE: 12.583471173258154


In [60]:
print(X_test.index.shape)
print(y_pred_lstm.shape)

(76,)
(66,)


In [63]:
import plotly.graph_objects as go

# 5. Plot
fig = go.Figure()

fig.add_trace(go.Scatter(x=X_test.index[-len(y_pred_lstm):], y=y_test, mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=X_test.index[-len(y_pred_lstm):], y=y_pred_lstm, mode='lines', name='Predicted'))

fig.update_layout(
    title='LSTM Model: Actual vs Predicted Stock Prices',
    xaxis_title='Date',
    yaxis_title='Stock Price',
    xaxis_rangeslider_visible=True,
    xaxis_rangeslider_thickness=0.05,
)

fig.show()


In [None]:
mse = mean_squared_error(y_lstm, y_pred_lstm)
rmse = np.sqrt(mse)

print(f"RMSE: {rmse}")

RMSE: 13.163073790778077


## FORECAST

### Prophet

In [None]:
def forecast_prophet(days=10):
    ds = []
    for i in range(days):
        future = (datetime.now() + timedelta(days=i)).strftime("%Y-%m-%d")
        ds.append(future)

    future_data = pd.DataFrame(ds, columns = ['ds'])

    prediction = model_prophet.predict(future_data)

    return prediction['yhat'], plot_plotly(model_prophet,prediction)

In [None]:
model_prophet_forecast, model_prophet_forecast_plot = forecast_prophet()

In [None]:
print("Model Prophet Forecast: ",model_prophet_forecast)
model_prophet_forecast_plot

Model Prophet Forecast:  0    315.196040
1    310.317447
2    309.550778
3    307.899459
4    309.134088
5    310.783745
6    317.487826
7    314.405326
8    309.526732
9    308.760063
Name: yhat, dtype: float64


## LSTM Predictions
data = get_stock_data(symbols[4])
data = calculate_mathematical_term(data)
data = prepare_data_for_model(symbols[4])

n_future = 5
predictions = []

# Start from last available sequence
current_sequence = X_test[-1]

for _ in range(n_future):
    pred = model.predict(current_sequence.reshape(1, time_steps, X_train.shape[2]))
    predictions.append(pred[0,0])

    # Update the current sequence by appending the prediction and removing the first element
    new_sequence = np.append(current_sequence[1:], np.expand_dims(pred, 0), axis=0)
    current_sequence = new_sequence

# Inverse transform predictions to get original scale
future_predictions = scaler_y.inverse_transform(np.array(predictions).reshape(-1, 1))

In [65]:
 X_test_seq[-1]

array([[0.11193442, 0.6652989 , 0.50756775, 0.58451033, 0.11827658,
        0.2       , 0.23333333, 1.        , 0.        ],
       [0.10451329, 0.65832144, 0.48569475, 0.52700811, 0.22425604,
        0.4       , 0.26666667, 1.        , 0.        ],
       [0.11276524, 0.68121944, 0.55310266, 0.66826534, 0.18774052,
        0.8       , 0.33333333, 1.        , 0.        ],
       [0.12712474, 0.72559665, 0.66807309, 0.77857998, 0.27316251,
        0.2       , 0.46666667, 1.        , 0.        ],
       [0.1437072 , 0.77163101, 0.76475906, 0.82189078, 0.35342161,
        0.4       , 0.5       , 1.        , 0.        ],
       [0.16172674, 0.81989356, 0.84844039, 0.86897209, 0.3882474 ,
        0.6       , 0.53333333, 1.        , 0.        ],
       [0.18036377, 0.85638316, 0.88192661, 0.8672615 , 0.42757906,
        0.        , 0.66666667, 1.        , 0.        ],
       [0.20298642, 0.89617145, 0.91809052, 0.91536461, 0.42673426,
        0.2       , 0.7       , 1.        , 0.        ],


In [None]:
## LSTM Predictions
data = get_stock_data(symbols[4])
data = calculate_mathematical_term(data)
data = prepare_data_for_model(symbols[4])



n_future = 5
predictions = []
timesteps = 10

# Start from last available sequence
current_sequence = X_test_seq[-1]

for _ in range(n_future):
    pred = model_lstm.predict(current_sequence.reshape(1, timesteps, X_train_seq.shape[2]))
    print(pred)
    predictions.append(pred[0,0])

    # Update the current sequence by appending the prediction and removing the first element
    new_sequence = np.append(current_sequence[1:], np.expand_dims(pred, 0), axis=0)
    current_sequence = new_sequence

# Inverse transform predictions to get original scale
future_predictions = scaler_y.inverse_transform(np.array(predictions).reshape(-1, 1))

print(future_predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369ms/step
[[0.41951054]]


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 3 dimension(s)

### XGB, LR, ElasticNet

In [None]:
data = get_stock_data(symbols[4])
data = calculate_mathematical_term(data)
data = prepare_data_for_model(symbols[4])
data.tail()


Unnamed: 0_level_0,Close,sma_10,MACD,MACD_Diff,RSI,Close_prev,dayofweek,day,month,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2025-04-21 00:00:00+05:30,246.380005,231.570003,3.386917,2.849986,62.465563,246.470001,0,21,4,2025
2025-04-22 00:00:00+05:30,250.559998,233.585002,4.434417,3.117989,64.834279,246.380005,1,22,4,2025
2025-04-23 00:00:00+05:30,258.140015,237.167003,5.809246,3.594254,68.692688,250.559998,2,23,4,2025
2025-04-24 00:00:00+05:30,258.779999,241.693002,6.871242,3.725001,69.001936,258.140015,3,24,4,2025
2025-04-25 00:00:00+05:30,253.050003,244.517003,7.167892,3.21732,63.001512,258.779999,4,25,4,2025


In [None]:
data.isnull().sum()

Date          0
Close         0
sma_10        0
MACD          0
MACD_Diff     0
RSI           0
Close_prev    0
dayofweek     0
day           0
month         0
year          0
dtype: int64

In [None]:
data.columns

Index(['Date', 'Close', 'sma_10', 'MACD', 'MACD_Diff', 'RSI', 'Close_prev',
       'dayofweek', 'day', 'month', 'year'],
      dtype='object')

In [None]:
sma_50 = df['Close'].rolling(window=50).mean()
sma_200 = df['Close'].rolling(window=200).mean()
sma_10 = df['Close'].rolling(window=10).mean()
df['sma_50'] = sma_50
df['sma_200'] = sma_200
df['sma_10'] = sma_10

#MACD
macd = MACD(close=df['Close'])
df['MACD'] = macd.macd()
df['Signal_Line'] = macd.macd_signal()
df['MACD_Diff'] = macd.macd_diff()

#RSI
rsi = RSIIndicator(close=df['Close'], window=14)
df['RSI'] = rsi.rsi()

In [None]:
def calculate_macd(close_prices, slow=26, fast=12):
    exp1 = close_prices.ewm(span=fast, adjust=False).mean()
    exp2 = close_prices.ewm(span=slow, adjust=False).mean()
    macd = exp1 - exp2
    signal = macd.ewm(span=9, adjust=False).mean()
    macd_diff = macd - signal
    return macd.iloc[-1], signal.iloc[-1], macd_diff.iloc[-1]

def calculate_rsi(close_prices, window=14):
    delta = close_prices.diff()
    gain = delta.where(delta > 0, 0).rolling(window=window).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi.iloc[-1]

def calculate_sma(close_prices, window):
    return close_prices.rolling(window=window).mean().iloc[-1]

In [None]:

data['Close'].iloc[-1],

(np.float64(253.0500030517578),)

Unnamed: 0,Date,Close,sma_10,MACD,MACD_Diff,RSI,Close_prev,dayofweek,day,month,year,0
372,2025-04-22 00:00:00+05:30,250.559998,233.585002,4.434417,3.117989,64.834279,246.380005,1.0,22.0,4.0,2025.0,NaT
373,2025-04-23 00:00:00+05:30,258.140015,237.167003,5.809246,3.594254,68.692688,250.559998,2.0,23.0,4.0,2025.0,NaT
374,2025-04-24 00:00:00+05:30,258.779999,241.693002,6.871242,3.725001,69.001936,258.140015,3.0,24.0,4.0,2025.0,NaT
375,2025-04-25 00:00:00+05:30,253.050003,244.517003,7.167892,3.21732,63.001512,258.779999,4.0,25.0,4.0,2025.0,NaT
0,NaT,,,,,,,,,,,2025-04-27 10:59:17.433470


In [None]:
def make_prediction_ml_model(model, n_days = 5,symbol = symbols[4]):
    data = get_stock_data(symbol)
    data = calculate_mathematical_term(data)
    data = prepare_data_for_model(symbol)

    print('Last Close Price: ', data['Close'].iloc[-1])
    print('Last Date: ', data.index[-1])

    predictions = pd.DataFrame(columns=['sma_10', 'MACD', 'MACD_Diff', 'RSI', 'Close_prev', 'dayofweek', 'day', 'month', 'year', 'Close'])

    data_for_predictions = data.copy()

    for i in range(n_days):
        future = pd.to_datetime((datetime.now() + timedelta(days=i)))
        
        pred_dict = {
            'sma_10': calculate_sma(data_for_predictions['Close'], 10),
            'MACD': calculate_macd(data_for_predictions['Close'])[0],
            'MACD_Diff': calculate_macd(data_for_predictions['Close'])[2],
            'RSI': calculate_rsi(data_for_predictions['Close']),  
            'Close_prev': data_for_predictions['Close'].iloc[-1],
            'dayofweek': future.dayofweek, # 0=Monday
            'day' : future.day,
            'month': future.month,
            'year': future.year,     
        }

        df_pred = pd.DataFrame(pred_dict, index=[future])

        y_pred = model.predict(df_pred)

        df_pred['Close'] = y_pred[0]

        data_for_predictions = pd.concat([data_for_predictions, df_pred])

        predictions = pd.concat([predictions, df_pred])

    return predictions



In [None]:
pred_data = make_prediction_ml_model(model_lr, n_days = 5,symbol = symbols[4])

Last Close Price:  253.0500030517578
Last Date:  2025-04-25 00:00:00+05:30


In [None]:
pred_data

Unnamed: 0,sma_10,MACD,MACD_Diff,RSI,Close_prev,dayofweek,day,month,year,Close
2025-04-27 13:54:15.604789,244.517003,7.167892,3.21732,65.215123,253.050003,6,27,4,2025,257.32711
2025-04-28 13:54:15.610772,248.157714,7.659819,2.967397,66.798605,257.32711,0,28,4,2025,262.077416
2025-04-29 13:54:15.614761,251.363455,8.336881,2.915568,75.89135,262.077416,1,29,4,2025,272.409913
2025-04-30 13:54:15.619749,254.747446,9.596581,3.340214,87.600556,272.409913,2,30,4,2025,286.431633
2025-05-01 13:54:15.624735,259.162609,11.592705,4.26907,88.018512,286.431633,3,1,5,2025,292.283252


In [None]:
def plot_forecast(pred_data, model):
    fig = px.line(pred_data, x= pred_data.index,y = ['Close'])

    fig.update_layout(
        title=f'Forecast {model}',
        xaxis_title='Date',
        yaxis_title='Stock Price',
        xaxis_rangeslider_visible=True,
        xaxis_rangeslider_thickness=0.05, 
    )

    fig.show()

plot_forecast(pred_data, "Linear Regression")

In [None]:
pred_data = make_prediction_ml_model(xgb_best, n_days = 5,symbol = symbols[4])

Last Close Price:  253.0500030517578
Last Date:  2025-04-25 00:00:00+05:30


In [None]:
plot_forecast(pred_data, "XG Boost")

In [32]:
def create_seq_for_lstm(X,y, timestep=10):
    X = X
    y = y
    Xs, ys = [], []
    for i in range(timestep, X.shape[0]):
        Xs.append(X[i - timestep:i])  
        ys.append(y[i])


In [33]:
from sklearn.preprocessing import MinMaxScaler

data, stock = get_stock_data(symbols[4])
data = calculate_mathematical_term(data)
data = prepare_data_for_model(symbols[4])
X = data.drop(['Close'], axis=1)
y = data['Close']

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled= scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform([y])
X_train_seq, y_train_seq = create_seq_for_lstm(X_scaled, y_scaled)







IndexError: index 10 is out of bounds for axis 0 with size 1

In [34]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Fetch and prepare data
data, stock = get_stock_data(symbols[4])
data = calculate_mathematical_term(data)
data = prepare_data_for_model(symbols[4])

# Features and target
X = data.drop(['Close'], axis=1)
y = data['Close']

# Scale features and target
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

# Create LSTM sequences
def create_seq_for_lstm(X, y, timestep=10):
    Xs, ys = [], []
    for i in range(timestep, len(X)):
        Xs.append(X[i - timestep:i])
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

X_train_seq, y_train_seq = create_seq_for_lstm(X_scaled, y_scaled)

In [None]:
timestep = 10
n_days = 5

last_sequence = X_scaled[-timestep:]  # shape: (timestep, features)
forecasted = []

for _ in range(n_days):
    # Reshape to match model input: (1, timestep, features)
    input_seq = last_sequence.reshape(1, timestep, -1)

    # Predict the next value
    pred_scaled = model.predict(input_seq)
    pred = scaler_y.inverse_transform(pred_scaled)[0][0]  # Inverse scale

    forecasted.append(pred)

    # Create new input by appending the prediction
    new_input = np.append(last_sequence[1:], input_seq[0][-1].copy().reshape(1, -1), axis=0)
    last_sequence = new_input

return forecasted

(367, 10, 9)