In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split,RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout

In [17]:
df = pd.read_csv("/content/Suzlon_Stock_final_data.csv")
df = df.drop(columns=["Unnamed: 0"])
# df = df[4:]
df["Next_close"] = df["Close"].shift(-1)
df = df.dropna()
df.info()
features = ["Open","High","Low","Volume","Sentiment_Score","MA5","MA10","MA15","RSI","VPT","Previous_close","Volatility","Previous_RSI"]
X = df[features]
Y = df["Next_close"]

<class 'pandas.core.frame.DataFrame'>
Index: 1973 entries, 1 to 1973
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Open             1973 non-null   float64
 1   High             1973 non-null   float64
 2   Low              1973 non-null   float64
 3   Close            1973 non-null   float64
 4   Volume           1973 non-null   float64
 5   Date             1973 non-null   object 
 6   Sentiment_Score  1973 non-null   float64
 7   MA5              1973 non-null   float64
 8   MA10             1973 non-null   float64
 9   MA15             1973 non-null   float64
 10  RSI              1973 non-null   float64
 11  VPT              1973 non-null   float64
 12  Previous_close   1973 non-null   float64
 13  Volatility       1973 non-null   float64
 14  Previous_RSI     1973 non-null   float64
 15  Next_close       1973 non-null   float64
dtypes: float64(15), object(1)
memory usage: 262.0+ KB


In [18]:
from sklearn.feature_selection import mutual_info_regression
mi_scores = mutual_info_regression(X,Y)
mi_scores = pd.Series(data=mi_scores,name="MI Scores",index=X.columns)
mi_scores = mi_scores.sort_values(ascending=False)
print(mi_scores)
print("this shows what are most depended variables")

Low                2.939679
High               2.919526
Previous_close     2.747818
Open               2.746892
MA5                2.692718
MA10               2.530662
MA15               2.432804
VPT                2.319539
RSI                1.078817
Previous_RSI       1.058368
Volatility         0.619499
Volume             0.241838
Sentiment_Score    0.066070
Name: MI Scores, dtype: float64
this shows what are most depended variables


In [37]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,train_size=0.8,test_size=0.2)
# from sklearn.model_selection import TimeSeriesSplit
# tss = TimeSeriesSplit(n_splits=3)

# for train_index,test_index in tss.split(X):
#   X_train,X_test = X.iloc[train_index,:],X.iloc[test_index,:]
#   Y_train,Y_test = Y.iloc[train_index],Y.iloc[test_index]

In [20]:
def Random_Forest_Reg(X_train,X_test,Y_train,Y_test):
  model = RandomForestRegressor()
  Hyper_param = {
      "n_estimators" : [50,100,150,200,250],
      "max_depth" : [5,10,15,20,None],
      "min_samples_split" : [2,5,10],
      "min_samples_leaf" : [1,2,4],
      "max_features" : ["sqrt","log2"]
  }
  Search = RandomizedSearchCV(
      estimator=model,
      param_distributions=Hyper_param,
      n_iter = 50,
      scoring = "neg_mean_squared_error",
      n_jobs = -1,
      cv = 3,
      verbose=0
  )
  Search.fit(X_train,Y_train)
  print(Search.best_params_)
  return Search

def XGB_Boost_Reg(X_train,X_test,Y_train,Y_test):
  model = XGBRegressor()
  Hyper_param = {
    'n_estimators': np.arange(100, 200),
    'learning_rate': np.linspace(0.01, 0.3, 10),
    'max_depth': np.arange(3, 15, 2),
    'min_child_weight': np.arange(1, 10, 2),
    'subsample': np.linspace(0.5, 1.0, 5),
    'colsample_bytree': np.linspace(0.5, 1.0, 5),
    'gamma': np.linspace(0, 5, 5),
    'lambda': np.linspace(0, 10, 5),
    'alpha': np.linspace(0, 10, 5),
  }
  Search = RandomizedSearchCV(
      estimator = model,
      param_distributions=Hyper_param,
      scoring = "neg_mean_squared_error",
      n_jobs = -1,
      n_iter=50,
      cv = 3,
      verbose=0
  )
  Search.fit(X_train,Y_train)
  print(Search.best_params_)
  return Search

In [32]:
Model = Random_Forest_Reg(X_train,X_test,Y_train,Y_test)

{'n_estimators': 50, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 20}


In [38]:
Model_XGB =XGB_Boost_Reg(X_train,X_test,Y_train,Y_test)

{'subsample': 1.0, 'n_estimators': 164, 'min_child_weight': 1, 'max_depth': 9, 'learning_rate': 0.042222222222222223, 'lambda': 0.0, 'gamma': 3.75, 'colsample_bytree': 0.625, 'alpha': 0.0}


In [34]:
Preds = Model.predict(X_test)
print("error in Data using RFR : ",mean_absolute_error(Y_test,Preds))

error in Data using RFR :  30.369301611236637


In [39]:
Preds_XGB = Model_XGB.predict(X_test)
print("error in data using XGBR : ",mean_absolute_error(Y_test,Preds_XGB))

error in data using XGBR :  0.5110029747878448


In [40]:
Next_day = df[df["Date"] == "2025-03-07"]
Next_day = Next_day.drop(columns=["Date","Close","Next_close"])
Next_day_preds = Model.predict(Next_day)
print("next days prices Using RFR: ",Next_day_preds)
Next_day_preds = Model_XGB.predict(Next_day)
print("next days prices Using XGBR : ",Next_day_preds)

next days prices Using RFR:  [20.03977642]
next days prices Using XGBR :  [53.46684]


**Above we can see which are very similar to the Next days price so now to test it on unseen Data**

In [42]:
stock_data = {
    "Date": ["2025-03-11", "2025-03-12", "2025-03-13"],
    "Open": [51.79, 54.18, 54.31],
    "High": [54.12, 54.44, 54.96],
    "Low": [51.75, 52.80, 53.60],
    "Close": [53.75, 53.84, 54.57],
    "Volume": [4761655, 3802049, 2702431],
    "Sentiment_Score": [0.174625, 0.0000, 0.0000],
    "MA5": [53.046, 53.376, 53.724],
    "MA10": [52.944, 52.999, 53.124],
    "MA15": [52.669, 52.767, 52.897],
    "RSI": [-125.6429, -120.2857, -117.2143],
    "VPT": [3.777915e+08, 3.778724e+08, 3.779224e+08],
    "Previous_close": [53.23, 53.75, 53.84],
    "Next_Close" : [53.84,54.57,999],
    "Volatility": [2.37, 1.64, 1.99],
    "Previous_RSI": [-131.4286, -125.6429, -120.2857]
}
test_data = pd.DataFrame(stock_data)

In [43]:
Ready_test = test_data.drop(columns=["Date","Close","Next_Close"])

In [None]:
Preds_next =  Model.predict(Ready_test)
test_data["predicted_price"] = Preds_next
print(test_data[["Date","Next_Close","predicted_price"]])

         Date  Next_Close  predicted_price
0  2025-03-11       53.84        39.706938
1  2025-03-12       54.57        39.706938
2  2025-03-13      999.00        39.706938


In [45]:
Preds_next =  Model_XGB.predict(Ready_test)
test_data["predicted_price"] = Preds_next
print(test_data[["Date","Next_Close","predicted_price"]])

         Date  Next_Close  predicted_price
0  2025-03-11       53.84        49.268181
1  2025-03-12       54.57        50.295689
2  2025-03-13      999.00        50.295689


In [16]:
df.head(10)

Unnamed: 0,Open,High,Low,Close,Volume,Date,Sentiment_Score,MA5,MA10,MA15,RSI,VPT,Previous_close,Volatility,Previous_RSI,Next_close
1,19.1,19.25,18.85,18.95,8062251.0,2017-03-24,0.0,18.62,18.93,19.146667,-2.5,-21216.45,19.0,0.4,0.0,18.4
2,19.0,19.0,18.35,18.4,7102503.0,2017-03-27,0.9449,18.62,18.93,19.146667,-20.0,-227357.698021,18.95,0.65,-2.5,18.5
3,18.5,18.7,18.4,18.5,10393766.0,2017-03-28,2.3274,18.62,18.93,19.146667,-17.5,-170869.839325,18.4,0.3,-20.0,18.25
4,18.6,18.6,18.15,18.25,8017044.0,2017-03-29,-0.8196,18.62,18.93,19.146667,-19.0,-279208.271758,18.5,0.45,-17.5,19.45
5,18.35,19.55,18.35,19.45,13393126.0,2017-03-30,1.591,18.71,18.93,19.146667,-35.833333,601435.629612,18.25,1.2,-19.0,19.1
6,19.75,19.85,19.0,19.1,20732080.0,2017-03-31,0.4209,18.74,18.93,19.146667,-35.714286,228364.781283,19.45,0.85,-35.833333,19.15
7,19.15,19.4,19.0,19.15,6703036.0,2017-04-03,1.5263,18.89,18.93,19.146667,-31.875,245911.995943,19.1,0.4,-35.714286,19.3
8,19.35,19.4,19.2,19.3,6151185.0,2017-04-05,4.3664,19.05,18.93,19.146667,-30.0,294093.601687,19.15,0.2,-31.875,19.2
9,19.25,19.4,19.05,19.2,6366841.0,2017-04-06,1.0638,19.24,18.93,19.146667,-28.0,261104.788215,19.3,0.35,-30.0,18.9
10,19.2,19.35,18.8,18.9,6849180.0,2017-04-07,0.2732,19.13,18.92,19.146667,-28.181818,154086.350715,19.2,0.55,-28.0,19.35


#Using LSTM