In [51]:
#Nessesory Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [52]:
# read dataset

df = pd.read_csv('/content/MRF.NS.csv')

In [53]:
#print the data
print(df.head())

         Date   Open   High    Low       Close   Adj Close  Volume
0  2002-07-01  910.0  910.0  880.0  897.599976  815.841370   426.0
1  2002-07-02  900.0  905.0  895.0  903.650024  821.340393   310.0
2  2002-07-03  910.0  910.0  900.0  906.700012  824.112366   336.0
3  2002-07-04  920.0  925.0  910.0  916.799988  833.292358   591.0
4  2002-07-05  915.0  935.0  910.0  927.299988  842.836182  1623.0


In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4793 entries, 0 to 4792
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       4793 non-null   object 
 1   Open       4776 non-null   float64
 2   High       4776 non-null   float64
 3   Low        4776 non-null   float64
 4   Close      4776 non-null   float64
 5   Adj Close  4776 non-null   float64
 6   Volume     4776 non-null   float64
dtypes: float64(6), object(1)
memory usage: 262.2+ KB


In [55]:
# check null values in columns
df.isnull().sum()

Unnamed: 0,0
Date,0
Open,17
High,17
Low,17
Close,17
Adj Close,17
Volume,17


In [56]:
#fill the null values with mean
col=["Open", "High", "Low", "Close", "Adj Close", "Volume"]

for i in col:
   df[i].fillna(df[i].mean(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[i].fillna(df[i].mean(), inplace=True)


In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4793 entries, 0 to 4792
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       4793 non-null   object 
 1   Open       4793 non-null   float64
 2   High       4793 non-null   float64
 3   Low        4793 non-null   float64
 4   Close      4793 non-null   float64
 5   Adj Close  4793 non-null   float64
 6   Volume     4793 non-null   float64
dtypes: float64(6), object(1)
memory usage: 262.2+ KB


In [58]:
df.isnull().sum()

Unnamed: 0,0
Date,0
Open,0
High,0
Low,0
Close,0
Adj Close,0
Volume,0


In [59]:
# defining Independent and Dependent Variables

# X is Independent Variables
Xtrain = df[["Open", "High", "Low", "Close"]]


# y is the dependent variable

ytrain =np.where( df['Adj Close'].shift(1) > df['Adj Close'],0,1)

In [60]:
Xtrain.shape

(4793, 4)

In [61]:
ytrain.shape

(4793,)

In [62]:
print(ytrain)

[1 1 1 ... 1 0 0]


In [63]:
from sklearn.metrics import (confusion_matrix,accuracy_score)
from sklearn.model_selection import train_test_split
# Split data into training and testing sets
Xtrain, Xtest, ytrain, ytest = train_test_split(df[["Open", "High", "Low", "Close"]],
                                                np.where(df['Adj Close'].shift(1) > df['Adj Close'], 0, 1),
                                                test_size=0.2, # Adjust test_size as needed
                                                random_state=42)  # Set random_state for reproducibility

log_reg = sm.Logit(ytrain, Xtrain).fit()

#printing the summary table
print(log_reg.summary())



Optimization terminated successfully.
         Current function value: 0.453239
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                      y   No. Observations:                 3834
Model:                          Logit   Df Residuals:                     3830
Method:                           MLE   Df Model:                            3
Date:                Fri, 24 Jan 2025   Pseudo R-squ.:                  0.3459
Time:                        06:50:02   Log-Likelihood:                -1737.7
converged:                       True   LL-Null:                       -2656.5
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Open          -0.0066      0.000    -17.255      0.000      -0.007      -0.006
High          -0.0020      0.

In [64]:
Xtrain.shape

(3834, 4)

In [65]:
Xtest.shape

(959, 4)

In [66]:
ytrain.shape

(3834,)

In [67]:
ytest.shape

(959,)

In [68]:
# Make predictions on the test set
prediction = log_reg.predict(Xtest)



In [69]:
print(prediction)

561     0.386258
893     0.402355
1684    0.495441
705     0.418164
1873    0.464112
          ...   
1859    0.475564
4283    0.999981
650     0.454038
2302    0.160404
708     0.441114
Length: 959, dtype: float64


In [70]:
prediction = (prediction > 0.5).astype(int)  # Convert probabilities to class labels (0 or 1)




In [71]:
print(prediction)

561     0
893     0
1684    0
705     0
1873    0
       ..
1859    0
4283    1
650     0
2302    0
708     0
Length: 959, dtype: int64


In [72]:
cm = confusion_matrix(ytest, prediction)
print("Confusion Matrix: \n",cm)

Confusion Matrix: 
 [[422  72]
 [ 98 367]]


In [73]:
#accuracy score of the model
print("Accuracy Score: ",accuracy_score(ytest, prediction))

Accuracy Score:  0.8227320125130344


In [74]:
# accuracy statistics

from sklearn import metrics
print( 'Accuracy Score:',metrics.accuracy_score(ytest, prediction))

print( 'Precision Score:',metrics.precision_score(ytest, prediction))
print( 'Recall Score:',metrics.recall_score(ytest, prediction))
print( 'F1 Score:',metrics.f1_score(ytest, prediction))

Accuracy Score: 0.8227320125130344
Precision Score: 0.835990888382688
Recall Score: 0.789247311827957
F1 Score: 0.8119469026548672


In [75]:
print( 'Accuracy Score:',metrics.accuracy_score(ytest, prediction))

#create classification report

class_report=metrics.classification_report(ytest, prediction)
print(class_report)

Accuracy Score: 0.8227320125130344
              precision    recall  f1-score   support

           0       0.81      0.85      0.83       494
           1       0.84      0.79      0.81       465

    accuracy                           0.82       959
   macro avg       0.82      0.82      0.82       959
weighted avg       0.82      0.82      0.82       959

