In [51]:
# code for ignore any warning messages
import warnings 
warnings.filterwarnings('ignore')

In [52]:
# import the libraries and requrie modules
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

In [53]:
# import the data from the dataset
INFOSYS= pd.read_csv("NSE_TATA.csv")
INFOSYS = INFOSYS.dropna()

# define columns in the dataset
INFOSYS = INFOSYS[['Date', 'Open', 'High', 'Low', 'Last', 'Close', 'Total Trade Quantity','Turnover (Lacs)']]

# code to print first 5 rows
INFOSYS.head()

Unnamed: 0,Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
0,2022-04-22,818.0,821.6,803.1,811.6,815.6,1880668,15288.05349
1,2022-04-21,812.0,822.0,804.25,818.95,820.05,1344250,10958.1757
2,2022-04-20,800.0,813.45,792.1,810.1,810.45,1522453,12244.14776
3,2022-04-19,825.0,836.85,784.85,788.0,791.35,2460482,20063.01436
4,2022-04-18,816.2,824.9,809.0,822.5,822.3,1480954,12102.6105


In [54]:
# get the summary statistics of the numerical columns in the dataset
INFOSYS.describe()

Unnamed: 0,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
count,1561.0,1561.0,1561.0,1561.0,1561.0,1561.0,1561.0
mean,348.303972,353.31246,343.0623,348.047341,348.034177,3187189.0,11480.285301
std,222.923844,225.242387,219.99598,222.566091,222.600846,3349989.0,14159.640225
min,103.0,104.6,100.0,102.6,102.65,175837.0,388.735415
25%,171.8,173.85,169.0,171.0,171.15,1518494.0,3202.778609
50%,265.95,271.25,262.0,266.2,266.6,2329094.0,7762.714725
75%,511.0,517.85,502.1,508.6,509.1,3662455.0,15107.10829
max,885.0,889.0,877.6,880.2,882.1,63552600.0,234317.6879


In [55]:
#get the  information about pandas dataset
INFOSYS.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1561 entries, 0 to 1560
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Date                  1561 non-null   object 
 1   Open                  1561 non-null   float64
 2   High                  1561 non-null   float64
 3   Low                   1561 non-null   float64
 4   Last                  1561 non-null   float64
 5   Close                 1561 non-null   float64
 6   Total Trade Quantity  1561 non-null   int64  
 7   Turnover (Lacs)       1561 non-null   float64
dtypes: float64(6), int64(1), object(1)
memory usage: 97.7+ KB


In [56]:
#import and set up Plotly, a data visualization library, for use in a Jupyter notebook environment.
import plotly.graph_objs as go
from plotly.offline import plot

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)


In [57]:
# code to define a layout and create a line chart with th edates on the x-axis and stock prices of infosys on the y-axis 
layout = go.Layout(
    title='STOCK PRICE OF INFOSYS',
        xaxis=dict(
            title='Date',
            titlefont=dict(
                family='Arial',
                size=20,
                color='green'
            )
        ),
        yaxis=dict(
            title='Price',
            titlefont=dict(
                family='Arial',
                size=20,
                color='blue'
        )
    )
)
INFOSYS_DATA = [{'x':INFOSYS['Date'], 'y':INFOSYS['Close']}]
plot = go.Figure(data=INFOSYS_DATA, layout=layout)

In [58]:
# code to display chart of stock price
iplot(plot)

In [59]:
# code to calculate the differences between the opening and closing prices 
INFOSYS['Open-Close']= INFOSYS.Close - INFOSYS.Open

#code to calculate the differences between the high and low prices
INFOSYS['High-Low']  = INFOSYS.High - INFOSYS.Low
INFOSYS = INFOSYS.dropna()

# define the input features
X= INFOSYS[['Open-Close', 'High-Low']]
X.head()

Unnamed: 0,Open-Close,High-Low
0,-2.4,18.5
1,8.05,17.75
2,10.45,21.35
3,-33.65,52.0
4,6.1,15.9


In [60]:
#code creates a binary target variable 'B' where it assigns 1 if the next day's closing price is higher than the current day's closing price and -1 if it is lower.
Y= np.where(INFOSYS['Close'].shift(-1)>INFOSYS['Close'],1,-1)

In [61]:
# splitting the data into training and testing sets based on a given percentage 0.8
split_percentage = 0.8
split = int(split_percentage*len(INFOSYS))

X_train = X[:split]
Y_train = Y[:split]

X_test = X[split:]
Y_test = Y[split:]

In [62]:
#Importing necessary modules for decision tree 
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
%pylab inline
%matplotlib inline

model = DecisionTreeClassifier()
model.fit(X_train, Y_train)
print(model)

accuracy_train = accuracy_score(Y_train, model.predict(X_train))
accuracy_test = accuracy_score(Y_test, model.predict(X_test))

print ('Train_data Accuracy: %.2f' %accuracy_train)
print ('Test_data Accuracy: %.2f' %accuracy_test)

Populating the interactive namespace from numpy and matplotlib
DecisionTreeClassifier()
Train_data Accuracy: 1.00
Test_data Accuracy: 0.79


In [63]:
model.score(X_train, Y_train)

1.0

In [64]:
# calculate class probabilities
probability = model.predict_proba(X_test)
print(probability)

[[0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 

In [65]:
# predict class labels
predicted = model.predict(X_test)

In [66]:
#Importing module
from sklearn import metrics

In [67]:
# confusion matrix
print(metrics.confusion_matrix(Y_test, predicted))

[[132  32]
 [ 34 115]]


In [68]:
# generate the classification report
print(metrics.classification_report(Y_test, predicted))

              precision    recall  f1-score   support

          -1       0.80      0.80      0.80       164
           1       0.78      0.77      0.78       149

    accuracy                           0.79       313
   macro avg       0.79      0.79      0.79       313
weighted avg       0.79      0.79      0.79       313



In [69]:
#code for print score
print(model.score(X_train,Y_train))

1.0


In [70]:
#Importing modules
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

In [71]:
pipe_line = Pipeline([('clf', DecisionTreeClassifier())])

In [72]:
pipe_line.fit(X_train, Y_train)

Pipeline(steps=[('clf', DecisionTreeClassifier())])

In [73]:
pipe_line.score(X_train, Y_train)

1.0

In [74]:
#Importing module
from sklearn.model_selection import cross_val_score

In [75]:
score = cross_val_score(estimator=pipe_line, X=X, y=Y, cv=10)

In [76]:
#code for print cv accuracy score and cv accuracy
print('cv accuracy score : %s' % score)
print('cv accuracy : %.3f +/- %.3f' % (np.mean(score), np.std(score)))

cv accuracy score : [0.84076433 0.83333333 0.83974359 0.81410256 0.80128205 0.84615385
 0.81410256 0.78846154 0.78846154 0.72435897]
cv accuracy : 0.809 +/- 0.035
