## Importing the essential libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

## Loading the dataset

In [2]:
# Location of the CSV export, resolved relative to the notebook's working directory.
dataset_path = "National_Stock_Exchange_of_India_Ltd.csv"
# Read the whole file into a DataFrame using pandas' default parsing options.
data = pd.read_csv(dataset_path)

In [3]:
# Preview the first five rows to sanity-check the load.
data.head()

Unnamed: 0,Symbol,Open,High,Low,LTP,Chng,% Chng,Volume (lacs),Turnover (crs.),52w H,52w L,365 d % chng,30 d % chng
0,ADANIPORTS,750.0,766.0,713.25,715.0,-47.45,-6.22,72.2,532.63,901.0,384.4,79.22,-4.65
1,ASIANPAINT,3101.0,3167.35,3091.0,3138.0,-6.25,-0.2,10.29,322.53,3505.0,2117.15,45.66,5.66
2,AXISBANK,669.0,674.9,660.45,661.0,-18.9,-2.78,102.53,684.0,866.9,568.4,10.19,-21.49
3,BAJAJ-AUTO,3370.0,3383.5,3320.0,3335.0,-56.7,-1.67,3.42,114.59,4361.4,3041.0,9.3,-12.05
4,BAJAJFINSV,17200.0,17237.2,16610.0,16684.0,-684.85,-3.94,3.42,576.79,19325.0,8273.7,91.38,-9.1


## Checking for missing values

In [4]:
# Count the missing entries in each column; a column of zeros means nothing needs imputing.
data.isnull().sum()

Unnamed: 0,0
Symbol,0
Open,0
High,0
Low,0
LTP,0
Chng,0
% Chng,0
Volume (lacs),0
Turnover (crs.),0
52w H,0


## Checking for duplicate observations

In [5]:
# Count the rows that exactly repeat an earlier row.
data.duplicated().sum()

0

## Listing the numerical features

In [6]:
# A column counts as numerical when its dtype is anything other than the
# generic object dtype ("O").
numerical_features = list(
    filter(lambda column_name: data[column_name].dtype != "O", data.columns)
)

# Print one column name per line.
for feature in numerical_features:
    print(feature)

Chng
% Chng
Volume (lacs)
365 d % chng
30 d % chng


In [7]:
# Show the first five rows restricted to the numerical columns found above.
data[numerical_features].head()

Unnamed: 0,Chng,% Chng,Volume (lacs),365 d % chng,30 d % chng
0,-47.45,-6.22,72.2,79.22,-4.65
1,-6.25,-0.2,10.29,45.66,5.66
2,-18.9,-2.78,102.53,10.19,-21.49
3,-56.7,-1.67,3.42,9.3,-12.05
4,-684.85,-3.94,3.42,91.38,-9.1


## Listing the categorical features

In [8]:
# Collect the columns stored with the generic object dtype ("O"), i.e. the
# ones pandas did not parse as numbers.
cat_features = []
for column_name in data.columns:
    if data[column_name].dtype == "O":
        cat_features.append(column_name)

# Print one column name per line.
for feature in cat_features:
    print(feature)

Symbol
Open
High
Low
LTP
Turnover (crs.)
52w H
52w L


In [9]:
# Show the first five rows restricted to the object-dtype columns found above.
data[cat_features].head()

Unnamed: 0,Symbol,Open,High,Low,LTP,Turnover (crs.),52w H,52w L
0,ADANIPORTS,750.0,766.0,713.25,715.0,532.63,901.0,384.4
1,ASIANPAINT,3101.0,3167.35,3091.0,3138.0,322.53,3505.0,2117.15
2,AXISBANK,669.0,674.9,660.45,661.0,684.0,866.9,568.4
3,BAJAJ-AUTO,3370.0,3383.5,3320.0,3335.0,114.59,4361.4,3041.0
4,BAJAJFINSV,17200.0,17237.2,16610.0,16684.0,576.79,19325.0,8273.7


In [10]:
# These columns are listed among the object-dtype features even though they
# hold numbers; remove the commas (presumably thousands separators) so that
# they can be parsed as numeric values afterwards.
comma_formatted_columns = ['Open', 'High', 'Low', 'LTP', 'Turnover (crs.)', '52w H', '52w L']
for column in comma_formatted_columns:
    # Skip columns that are already numeric so that re-running this cell after
    # the numeric conversion does not fail on the `.str` accessor.
    if pd.api.types.is_numeric_dtype(data[column]):
        continue
    # regex=False makes "," a literal pattern regardless of the pandas version.
    data[column] = data[column].str.replace(",", "", regex=False)

In [11]:
# Parse each comma-free text column into a numeric dtype, one column at a time.
for column in ('Open', 'High', 'Low', 'LTP', 'Turnover (crs.)', '52w H', '52w L'):
    data[column] = pd.to_numeric(data[column])

In [12]:
# Recompute the object-dtype columns after the numeric conversion; the
# encoding step further down iterates over this list.
cat_features = [
    column_name
    for column_name, column_dtype in data.dtypes.items()
    if column_dtype == "O"
]

# Print one column name per line.
for feature in cat_features:
    print(feature)

Symbol


In [13]:
# Preview the first five rows again after the numeric conversion.
data.head()

Unnamed: 0,Symbol,Open,High,Low,LTP,Chng,% Chng,Volume (lacs),Turnover (crs.),52w H,52w L,365 d % chng,30 d % chng
0,ADANIPORTS,750.0,766.0,713.25,715.0,-47.45,-6.22,72.2,532.63,901.0,384.4,79.22,-4.65
1,ASIANPAINT,3101.0,3167.35,3091.0,3138.0,-6.25,-0.2,10.29,322.53,3505.0,2117.15,45.66,5.66
2,AXISBANK,669.0,674.9,660.45,661.0,-18.9,-2.78,102.53,684.0,866.9,568.4,10.19,-21.49
3,BAJAJ-AUTO,3370.0,3383.5,3320.0,3335.0,-56.7,-1.67,3.42,114.59,4361.4,3041.0,9.3,-12.05
4,BAJAJFINSV,17200.0,17237.2,16610.0,16684.0,-684.85,-3.94,3.42,576.79,19325.0,8273.7,91.38,-9.1


In [14]:
# (number of rows, number of columns) of the frame.
data.shape

(50, 13)

## Encoding the categorical features as integers

In [15]:
# Replace every category with an integer code: the code is the position at
# which the category first appears in the column.
for feature in cat_features:
    distinct_values = data[feature].unique()
    feature_mapping = dict(zip(distinct_values, range(len(distinct_values))))
    data[feature] = data[feature].map(feature_mapping)

## Creating the features and labels

In [16]:
# Move the target column to the last position under the name 'OPEN', because
# the feature/label split that follows selects the label by position.
if 'Open' in data.columns:
    # pop() removes 'Open' and returns it; assigning it under a new name
    # appends it as the last column.
    data['OPEN'] = data.pop('Open')
elif 'OPEN' not in data.columns:
    # Neither spelling is present: fail loudly instead of letting a different
    # column silently become the label.
    raise KeyError("Neither 'Open' nor 'OPEN' is present in data")
# When only 'OPEN' exists the cell has already run, so there is nothing to do
# and re-running it is safe.

# Guard the assumption the positional split relies on.
assert data.columns[-1] == 'OPEN', "target column 'OPEN' must be the last column"

In [17]:
# Every column except the last one is a feature; the last column is the label.
feature_frame = data.iloc[:, :-1]
label_column = data.iloc[:, -1]

# Continue with plain NumPy arrays from here on.
X = feature_frame.values
y = label_column.values

In [18]:
# (number of samples, number of features) of the feature matrix.
X.shape

(50, 12)

## Splitting the dataset into a training set and a testing set so the model can be evaluated on data it has not seen

In [19]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_options = {"test_size": 0.2, "random_state": 0}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

## Training the model on the training set

In [20]:
from sklearn.ensemble import RandomForestRegressor

# A forest of 100 trees; the fixed seed makes the fitted model reproducible.
forest_options = {"n_estimators": 100, "random_state": 0}
regressor = RandomForestRegressor(**forest_options)

# Fit on the training split only; the test split is kept for evaluation.
regressor.fit(X_train, y_train)

## Evaluating the performance of the model on the testing dataset

In [22]:
# Predict the label for every held-out row.
y_pred = regressor.predict(X_test)

# Show two decimals when printing arrays (this changes NumPy's global print options).
np.set_printoptions(precision=2)

# Put predictions (left column) next to the true values (right column).
predicted_column = y_pred.reshape(len(y_pred), 1)
actual_column = y_test.reshape(len(y_test), 1)
print(np.concatenate((predicted_column, actual_column), axis=1))

[[ 1897.1   2002.  ]
 [ 4191.83  4770.  ]
 [  196.44   157.75]
 [  761.34   800.2 ]
 [  695.54   669.  ]
 [  655.19   668.25]
 [  383.2    486.25]
 [ 6341.89  7520.  ]
 [  740.57   739.  ]
 [13557.53 17200.  ]]


## Computing the R² score on the testing set

In [23]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions against the held-out labels.
r2_score(y_true=y_test, y_pred=y_pred)

0.942004797655232