In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


#  The code reads a CSV file named "TCS1.csv" into a DataFrame (df) and then displays the first few rows of the DataFrame using the "head()" function, providing an initial view of the data in the file.

In [2]:
df = pd.read_csv("TCS1.csv")
df.head()

Unnamed: 0,Date,Open,High,Low,Adj Close,Volume,Close
0,27-08-2004,122.800003,122.800003,119.82,88.088272,30646000.0,120.332497
1,30-08-2004,121.237503,123.75,120.625,90.293549,24465208.0,123.345001
2,31-08-2004,123.3125,123.75,122.0,90.416122,21194656.0,123.512497
3,01-09-2004,123.75,124.375,122.949997,90.39782,19935544.0,123.487503
4,02-09-2004,123.737503,125.574997,123.25,90.924896,21356352.0,124.207497


# Column Description :

This Dataset Contains the Historical Share Price of Tata Consultancy Services ltd(TCS) From 2004 to 2002.

This Dataset Contains seven columns

Date-- Date includes day and month and year
open -- open value of the TCS stock price on particular day
High -- high price value of TCS on particular day
LOW -- Low Price Value Of ITC on particular day
Close --Stock Price of TCS After Closing The Stock Market
Volume -- Volume of TCS means sum of buy's and shares
Adjclose --Adjusted close is the closing price after adjustments for all applicable splits and dividend distributions

# The given line of code calculates the percentage of missing values in each column of a DataFrame `df`.

In [3]:
df.isnull().sum()/len(df)*100 

Date         0.000000
Open         0.178015
High         0.178015
Low          0.178015
Adj Close    0.178015
Volume       0.178015
Close        0.178015
dtype: float64

# This code removes rows with missing (NaN) values from the DataFrame "df" in place, meaning it modifies the DataFrame directly without creating a new one.

In [4]:
df.dropna(inplace=True)

In [5]:
df.isnull().sum()/len(df)*100 

Date         0.0
Open         0.0
High         0.0
Low          0.0
Adj Close    0.0
Volume       0.0
Close        0.0
dtype: float64

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4486 entries, 0 to 4493
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       4486 non-null   object 
 1   Open       4486 non-null   float64
 2   High       4486 non-null   float64
 3   Low        4486 non-null   float64
 4   Adj Close  4486 non-null   float64
 5   Volume     4486 non-null   float64
 6   Close      4486 non-null   float64
dtypes: float64(6), object(1)
memory usage: 280.4+ KB


# The code separates the features and the target variable from a DataFrame, where "features" include all columns except the last one, and "target" contains the last column.

In [7]:
features=df.iloc[:,:-1]
target=df.iloc[:,-1]

In [8]:
features.head()

Unnamed: 0,Date,Open,High,Low,Adj Close,Volume
0,27-08-2004,122.800003,122.800003,119.82,88.088272,30646000.0
1,30-08-2004,121.237503,123.75,120.625,90.293549,24465208.0
2,31-08-2004,123.3125,123.75,122.0,90.416122,21194656.0
3,01-09-2004,123.75,124.375,122.949997,90.39782,19935544.0
4,02-09-2004,123.737503,125.574997,123.25,90.924896,21356352.0


In [9]:
target.head()

0    120.332497
1    123.345001
2    123.512497
3    123.487503
4    124.207497
Name: Close, dtype: float64

# This line of code calculates and returns a count of the unique values in the "Close" column of the DataFrame `df`.

In [10]:
 df["Close"].value_counts()

Close
1349.599976    3
1256.900024    3
1261.449951    3
1253.025024    3
568.500000     2
              ..
470.450012     1
475.350006     1
496.424988     1
493.024994     1
3161.699951    1
Name: count, Length: 4333, dtype: int64

# Encoding

In [11]:
cat_col = features.select_dtypes(object).columns

In [12]:
cat_col

Index(['Date'], dtype='object')

In [13]:
from sklearn.preprocessing import OrdinalEncoder
oe = OrdinalEncoder()
features[cat_col] = oe.fit_transform(features[cat_col])

In [14]:
features.head()

Unnamed: 0,Date,Open,High,Low,Adj Close,Volume
0,3904.0,122.800003,122.800003,119.82,88.088272,30646000.0
1,4333.0,121.237503,123.75,120.625,90.293549,24465208.0
2,4449.0,123.3125,123.75,122.0,90.416122,21194656.0
3,87.0,123.75,124.375,122.949997,90.39782,19935544.0
4,233.0,123.737503,125.574997,123.25,90.924896,21356352.0


# The code splits the dataset into training and testing sets (`xtrain` and `xtest` for features, and `ytrain` and `ytest` for the target) with an 80-20 ratio and a fixed random seed for reproducibility (random_state = 1).

In [15]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(features,target,test_size = 0.2,random_state = 1)

# 1 - predict data using multiple regression

In [16]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()

lr.fit(xtrain,ytrain)

ypred=lr.predict(xtest)

In [17]:
ytest

1857     618.525024
2296     992.299988
1962     611.525024
2868    1236.699951
4322    3733.750000
           ...     
2969    1300.525024
1522     493.024994
2950    1276.300049
1564     551.825012
2974    1272.074951
Name: Close, Length: 898, dtype: float64

In [18]:
ypred

array([ 611.20764222,  986.59512084,  610.3247345 , 1242.52171206,
       3767.1967617 , 1130.75963746, 3573.72798882, 1280.79089976,
       1324.51554987,  742.66918603, 3652.26504976,  590.81758523,
       3481.19438781, 2343.31616375, 1425.19412731, 1199.43169685,
       1239.5746985 , 1159.64025939, 2084.6004976 ,  210.73102671,
        226.01895659,  373.21341011,  995.64460122,  416.17322646,
       1287.93287306, 1812.22971057, 1281.33839032, 1297.2326196 ,
        130.70350662,  583.97992629, 3744.38674427,  145.61681092,
       1249.02318067,  640.69257893, 3208.61438742,  148.75535586,
        983.04185448,  170.21466291, 2312.17692209, 3636.13558688,
       1078.1726821 , 1175.27070399, 3211.70096155,  207.02686482,
        644.70872122, 1318.9403595 ,  377.86306786, 1174.67541783,
       1045.1720151 ,  600.11628595, 3126.64012704, 3131.06743038,
        144.21052432, 1182.79816991,  176.3403268 , 1918.41899211,
       3267.93581012, 1114.37554004,  975.88069894,  254.28683

In [19]:
lr.coef_

array([ 2.94603047e-05, -5.20368349e-01,  8.14990869e-01,  6.68629684e-01,
        3.53552812e-02, -3.16720856e-07])

In [20]:
lr.intercept_

4.354565878887797

# The provided code calculates and prints the Mean Squared Error (MSE) and the R-squared (R2) score, providing insights into the model's prediction accuracy and goodness of fit.

In [21]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(ytest, ypred)
r2 = r2_score(ytest, ypred)

print("Mean Squared Error:", mse)
print("R-squared (R2) Score:", r2)

Mean Squared Error: 86.55892590215834
R-squared (R2) Score: 0.9999185548342606


In [22]:
df.head()

Unnamed: 0,Date,Open,High,Low,Adj Close,Volume,Close
0,27-08-2004,122.800003,122.800003,119.82,88.088272,30646000.0,120.332497
1,30-08-2004,121.237503,123.75,120.625,90.293549,24465208.0,123.345001
2,31-08-2004,123.3125,123.75,122.0,90.416122,21194656.0,123.512497
3,01-09-2004,123.75,124.375,122.949997,90.39782,19935544.0,123.487503
4,02-09-2004,123.737503,125.574997,123.25,90.924896,21356352.0,124.207497


In [23]:
lr.predict([[233.0,123.737503,125.574997,123.250000,90.924896,21356352.0]])[0]

121.17410765935378

# 2 - Predict data using DecisionTreeRegressor

In [24]:
from sklearn.tree import  DecisionTreeRegressor
from sklearn import tree

In [25]:
dt1=DecisionTreeRegressor()

In [26]:
from sklearn.metrics import r2_score
def mymodel(model):
    model.fit(xtrain,ytrain)
    y_train_pred=model.predict(xtrain)
    y_test_pred=model.predict(xtest)
    print("Train Data")
    print("train r2_score" , r2_score(ytrain,y_train_pred))
    
    print("Test Data")
    print("test r2_score" , r2_score(ytest,y_test_pred))
    


In [27]:
mymodel(dt1)

Train Data
train r2_score 1.0
Test Data
test r2_score 0.9998963234431784


In [28]:
dt1.score(xtrain,ytrain)

1.0

In [29]:
dt1.score(xtest,ytest)

0.9998963234431784

In [30]:
for i in range(1,30):
    dt1=DecisionTreeRegressor(max_depth=i)
    dt1.fit(xtrain,ytrain)
    y_train_pred=dt1.predict(xtrain)
    y_test_pred=dt1.predict(xtest)
    
    r1=r2_score(ytrain,y_train_pred)
    r2=r2_score(ytest,y_test_pred)
    print(f" Max_depth:{i} Accuracy:{r1}")
    print(f" Max_depth:{i} Accuracy:{r2}")

 Max_depth:1 Accuracy:0.7331776543841623
 Max_depth:1 Accuracy:0.7400792710827797
 Max_depth:2 Accuracy:0.9613570507430269
 Max_depth:2 Accuracy:0.9649477846586687
 Max_depth:3 Accuracy:0.9894832739364113
 Max_depth:3 Accuracy:0.9912435441650919
 Max_depth:4 Accuracy:0.996779623558727
 Max_depth:4 Accuracy:0.9971670424887642
 Max_depth:5 Accuracy:0.9991559374991866
 Max_depth:5 Accuracy:0.9991578871858916
 Max_depth:6 Accuracy:0.9997408782907956
 Max_depth:6 Accuracy:0.999659822128328
 Max_depth:7 Accuracy:0.9999013060582126
 Max_depth:7 Accuracy:0.9998268434195346
 Max_depth:8 Accuracy:0.9999498898121859
 Max_depth:8 Accuracy:0.9998680915243607
 Max_depth:9 Accuracy:0.9999726528078753
 Max_depth:9 Accuracy:0.999882624256297
 Max_depth:10 Accuracy:0.9999872857205612
 Max_depth:10 Accuracy:0.9998975915710344
 Max_depth:11 Accuracy:0.9999940770918568
 Max_depth:11 Accuracy:0.9998911190386444
 Max_depth:12 Accuracy:0.9999973343969369
 Max_depth:12 Accuracy:0.9998890475783945
 Max_depth:13

In [31]:
dt2=DecisionTreeRegressor(max_depth=4)
mymodel(dt2)

Train Data
train r2_score 0.996779623558727
Test Data
test r2_score 0.9971670424887642


In [32]:
dt2.score(xtrain,ytrain)

0.996779623558727

In [33]:
dt2.score(xtest,ytest)

0.9971670424887642

# The provided code calculates and prints the Mean Squared Error (MSE) and the R-squared (R2) score, providing insights into the model's prediction accuracy and goodness of fit.

In [34]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(ytest, ypred)
r2 = r2_score(ytest, ypred)

print("Mean Squared Error:", mse)
print("R-squared (R2) Score:", r2)

Mean Squared Error: 86.55892590215834
R-squared (R2) Score: 0.9999185548342606


In [35]:
dt1.predict([[233.0,123.737503,125.574997,123.250000,90.924896,21356352.0]])[0]

124.207497

# 3 - Predict data using suport vector regressor

In [36]:
from sklearn.svm import SVR

In [37]:
svr = SVR(kernel='linear')  # You can choose a different kernel if needed (e.g., 'rbf')
svr.fit(xtrain, ytrain)

In [38]:
ypred = svr.predict(xtest)

# The provided code calculates and prints the Mean Squared Error (MSE) and the R-squared (R2) score, providing insights into the model's prediction accuracy and goodness of fit.

In [39]:
# Evaluate the model
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(ytest, ypred)
r2 = r2_score(ytest, ypred)

print("Mean Squared Error:", mse)
print("R-squared (R2) Score:", r2)


In [40]:
svr.predict([[233.0,123.737503,125.574997,123.250000,90.924896,21356352.0]])[0]

# 4 - Predict data using XGB Regressor

In [41]:
import xgboost as xgb

In [42]:
model = xgb.XGBRegressor(
    n_estimators=100,  # Number of boosting rounds
    max_depth=3,       # Maximum depth of each tree
)

In [43]:
# Fit the model to the training data
model.fit(xtrain, ytrain)

# Make predictions on the test data
ypred = model.predict(xtest)

# Evaluate the model
mse = mean_squared_error(ytest, ypred)
print(f"Mean Squared Error: {mse}")


Mean Squared Error: 201.8991319167036


# The provided code calculates and prints the Mean Squared Error (MSE) and the R-squared (R2) score, providing insights into the model's prediction accuracy and goodness of fit.

In [44]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(ytest, ypred)
r2 = r2_score(ytest, ypred)

print("Mean Squared Error:", mse)
print("R-squared (R2) Score:", r2)

Mean Squared Error: 201.8991319167036
R-squared (R2) Score: 0.9998100287394951


In [45]:
model.predict([[233.0,123.737503,125.574997,123.250000,90.924896,21356352.0]])[0]

125.12077

# 5 - Predict data using Random Forest Regressor

In [46]:
from sklearn.ensemble import RandomForestRegressor


In [47]:
model = RandomForestRegressor()
mymodel = model.fit(xtrain,ytrain)
# training the model


In [48]:
ypred = mymodel.predict(xtest)


# The provided code calculates and prints the Mean Squared Error (MSE) and the R-squared (R2) score, providing insights into the model's prediction accuracy and goodness of fit.

In [49]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(ytest, ypred)
r2 = r2_score(ytest, ypred)

print("Mean Squared Error:", mse)
print("R-squared (R2) Score:", r2)

Mean Squared Error: 76.97736568546495
R-squared (R2) Score: 0.9999275703315275


In [50]:
model.predict([[233.0,123.737503,125.574997,123.250000,90.924896,21356352.0]])[0]

124.23089803999991