In [2]:
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import pandas as pd
from datetime import datetime
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [27]:
data = pd.read_csv("stock_price.csv")

In [28]:
data.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2022-03-02,442.41,448.98,439.39,447.69,25791134
1,2022-03-03,432.03,453.18,429.95,453.18,29599672
2,2022-03-04,412.41,426.84,409.94,426.84,38765185
3,2022-03-07,389.43,397.09,383.0,397.09,44836205
4,2022-03-08,387.2,390.86,371.98,385.97,55543383


In [29]:
data.tail()

Unnamed: 0,Date,Close,High,Low,Open,Volume
804,2025-06-05,710.15,714.45,704.0,712.8,10203331
805,2025-06-06,711.0,715.9,701.2,710.0,11975812
806,2025-06-09,717.8,726.0,713.1,715.0,9900412
807,2025-06-10,732.25,734.75,715.35,721.0,19151889
808,2025-06-11,736.4,744.0,730.15,734.95,12626754


In [34]:
data.describe()

Unnamed: 0,Date,Close,High,Low,Open,Volume
count,809,809.0,809.0,809.0,809.0,809.0
mean,2023-10-20 11:21:43.831891200,652.377491,660.80754,644.933857,653.855946,13920310.0
min,2022-03-02 00:00:00,367.98,380.19,361.95,375.84,0.0
25%,2022-12-23 00:00:00,434.99,439.89,429.95,436.03,8957930.0
50%,2023-10-18 00:00:00,625.17,634.74,620.86,628.62,11871770.0
75%,2024-08-19 00:00:00,801.56,811.57,790.65,804.09,15762690.0
max,2025-06-11 00:00:00,1151.95,1168.95,1135.54,1157.05,71425750.0
std,,216.189463,219.073222,213.605807,216.906128,8619822.0


In [33]:
data["Date"] = pd.to_datetime(data["Date"])

In [36]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 809 entries, 0 to 808
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    809 non-null    datetime64[ns]
 1   Close   809 non-null    float64       
 2   High    809 non-null    float64       
 3   Low     809 non-null    float64       
 4   Open    809 non-null    float64       
 5   Volume  809 non-null    int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 38.1 KB


In [7]:
H_df = pd.read_csv("head.csv",encoding="latin 1")

In [8]:
H_df.head()

Unnamed: 0,Date,Headline
0,01-01-2022,Tata Motors overtakes Hyundai to grab the numb...
1,01-01-2022,Honda Cars reports 26 pc jump in wholesales fo...
2,01-01-2022,Tata Motors reports 50 pc jump in PV sales to ...
3,02-01-2022,PM Modi can't claim to be 'fakir' after adding...
4,02-01-2022,Going electric: Is the EV two-wheeler buzz rea...


In [9]:
H_df.tail()

Unnamed: 0,Date,Headline
6297,11-06-2025,"Reduce Tata Motors, target price Rs 733: HDFC..."
6298,11-06-2025,Tata Electronics sends hundreds of staff to Ta...
6299,11-06-2025,Qcomm cos on govt radar; Tata upskills in Taiwan
6300,11-06-2025,New directors set to board Tata Sons with 출칙?3...
6301,11-06-2025,These 2 Tata Group companies to trade ex-divid...


In [10]:
H_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6302 entries, 0 to 6301
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Date      6302 non-null   object
 1   Headline  6302 non-null   object
dtypes: object(2)
memory usage: 98.6+ KB


In [11]:
H_df["Date"] = pd.to_datetime(H_df["Date"],format="%d-%m-%Y")

In [12]:
H_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6302 entries, 0 to 6301
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      6302 non-null   datetime64[ns]
 1   Headline  6302 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 98.6+ KB


In [13]:
H_df.describe()

Unnamed: 0,Date
count,6302
mean,2023-07-25 02:56:24.068549888
min,2022-01-01 00:00:00
25%,2022-10-03 00:00:00
50%,2023-08-03 00:00:00
75%,2024-04-08 00:00:00
max,2025-06-11 00:00:00


In [14]:
print(H_df.columns)

Index(['Date', 'Headline'], dtype='object')


In [15]:
analyzer = SentimentIntensityAnalyzer()
H_df["sentiment"] = H_df["Headline"].apply(lambda x: analyzer.polarity_scores(str(x))["compound"])

In [16]:
daily_sentiment = H_df.groupby("Date")["sentiment"].sum().reset_index()

In [31]:
daily_sentiment.head()

Unnamed: 0,Date,sentiment
0,2022-01-01,0.0772
1,2022-01-02,0.0
2,2022-01-03,-0.3166
3,2022-01-04,-0.4162
4,2022-01-05,1.1716


In [30]:
daily_sentiment.tail()

Unnamed: 0,Date,sentiment
971,2025-06-07,0.4535
972,2025-06-08,0.0
973,2025-06-09,0.28
974,2025-06-10,0.7351
975,2025-06-11,1.574


In [37]:
merged_df = pd.merge(data,daily_sentiment,on = "Date",how = "left")

In [40]:
merged_df.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume,sentiment
0,2022-03-02,442.41,448.98,439.39,447.69,25791134,-0.8141
1,2022-03-03,432.03,453.18,429.95,453.18,29599672,0.8383
2,2022-03-04,412.41,426.84,409.94,426.84,38765185,0.7921
3,2022-03-07,389.43,397.09,383.0,397.09,44836205,0.3699
4,2022-03-08,387.2,390.86,371.98,385.97,55543383,0.6037


In [39]:
merged_df.tail()

Unnamed: 0,Date,Close,High,Low,Open,Volume,sentiment
804,2025-06-05,710.15,714.45,704.0,712.8,10203331,2.0623
805,2025-06-06,711.0,715.9,701.2,710.0,11975812,-0.4484
806,2025-06-09,717.8,726.0,713.1,715.0,9900412,0.28
807,2025-06-10,732.25,734.75,715.35,721.0,19151889,0.7351
808,2025-06-11,736.4,744.0,730.15,734.95,12626754,1.574


In [41]:
merged_df.isnull().sum()

Date           0
Close          0
High           0
Low            0
Open           0
Volume         0
sentiment    169
dtype: int64

In [42]:
merged_df["sentiment"].fillna(0,inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_df["sentiment"].fillna(0,inplace=True)


In [43]:
merged_df.isnull().sum()

Date         0
Close        0
High         0
Low          0
Open         0
Volume       0
sentiment    0
dtype: int64