In [57]:
import pandas as pd
import requests
from datetime import datetime, timedelta
import nltk
from bs4 import BeautifulSoup
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [31]:
API_KEY = "QCC4AQUW1ZKX4T7P"
symbol = "TCS.BSE"

In [32]:
url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=${symbol}&outputsize=full&apikey=${API_KEY}&datatype=csv"

In [20]:
response = requests.get(url)

In [21]:
# print response
if response.status_code == 200:
    with open('tcs_stock_data.csv', 'wb') as file:
        file.write(response.content)
    print("Data saved to tcs_stock_data.csv")


Data saved to tcs_stock_data.csv


In [36]:
# store the data in a pandas dataframe
df = pd.read_csv('tcs_stock_data.csv', index_col=0, parse_dates=True)

In [37]:
df.index = pd.to_datetime(df.index)
df = df.sort_index()
print(df.tail())

                 open       high        low      close  volume
timestamp                                                     
2025-06-30  3444.9500  3465.0000  3430.1499  3461.0500   80588
2025-07-01  3460.9500  3485.0000  3414.4500  3429.4500  221565
2025-07-02  3450.0500  3489.8501  3420.1001  3423.3501   54493
2025-07-03  3434.8999  3435.0000  3398.1499  3400.7500  113506
2025-07-04  3401.1001  3426.0000  3388.6499  3420.9500  170988


In [38]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-03,168.725,169.75,167.2814,169.15,1185200
2005-01-04,168.8438,169.5,166.75,167.5438,1312656
2005-01-05,166.25,167.2626,160.125,165.6,3669696
2005-01-06,165.875,168.125,161.5188,164.9876,3441640
2005-01-07,166.25,168.0876,163.8876,167.425,2627176


Call api for downloading RSI indicator data

In [33]:
rsi_url = f"https://www.alphavantage.co/query?function=RSI&symbol={symbol}&interval=daily&time_period=14&series_type=close&apikey={API_KEY}"
rsi_data = requests.get(rsi_url).json()

rsi_df = pd.DataFrame.from_dict(rsi_data['Technical Analysis: RSI'], orient='index')
rsi_df = rsi_df.rename(columns={'RSI': 'RSI'})
rsi_df.index = pd.to_datetime(rsi_df.index)
rsi_df = rsi_df.astype(float)


In [34]:
rsi_df.head()

Unnamed: 0,RSI
2005-01-24,13.8729
2005-01-25,20.8349
2005-01-27,22.9666
2005-01-28,40.9205
2005-01-31,43.9547


Merge RSI data with main df

In [40]:
# Align dates and merge
df = df.merge(rsi_df, how='left', left_index=True, right_index=True)
df.dropna(inplace=True)


In [52]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,RSI_x,RSI_y,SMA,EMA,Real Upper Band,Real Middle Band,Real Lower Band
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2005-02-01,163.125,165.125,161.0,164.4876,4152952,47.9975,47.9975,68.5229,69.1659,74.1247,69.9532,65.7818
2005-02-02,164.625,165.375,162.6564,163.2064,2140728,45.6159,45.6159,68.5876,69.5611,73.7248,69.8244,65.9241
2005-02-03,163.625,164.25,162.65,163.7064,1440992,53.537,53.537,68.9326,70.0882,73.7042,69.819,65.9339
2005-02-04,163.75,165.2312,162.75,163.3188,834168,52.7357,52.7357,69.3048,70.5295,73.7958,69.8471,65.8985
2005-02-07,164.375,166.0,162.625,163.8876,1431688,53.8282,53.8282,69.7859,70.941,73.9663,69.9011,65.836


Call api for downloading SMA indicator data and add it to main df

In [46]:
sma_url = f"https://www.alphavantage.co/query?function=SMA&symbol={symbol}&interval=daily&time_period=10&series_type=open&apikey={API_KEY}"
sma_data = requests.get(sma_url).json()
sma_df = pd.DataFrame.from_dict(sma_data['Technical Analysis: SMA'], orient='index')
sma_df = sma_df.astype(float)
sma_df.index = pd.to_datetime(sma_df.index)
df = df.merge(sma_df, how='left', left_index=True, right_index=True)
df.dropna(inplace=True)

Call api for downloading EMA indicator data and add it to main df

In [49]:
ema_url = f"https://www.alphavantage.co/query?function=EMA&symbol={symbol}&interval=daily&time_period=10&series_type=open&apikey={API_KEY}"
ema_data = requests.get(ema_url).json()
ema_df = pd.DataFrame.from_dict(ema_data['Technical Analysis: EMA'], orient='index')
ema_df = ema_df.astype(float)
ema_df.index = pd.to_datetime(ema_df.index)
df = df.merge(ema_df, how='left', left_index=True, right_index=True)
df.dropna(inplace=True)

Call api for downloading BBANDS indicator data and add it to main df

In [51]:
bbands_url = f"https://www.alphavantage.co/query?function=BBANDS&symbol={symbol}&interval=daily&time_period=20&series_type=close&apikey={API_KEY}"
bbands_data = requests.get(bbands_url).json()

bbands_df = pd.DataFrame.from_dict(bbands_data['Technical Analysis: BBANDS'], orient='index')
bbands_df = bbands_df.astype(float)
bbands_df.index = pd.to_datetime(bbands_df.index)

# Merge Bollinger Bands with main dataframe
df = df.merge(bbands_df, how='left', left_index=True, right_index=True)
df.dropna(inplace=True)


In [54]:
# Filter data from the last 3 years
cutoff_date = datetime.now() - timedelta(days=3*365)
df_recent = df[df.index >= cutoff_date]

print(df_recent.head())

               open       high        low      close  volume    RSI_x  \
timestamp                                                               
2022-07-07  3269.95  3319.0000  3265.0000  3286.9500   77239  50.0642   
2022-07-08  3297.00  3300.2500  3250.0500  3264.8501   76501  48.0545   
2022-07-11  3220.00  3223.0500  3105.8501  3113.2500  202245  37.0637   
2022-07-12  3125.00  3136.2000  3081.0000  3084.6499  188774  35.4179   
2022-07-13  3108.50  3109.8999  3033.7000  3038.5500  107114  32.8833   

              RSI_y        SMA        EMA  Real Upper Band  Real Middle Band  \
timestamp                                                                      
2022-07-07  50.0642  3013.2342  2992.5628        3104.6644         2975.9559   
2022-07-08  48.0545  3008.9230  2998.3292        3090.0343         2971.3466   
2022-07-11  37.0637  2998.0991  2990.2053        3095.2944         2966.4437   
2022-07-12  35.4179  2980.8863  2967.7145        3102.5246         2960.6373   
2022-07-

In [65]:
def get_news_headlines(keyword="TCS", num_days=30):
    headlines = {}

    for i in range(num_days):
        date = datetime.now() - timedelta(days=i)
        formatted_date = date.strftime('%Y-%m-%d')
        
        # Google News RSS (can filter date later)
        url = f"https://news.google.com/rss/search?q={keyword}+when:{i}d"
        r = requests.get(url)
        soup = BeautifulSoup(r.content, features="html.parser")
        items = soup.findAll("item")

        headlines[formatted_date] = [item.title.text for item in items]

    return headlines


In [59]:
def compute_daily_sentiment(headlines_dict):
    analyzer = SentimentIntensityAnalyzer()
    sentiment_scores = {}

    for date, titles in headlines_dict.items():
        scores = [analyzer.polarity_scores(title)['compound'] for title in titles]
        avg_score = sum(scores) / len(scores) if scores else 0
        sentiment_scores[date] = avg_score

    return sentiment_scores

In [70]:
headline_data = get_news_headlines("Reliance Industry", num_days=90)
sentiment_scores = compute_daily_sentiment(headline_data)

sentiment_df = pd.DataFrame(list(sentiment_scores.items()), columns=['Date', 'Sentiment'])
sentiment_df['Date'] = pd.to_datetime(sentiment_df['Date'])
sentiment_df.set_index('Date', inplace=True)




In [72]:
sentiment_df.head(40)

Unnamed: 0_level_0,Sentiment
Date,Unnamed: 1_level_1
2025-07-05,0.16222
2025-07-04,0.290021
2025-07-03,0.212865
2025-07-02,0.158436
2025-07-01,0.075857
2025-06-30,0.096871
2025-06-29,0.084752
2025-06-28,0.100479
2025-06-27,0.100479
2025-06-26,0.113123
