In [3]:
import pandas as pd
import pandas_ta as ta
import requests
from datetime import datetime, timedelta
import nltk
from bs4 import BeautifulSoup
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [4]:
API_KEY = "QCC4AQUW1ZKX4T7P"
symbol = "TCS.BSE"

In [5]:
url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&outputsize=full&apikey={API_KEY}&datatype=csv"

In [6]:
response = requests.get(url)

In [7]:
# print response
if response.status_code == 200:
    with open('tcs_stock_data.csv', 'wb') as file:
        file.write(response.content)
    print("Data saved to tcs_stock_data.csv")


Data saved to tcs_stock_data.csv


In [8]:
# store the data in a pandas dataframe
df = pd.read_csv('tcs_stock_data.csv', index_col=0, parse_dates=True)

In [13]:
df.index = pd.to_datetime(df.index)
df = df.sort_index()
print(df.tail())

                 open       high        low      close  volume
timestamp                                                     
2025-06-30  3444.9500  3465.0000  3430.1499  3461.0500   80588
2025-07-01  3460.9500  3485.0000  3414.4500  3429.4500  221565
2025-07-02  3450.0500  3489.8501  3420.1001  3423.3501   54493
2025-07-03  3434.8999  3435.0000  3398.1499  3400.7500  113506
2025-07-04  3401.1001  3426.0000  3388.6499  3420.9500  170988


In [16]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,RSI_14,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,EMA_20,...,DMP_14,DMN_14,OBV,BBL_5_2.0,BBM_5_2.0,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,CCI_14_0.015,ATRr_14
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-03-15,181.875,182.25,179.375,179.7876,1448920,66.275892,3.328701,0.502425,2.826275,174.13599,...,32.267671,8.113486,-10920688.0,173.902165,178.33752,182.772875,4.974113,0.663468,136.475123,3.531382
2005-03-16,180.0,183.125,180.0,181.4564,1416680,68.709608,3.505174,0.543119,2.962055,174.833172,...,31.847303,7.54971,-9504008.0,175.086492,179.4138,183.741108,4.823829,0.736013,134.319623,3.517177
2005-03-17,180.625,184.25,176.2814,182.8938,2336608,70.672779,3.718156,0.604881,3.113275,175.600851,...,27.019639,13.479213,-7167400.0,177.865565,180.85632,183.847075,3.307327,0.84063,107.814914,3.842565
2005-03-18,182.4626,183.7564,178.3938,183.5126,1985600,71.50172,3.892013,0.622991,3.269023,176.354351,...,24.345072,12.144959,-5181800.0,179.251638,181.8376,184.423562,2.844254,0.823864,104.262396,3.953491
2005-03-21,183.875,184.375,180.375,181.525,1549280,65.133472,3.825318,0.445036,3.380282,176.846794,...,23.690991,11.250396,-6731080.0,179.24793,181.83508,184.42223,2.845601,0.440073,93.502189,3.95688


In [15]:
# Add multiple indicators
df.ta.rsi(append=True)                      # RSI
df.ta.macd(append=True)                     # MACD (MACDEXT style)
df.ta.ema(length=20, append=True)           # EMA20
df.ta.sma(length=50, append=True)           # SMA50
df.ta.adx(append=True)                      # ADX
df.ta.obv(append=True)                      # On Balance Volume
df.ta.bbands(append=True)                   # Bollinger Bands
df.ta.cci(append=True)                      # CCI
df.ta.atr(append=True)                      # ATR

# Clean up: Drop NaN values created by indicators (lookback periods)
df.dropna(inplace=True)

In [49]:
# Filter data from the last 3 years
cutoff_date = datetime.now() - timedelta(days=3*365)
df_recent = df[df.index >= cutoff_date]

print(df_recent.head())

                 open       high        low      close  volume      RSI  \
timestamp                                                                 
2025-07-04  3401.1001  3426.0000  3388.6499  3420.9500  170988  47.6243   
2025-07-03  3434.8999  3435.0000  3398.1499  3400.7500  113506  44.2725   
2025-07-02  3450.0500  3489.8501  3420.1001  3423.3501   54493  47.4255   
2025-07-01  3460.9500  3485.0000  3414.4500  3429.4500  221565  48.2875   
2025-06-30  3444.9500  3465.0000  3430.1499  3461.0500   80588  52.9134   

                 SMA        EMA  Real Upper Band  Real Middle Band  \
timestamp                                                            
2025-07-04  3434.860  3436.4619        3501.4278         3440.5700   
2025-07-03  3437.185  3444.3201        3503.7413         3438.8075   
2025-07-02  3438.565  3446.4134        3506.7391         3437.3675   
2025-07-01  3445.995  3445.6053        3508.7152         3435.2275   
2025-06-30  3450.900  3442.1954        3510.5360      

In [50]:
def get_news_headlines(keyword="TCS", num_days=30):
    headlines = {}

    for i in range(num_days):
        date = datetime.now() - timedelta(days=i)
        formatted_date = date.strftime('%Y-%m-%d')
        
        # Google News RSS (can filter date later)
        url = f"https://news.google.com/rss/search?q={keyword}+when:{i}d"
        r = requests.get(url)
        soup = BeautifulSoup(r.content, features="html.parser")
        items = soup.findAll("item")

        headlines[formatted_date] = [item.title.text for item in items]

    return headlines


In [51]:
def compute_daily_sentiment(headlines_dict):
    analyzer = SentimentIntensityAnalyzer()
    sentiment_scores = {}

    for date, titles in headlines_dict.items():
        scores = [analyzer.polarity_scores(title)['compound'] for title in titles]
        avg_score = sum(scores) / len(scores) if scores else 0
        sentiment_scores[date] = avg_score

    return sentiment_scores

In [None]:
headline_data = get_news_headlines("Reliance Industry", num_days=365*3)
sentiment_scores = compute_daily_sentiment(headline_data)

sentiment_df = pd.DataFrame(list(sentiment_scores.items()), columns=['Date', 'Sentiment'])
sentiment_df['Date'] = pd.to_datetime(sentiment_df['Date'])
sentiment_df.set_index('Date', inplace=True)



In [None]:
sentiment_df.head(40)

Unnamed: 0_level_0,Sentiment
Date,Unnamed: 1_level_1
2025-07-05,0.16222
2025-07-04,0.290021
2025-07-03,0.212865
2025-07-02,0.158436
2025-07-01,0.075857
2025-06-30,0.096871
2025-06-29,0.084752
2025-06-28,0.100479
2025-06-27,0.100479
2025-06-26,0.113123
