In [36]:
import os
from datetime import datetime, timedelta

import nltk
import numpy as np
import pandas as pd
import pandas_ta as ta
import requests
from bs4 import BeautifulSoup
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [38]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.


In [4]:
# The Alpha Vantage key is read from the environment so the secret is never
# stored in the notebook or its version history. The key that used to be
# hardcoded here should be treated as exposed and rotated.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the ALPHAVANTAGE_API_KEY environment variable before running this notebook."
    )

# Ticker interpolated into the `symbol` query parameter of the request URL.
symbol = "TCS.BSE"

In [5]:
# Daily time-series request: full history, CSV payload, for `symbol`.
url = (
    "https://www.alphavantage.co/query"
    "?function=TIME_SERIES_DAILY"
    f"&symbol={symbol}"
    "&outputsize=full"
    f"&apikey={API_KEY}"
    "&datatype=csv"
)

In [6]:
# Download the payload. Without a timeout a stalled connection would block the
# kernel indefinitely, so give up after 30 seconds.
response = requests.get(url, timeout=30)

In [7]:
# Persist the downloaded CSV, failing loudly on a bad response. Previously a
# non-200 status was skipped without any message, so the next cell would read a
# stale (or missing) file.
if response.status_code != 200:
    raise RuntimeError(
        f"Download failed with HTTP status {response.status_code}; CSV not written."
    )

# NOTE(review): Alpha Vantage appears to report problems (invalid key, rate
# limit, unknown symbol) as a JSON document even with HTTP 200 — confirm against
# the API docs. A CSV body never starts with '{', so reject that case rather
# than saving it as price data.
if response.content.lstrip().startswith(b"{"):
    raise ValueError(
        "Expected CSV but received a JSON payload: " + response.text[:200]
    )

with open('tcs_stock_data.csv', 'wb') as file:
    file.write(response.content)
print("Data saved to tcs_stock_data.csv")


Data saved to tcs_stock_data.csv


In [18]:
# Load the saved price history; the first CSV column becomes the index and is
# parsed as dates.
df = pd.read_csv(
    filepath_or_buffer='tcs_stock_data.csv',
    parse_dates=True,
    index_col=0,
)

In [19]:
# Make sure the index is datetime-typed, order rows oldest -> newest, then
# preview the latest rows.
df.index = pd.to_datetime(df.index)
df = df.sort_index(axis=0, ascending=True)
print(df.tail(n=5))

                 open       high        low      close  volume
timestamp                                                     
2025-06-30  3444.9500  3465.0000  3430.1499  3461.0500   80588
2025-07-01  3460.9500  3485.0000  3414.4500  3429.4500  221565
2025-07-02  3450.0500  3489.8501  3420.1001  3423.3501   54493
2025-07-03  3434.8999  3435.0000  3398.1499  3400.7500  113506
2025-07-04  3401.1001  3426.0000  3388.6499  3420.9500  170988


In [31]:
# List the columns currently on df. The result depends on which feature cells
# have already been executed when this cell runs.
column_names = df.columns.tolist()
print("Column names:", column_names)

Column names: ['open', 'high', 'low', 'close', 'volume', 'RSI_14', 'MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9', 'EMA_20', 'SMA_50', 'OBV', 'BBL_5_2.0', 'BBM_5_2.0', 'BBU_5_2.0', 'BBB_5_2.0', 'BBP_5_2.0', 'CCI_14_0.015', 'ATRr_14', 'ADX', 'DI+', 'DI-', 'trend_signal', 'MA100', 'MA200', 'MA_signal']


In [None]:
# Preview the last five rows of df.
df.tail(n=5)

In [20]:
# Technical indicators appended to df as new columns through the pandas_ta
# accessor. Each entry is (accessor method, extra keyword arguments).
indicator_specs = [
    ("rsi", {}),                 # relative strength index
    ("macd", {}),                # MACD line, histogram and signal
    ("ema", {"length": 20}),     # 20-period exponential moving average
    ("sma", {"length": 50}),     # 50-period simple moving average
    ("obv", {}),                 # on-balance volume
    ("bbands", {}),              # Bollinger bands
    ("cci", {}),                 # commodity channel index
    ("atr", {}),                 # average true range
]

for indicator_name, indicator_kwargs in indicator_specs:
    getattr(df.ta, indicator_name)(append=True, **indicator_kwargs)

# Indicators are undefined during their look-back window; discard those rows.
df.dropna(inplace=True)

In [22]:
# Calculate ADX / directional indicators with the library's default period.
adx = ta.adx(df['high'], df['low'], df['close'])

# Short names used by the rest of the notebook.
adx_short_names = {
    'ADX_14': 'ADX',
    'DMP_14': 'DI+',
    'DMN_14': 'DI-'
}
adx_renamed = adx.rename(columns=adx_short_names)

# Fail early if the expected columns are absent (e.g. a different default
# period), instead of letting the rename silently do nothing.
missing_adx_columns = set(adx_short_names.values()) - set(adx_renamed.columns)
if missing_adx_columns:
    raise KeyError(f"ADX output is missing expected columns: {sorted(missing_adx_columns)}")

# Merge into the main DataFrame. Columns left behind by an earlier run of this
# cell are dropped first. Otherwise re-running the cell duplicates them, and
# row['ADX'] then yields a Series instead of a scalar in later row-wise logic.
df = pd.concat(
    [df.drop(columns=list(adx_renamed.columns), errors='ignore'), adx_renamed],
    axis=1,
)

In [23]:
def trend_direction(row):
    """Classify one row's trend from its 'ADX', 'DI+' and 'DI-' values.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing the keys
            'ADX', 'DI+' and 'DI-'.

    Returns:
        1 when ADX is above 25 and DI+ exceeds DI-,
        -1 when ADX is above 25 and DI+ does not exceed DI-,
        0 when ADX is not above 25 (this includes a NaN ADX).
    """
    # Guard clause: anything that is not strictly above the threshold is "no trend".
    if not row['ADX'] > 25:
        return 0
    return 1 if row['DI+'] > row['DI-'] else -1

# Label each row with the value returned by trend_direction (1, -1 or 0).
trend_labels = df.apply(trend_direction, axis=1)
df['trend_signal'] = trend_labels

In [29]:
# Long-window moving averages of the close price.
close_prices = df['close']
df['MA100'] = close_prices.rolling(window=100).mean()
df['MA200'] = close_prices.rolling(window=200).mean()

# 1 where the 100-row average is above the 200-row average, otherwise 0.
df['MA_signal'] = df['MA100'].gt(df['MA200']).astype(int)

# Remove rows that still contain NaN (e.g. inside the rolling warm-up windows).
df = df.dropna()

In [34]:
# 1️⃣ Select features
features = ['open', 'high', 'low', 'close', 'volume',
            'RSI_14', 'MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9',
            'EMA_20', 'SMA_50', 'OBV', 'BBL_5_2.0', 'BBM_5_2.0', 'BBU_5_2.0',
            'BBB_5_2.0', 'BBP_5_2.0', 'CCI_14_0.015', 'ATRr_14',
            'ADX', 'DI+', 'DI-', 'trend_signal', 'MA100', 'MA200', 'MA_signal']

sequence_length = 60  # each sample is the previous 60 rows
target_col_index = features.index('close')  # the target is the next row's scaled 'close'
test_fraction = 0.2  # share of sequences held out, taken from the end (no shuffling)

n_sequences = len(df) - sequence_length
if n_sequences < 2:
    raise ValueError(
        f"Need at least {sequence_length + 2} rows to build train and test sequences, "
        f"got {len(df)}."
    )

# 2️⃣ Scaling features
# Fit the scaler on the training period only. Fitting on the full frame (as
# before) lets the min/max of the held-out period leak into the training data.
# The boundary is taken from the same splitter call used in step 4 so that
# both agree exactly on where training ends.
train_positions, _ = train_test_split(
    np.arange(n_sequences), test_size=test_fraction, shuffle=False
)
# Training sequences touch rows [0, sequence_length + number_of_training_sequences).
n_train_rows = sequence_length + len(train_positions)

scaler = MinMaxScaler()
scaler.fit(df[features].iloc[:n_train_rows])
# Held-out rows may now fall outside [0, 1]; that is expected.
scaled_data = scaler.transform(df[features])

# 3️⃣ Create sequences
X = np.array([
    scaled_data[end - sequence_length:end]
    for end in range(sequence_length, len(scaled_data))
])
y = scaled_data[sequence_length:, target_col_index].copy()

# 4️⃣ Split into train/test (chronological: the last 20% of sequences are the test set)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_fraction, shuffle=False
)

In [39]:
# Two stacked LSTM layers with dropout after each, ending in a single-unit
# dense layer that outputs the predicted (scaled) closing price.
model = Sequential([
    LSTM(units=128, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(units=64, return_sequences=False),
    Dropout(0.2),
    Dense(units=1),
])

model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 60, 128)           79360     
                                                                 
 dropout (Dropout)           (None, 60, 128)           0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 128,833
Trainable params: 128,833
Non-trainable params: 0
_________________________________________________________________


In [49]:
# Keep only rows whose index falls within the last 3*365 days, then preview them.
lookback = timedelta(days=3 * 365)
cutoff_date = datetime.now() - lookback
df_recent = df.loc[df.index >= cutoff_date]

print(df_recent.head())

                 open       high        low      close  volume      RSI  \
timestamp                                                                 
2025-07-04  3401.1001  3426.0000  3388.6499  3420.9500  170988  47.6243   
2025-07-03  3434.8999  3435.0000  3398.1499  3400.7500  113506  44.2725   
2025-07-02  3450.0500  3489.8501  3420.1001  3423.3501   54493  47.4255   
2025-07-01  3460.9500  3485.0000  3414.4500  3429.4500  221565  48.2875   
2025-06-30  3444.9500  3465.0000  3430.1499  3461.0500   80588  52.9134   

                 SMA        EMA  Real Upper Band  Real Middle Band  \
timestamp                                                            
2025-07-04  3434.860  3436.4619        3501.4278         3440.5700   
2025-07-03  3437.185  3444.3201        3503.7413         3438.8075   
2025-07-02  3438.565  3446.4134        3506.7391         3437.3675   
2025-07-01  3445.995  3445.6053        3508.7152         3435.2275   
2025-06-30  3450.900  3442.1954        3510.5360      

In [50]:
def get_news_headlines(keyword="TCS", num_days=30, timeout=10):
    """Collect Google News RSS headlines for `keyword`, grouped by calendar day.

    One request is issued per day, restricted to that day with the
    ``after:`` / ``before:`` search operators. The previous ``when:{i}d``
    filter selected a cumulative "last i days" window, so headlines were filed
    under dates they did not belong to.

    Args:
        keyword: Search phrase; it is URL-encoded by the HTTP client.
        num_days: Number of days to cover, counting back from today.
        timeout: Per-request timeout in seconds.

    Returns:
        dict mapping 'YYYY-MM-DD' to the list of headline strings returned for
        that day (an empty list when there are none). Empty dict when
        ``num_days`` is not positive.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            response (e.g. rate limiting). Raising avoids recording such days
            as "no news".
    """
    if num_days <= 0:
        return {}

    search_url = "https://news.google.com/rss/search"
    # Resolve "today" once so a run that crosses midnight cannot shift the dates.
    today = datetime.now().date()
    headlines = {}

    # One session re-uses the connection across the per-day requests.
    with requests.Session() as session:
        for offset in range(num_days):
            day = today - timedelta(days=offset)
            formatted_date = day.strftime('%Y-%m-%d')
            following_day = (day + timedelta(days=1)).strftime('%Y-%m-%d')

            # NOTE(review): exact inclusive/exclusive behaviour of after:/before:
            # at the day boundaries is not verified here.
            query = f"{keyword} after:{formatted_date} before:{following_day}"
            response = session.get(search_url, params={"q": query}, timeout=timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, features="html.parser")
            headlines[formatted_date] = [
                item.title.text
                for item in soup.find_all("item")
                if item.title is not None  # skip malformed items without a title
            ]

    return headlines


In [51]:
def compute_daily_sentiment(headlines_dict):
    """Average the VADER compound score of each date's headlines.

    Args:
        headlines_dict: Mapping of date key -> iterable of headline strings.

    Returns:
        dict with the same keys, each mapped to the mean 'compound' score of
        its headlines, or 0 when the date has no headlines.
    """
    vader = SentimentIntensityAnalyzer()

    def _mean_compound(titles):
        compounds = [vader.polarity_scores(text)['compound'] for text in titles]
        if not compounds:
            return 0
        return sum(compounds) / len(compounds)

    return {day: _mean_compound(titles) for day, titles in headlines_dict.items()}

In [None]:
# The sentiment series has to describe the same company as the price data
# (symbol = "TCS.BSE", tcs_stock_data.csv). The keyword used here before,
# "Reliance Industry", fetched news about a different company.
# Cost: get_news_headlines issues one HTTP request per day, i.e. 365*3 requests.
headline_data = get_news_headlines("TCS", num_days=365*3)
sentiment_scores = compute_daily_sentiment(headline_data)

# One row per date, indexed by a datetime 'Date' so it can be aligned with df.
sentiment_df = (
    pd.DataFrame(list(sentiment_scores.items()), columns=['Date', 'Sentiment'])
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

