In [None]:
# Notebook setup cell: install the third-party packages used by the training
# cell below, then restart the kernel so the freshly installed versions are
# the ones that get imported.
#
# numpy and pandas are pinned to exact versions; pandas_ta, vaderSentiment,
# requests and keras-tuner are installed unpinned. `-q` keeps pip's output
# short. The leading `!` is IPython/Colab shell-magic syntax, so this cell is
# only runnable inside a notebook, not as a plain Python script.
!pip install numpy==1.26.4 pandas==2.2.2 pandas_ta vaderSentiment requests keras-tuner -q

# Deliberately kill the current kernel process by sending signal 9 to our own
# PID. Nothing after this line runs in this cell; the notebook host is
# expected to start a fresh kernel, which then picks up the new libraries.
# NOTE(review): signal 9 is SIGKILL on POSIX systems -- confirm before running
# this outside a Linux-hosted notebook.
import os
os.kill(os.getpid(), 9)

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolv

In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
import joblib
import os
import time
import requests
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas_ta as ta

# Shared VADER analyzer, reused for every headline that gets scored.
analyzer = SentimentIntensityAnalyzer()

# The NewsAPI key is read from the NEWS_API_KEY environment variable so the
# secret never lives in the notebook or its version history. When the variable
# is unset, the placeholder value is used; get_sentiment_for_daterange() treats
# that placeholder as "no key configured" and returns neutral sentiment without
# making a network call.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "YOUR_NEWS_API_KEY")

def _zero_sentiment(stock_ticker, start_date, end_date):
    """Return a neutral (all 0.0) daily sentiment series covering the range."""
    return pd.Series(
        0.0,
        index=pd.date_range(start=start_date, end=end_date),
        name=f'{stock_ticker}_Sentiment',
    )


def get_sentiment_for_daterange(stock_ticker, start_date, end_date):
    """Return the mean daily headline sentiment for a ticker from NewsAPI.

    The part of ``stock_ticker`` before the first ``.`` is used as the search
    query. Each returned article title is scored with the module-level VADER
    ``analyzer`` (compound score), scores are averaged per publication date,
    and days without articles are filled with 0.0.

    This is best-effort: if no API key is configured, the request fails, the
    API reports an error, or no articles come back, an all-zero series is
    returned instead of raising.

    Args:
        stock_ticker: Ticker symbol such as ``'TCS.NS'``.
        start_date: First day of the range (anything ``pd.date_range`` accepts).
        end_date: Last day of the range, inclusive.

    Returns:
        A ``pd.Series`` named ``'<stock_ticker>_Sentiment'`` indexed by every
        calendar day from ``start_date`` to ``end_date``.
    """
    if not NEWS_API_KEY or NEWS_API_KEY == "YOUR_NEWS_API_KEY":
        return _zero_sentiment(stock_ticker, start_date, end_date)

    query = stock_ticker.split('.')[0]
    # Pass the query via `params` so requests URL-encodes it. Building the URL
    # by hand breaks tickers such as 'M&M', where the '&' would split the
    # query string and truncate the search term.
    params = {
        'q': query,
        'language': 'en',
        'from': start_date,
        'to': end_date,
        'sortBy': 'publishedAt',
        'pageSize': 100,
    }
    try:
        r = requests.get(
            "https://newsapi.org/v2/everything",
            params=params,
            # Send the key as a header rather than in the URL so it cannot
            # leak into exception messages that echo the request URL.
            headers={'X-Api-Key': NEWS_API_KEY},
            # Never hang the whole training run on a stalled connection.
            timeout=10,
        )
        payload = r.json()
        if payload.get('status') == 'error':
            # Surface API-side failures (rate limit, plan limits, bad key)
            # instead of silently treating them as "no news".
            print(f"couldn't fetch news for {stock_ticker}: {payload.get('message', 'unknown API error')}")
            return _zero_sentiment(stock_ticker, start_date, end_date)

        articles = payload.get('articles', [])
        if not articles:
            return _zero_sentiment(stock_ticker, start_date, end_date)

        df = pd.DataFrame(articles)
        df['publishedAt'] = pd.to_datetime(df['publishedAt']).dt.date
        # Titles can be missing/empty; score those as neutral.
        df['sentiment'] = df['title'].apply(lambda t: analyzer.polarity_scores(t)['compound'] if t else 0)
        daily = df.groupby('publishedAt')['sentiment'].mean()
        daily.index = pd.to_datetime(daily.index)
        all_days = pd.date_range(start=start_date, end=end_date)
        out = daily.reindex(all_days, fill_value=0.0)
        out.name = f'{stock_ticker}_Sentiment'
        return out
    except Exception as e:
        # Deliberately broad: sentiment is an optional feature, so any failure
        # here degrades to neutral sentiment rather than aborting training.
        print(f"couldn't fetch news for {stock_ticker}: {e}")
        return _zero_sentiment(stock_ticker, start_date, end_date)

def build_model(hp, input_shape, num_outputs):
    """Build and compile a stacked-LSTM regressor from tunable hyperparameters.

    Architecture: LSTM -> Dropout -> LSTM -> Dropout -> Dense(relu) ->
    Dense(num_outputs), compiled with Adam and mean-squared-error loss.

    Args:
        hp: Hyperparameter container exposing ``Int``, ``Float`` and
            ``Choice`` (as passed in by the Keras Tuner search).
        input_shape: Shape of one input sample, given to the first LSTM layer.
        num_outputs: Width of the final Dense layer.

    Returns:
        The compiled model.
    """
    # Draw every hyperparameter up front, in the same order the search space
    # has always been declared: units_1, dropout_1, units_2, dropout_2,
    # units_3, learning_rate.
    first_lstm_units = hp.Int('units_1', 32, 128, step=32)
    first_dropout_rate = hp.Float('dropout_1', 0.2, 0.5, step=0.1)
    second_lstm_units = hp.Int('units_2', 32, 128, step=32)
    second_dropout_rate = hp.Float('dropout_2', 0.2, 0.5, step=0.1)
    dense_units = hp.Int('units_3', 16, 64, step=16)
    learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])

    network = Sequential([
        # First recurrent layer returns the full sequence so it can feed the
        # second LSTM.
        LSTM(first_lstm_units, return_sequences=True, input_shape=input_shape),
        Dropout(first_dropout_rate),
        # Second recurrent layer collapses the sequence to its final state.
        LSTM(second_lstm_units, return_sequences=False),
        Dropout(second_dropout_rate),
        Dense(dense_units, activation='relu'),
        # Linear output layer, one unit per predicted value.
        Dense(num_outputs),
    ])
    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
    )
    return network

def _make_windows(scaled_df, feature_cols, target_cols, step):
    """Turn a scaled frame into sliding-window inputs and aligned targets.

    For every row index ``i >= step`` the input is the ``step`` feature rows
    immediately before ``i`` and the target is the target columns at row ``i``.
    """
    # Materialise the arrays once; doing this inside the loop would copy the
    # whole frame on every iteration.
    feature_values = scaled_df[feature_cols].values
    target_values = scaled_df[target_cols].values
    row_indices = range(step, len(scaled_df))
    X = np.array([feature_values[i - step:i, :] for i in row_indices])
    y = np.array([target_values[i, :] for i in row_indices])
    return X, y


def tune_and_train_final_model(cfg):
    """Tune, retrain and save an LSTM return-prediction model for one sector.

    Steps:
      1. Download daily close prices for ``cfg['feature_tickers']`` from
         2015-01-01 to today and rename columns via ``cfg['rename_map']``.
      2. For each target stock add a 20-period SMA, a 14-period RSI, a daily
         news-sentiment column and the percentage-change return.
      3. Split chronologically 85/15 into train/validation, fit a
         ``MinMaxScaler`` on the training rows only, and build 60-row
         sliding windows.
      4. Run a Keras Tuner random search (10 trials, 25 epochs each) on
         ``val_loss``, rebuild the best configuration and train it for
         75 epochs on train + validation windows combined.
      5. Save the model to ``cfg['model_save_path']`` and the scaler to
         ``cfg['scaler_save_path']``.

    Args:
        cfg: Dict with keys ``sector_name``, ``target_stocks``,
            ``feature_tickers``, ``rename_map``, ``model_save_path`` and
            ``scaler_save_path``.

    Returns:
        None. The sector is skipped (with a message) when either split is too
        short to produce at least one window.
    """
    print(f"\n>>> tuning {cfg['sector_name']} <<<")
    start_date = '2015-01-01'
    end_date = pd.to_datetime('today').strftime('%Y-%m-%d')
    data = yf.download(cfg['feature_tickers'], start=start_date, end=end_date)
    df = data['Close'].copy()
    if cfg['rename_map']:
        df.rename(columns=cfg['rename_map'], inplace=True)
    df.ffill(inplace=True)
    for s in cfg['target_stocks']:
        df[f'{s}_SMA_20'] = ta.sma(df[s], length=20)
        df[f'{s}_RSI_14'] = ta.rsi(df[s], length=14)
    for s in cfg['target_stocks']:
        sent = get_sentiment_for_daterange(s, start_date, end_date)
        df = df.join(sent)
        # Pause between news requests to stay gentle on the API.
        time.sleep(1)
    df.ffill(inplace=True)
    for s in cfg['target_stocks']:
        df[f'{s}_Return'] = df[s].pct_change()
    # Drops the indicator warm-up rows and the first return row.
    df.dropna(inplace=True)

    # Every non-return column is an input feature; the returns are the targets.
    feat = [c for c in df.columns if '_Return' not in c]
    tgt = [f'{s}_Return' for s in cfg['target_stocks']]

    step = 60
    train_size = int(len(df) * 0.85)
    train, val = df.iloc[:train_size], df.iloc[train_size:]
    # A split needs strictly more than `step` rows to yield at least one
    # window; with exactly `step` rows the window arrays would be empty and
    # the tuner would fail on their shape.
    if len(train) <= step or len(val) <= step:
        print(f"skip {cfg['sector_name']} (not enough data)")
        return

    # Fit the scaler on training rows only so validation statistics do not
    # leak into the scaling.
    scaler = MinMaxScaler()
    scaler.fit(train)

    s_train = scaler.transform(train)
    s_val = scaler.transform(val)
    train_df = pd.DataFrame(s_train, columns=df.columns, index=train.index)
    val_df = pd.DataFrame(s_val, columns=df.columns, index=val.index)

    X_train, y_train = _make_windows(train_df, feat, tgt, step)
    X_val, y_val = _make_windows(val_df, feat, tgt, step)

    tuner = kt.RandomSearch(
        lambda hp: build_model(hp, input_shape=(X_train.shape[1], X_train.shape[2]), num_outputs=len(cfg['target_stocks'])),
        objective='val_loss',
        max_trials=10,
        executions_per_trial=1,
        directory='keras_tuner_dir',
        project_name=f"{cfg['sector_name']}_tuning"
    )
    tuner.search(X_train, y_train, epochs=25, validation_data=(X_val, y_val))
    best_hp = tuner.get_best_hyperparameters(1)[0]
    # Rebuild a fresh, untrained model with the winning hyperparameters.
    model = tuner.hypermodel.build(best_hp)

    # Final fit uses all available windows (train + validation).
    full_X = np.concatenate((X_train, X_val))
    full_y = np.concatenate((y_train, y_val))
    model.fit(full_X, full_y, epochs=75, batch_size=32, verbose=0)

    model.save(cfg['model_save_path'])
    joblib.dump(scaler, cfg['scaler_save_path'])
    print(f"{cfg['sector_name']} done! best params: {best_hp.values}")

if __name__ == '__main__':
    # One entry per sector: the companies whose returns are predicted, plus
    # the index tickers used as extra features (mapped to readable column
    # names).
    SECTORS = [
        {
            'sector_name': 'IT',
            'companies': ['TCS.NS', 'INFY.NS', 'WIPRO.NS', 'HCLTECH.NS'],
            'indices': {'^CNXIT': 'Nifty_IT_Index', '^NSEI': 'Nifty_50_Index'},
        },
        {
            'sector_name': 'Auto',
            'companies': ['TATAMOTORS.NS', 'MARUTI.NS', 'M&M.NS', 'BAJAJ-AUTO.NS'],
            'indices': {'^CNXAUTO': 'Nifty_Auto_Index'},
        },
        {
            'sector_name': 'Banking',
            'companies': ['HDFCBANK.NS', 'ICICIBANK.NS', 'SBIN.NS', 'KOTAKBANK.NS'],
            'indices': {'^NSEBANK': 'Nifty_Bank_Index'},
        },
        {
            'sector_name': 'FMCG',
            'companies': ['HINDUNILVR.NS', 'ITC.NS', 'NESTLEIND.NS', 'BRITANNIA.NS'],
            'indices': {'^CNXFMCG': 'Nifty_FMCG_Index'},
        },
        {
            'sector_name': 'Pharma',
            'companies': ['SUNPHARMA.NS', 'CIPLA.NS', 'DRREDDY.NS', 'DIVISLAB.NS'],
            'indices': {'^CNXPHARMA': 'Nifty_Pharma_Index'},
        },
    ]

    for sector in SECTORS:
        # Lower-cased sector name is used in the output file names.
        slug = sector['sector_name'].lower()
        cfg = {
            'sector_name': sector['sector_name'],
            'target_stocks': sector['companies'],
            # Features are the companies themselves followed by the indices.
            'feature_tickers': sector['companies'] + list(sector['indices'].keys()),
            'rename_map': sector['indices'],
            'model_save_path': f'best_model_{slug}.keras',
            'scaler_save_path': f'best_scaler_{slug}.save',
        }
        tune_and_train_final_model(cfg)
    print("\nall done :)")


Trial 10 Complete [00h 00m 28s]
val_loss: 0.0029092878103256226

Best val_loss So Far: 0.002766660414636135
Total elapsed time: 00h 04m 31s
Pharma done! best params: {'units_1': 64, 'dropout_1': 0.30000000000000004, 'units_2': 96, 'dropout_2': 0.30000000000000004, 'units_3': 16, 'learning_rate': 0.001}

all done :)
