In [None]:
# Install pinned numpy/pandas versions plus the extra packages the notebook
# imports (pandas_ta, vaderSentiment, requests, keras-tuner). `-q` keeps pip quiet.
# NOTE(review): yfinance, scikit-learn, tensorflow and joblib are imported by
# the next cell but are not installed here — presumably they are preinstalled
# on Colab; confirm before running elsewhere.
!pip install numpy==1.26.4 pandas==2.2.2 pandas_ta vaderSentiment requests keras-tuner -q

# Send signal 9 to this very process so the Colab kernel is forcefully killed
# and restarted, ensuring the newly installed libraries are the ones loaded.
# Nothing placed after os.kill in this cell will run; continue with the next cell.
import os
os.kill(os.getpid(), 9)

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m108.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resol

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
import joblib
import os
import time
import requests
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas_ta as ta

# --- 1. Sentiment Analysis Setup ---
# VADER analyzer used to score news headlines.
analyzer = SentimentIntensityAnalyzer()
# Read the NewsAPI key from the environment so the secret stays out of the
# notebook. The fallback is the exact placeholder that
# get_sentiment_for_daterange treats as "no key configured" (it then returns
# neutral sentiment instead of calling the API).
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "YOUR_NEWS_API_KEY")  # <-- IMPORTANT: set NEWS_API_KEY or replace the placeholder

def get_sentiment_for_daterange(stock_ticker, start_date, end_date):
    """Return a daily headline-sentiment series for one ticker.

    Queries the NewsAPI ``/v2/everything`` endpoint for English articles
    matching the ticker symbol with its exchange suffix stripped
    (``'TCS.NS'`` -> ``'TCS'``), scores every headline with VADER's compound
    score and averages the scores per publication day.

    Args:
        stock_ticker: Ticker symbol; the text before the first ``'.'`` is
            used as the search query.
        start_date: First day of the range (anything ``pd.date_range`` accepts).
        end_date: Last day of the range, inclusive.

    Returns:
        A ``pd.Series`` named ``'<stock_ticker>_Sentiment'`` indexed by every
        calendar day in the range. Days without articles are ``0.0``. An
        all-zero series is returned when no API key is configured, when no
        articles come back, or when the request/parsing fails (the failure is
        printed, not raised).
    """
    series_name = f'{stock_ticker}_Sentiment'
    all_days = pd.date_range(start=start_date, end=end_date)

    def neutral_series():
        # Fallback used by every "no usable data" path.
        return pd.Series(0.0, index=all_days, name=series_name)

    # Treat an empty key or either known placeholder as "not configured".
    if not NEWS_API_KEY or NEWS_API_KEY in ("API_KEY", "YOUR_NEWS_API_KEY"):
        return neutral_series()

    # Let requests encode the query string: tickers such as 'M&M.NS' contain
    # '&', which would otherwise split the `q` parameter.
    params = {
        'q': stock_ticker.split('.')[0],
        'language': 'en',
        'from': start_date,
        'to': end_date,
        'sortBy': 'publishedAt',
        'pageSize': 100,
    }
    # Send the key as a header rather than in the URL so it cannot leak
    # through the error message printed below.
    headers = {'X-Api-Key': NEWS_API_KEY}

    try:
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params=params,
            headers=headers,
            timeout=10,  # never hang the whole training run on one request
        )
        # Surface HTTP errors (bad key, rate limit, ...) instead of silently
        # treating them as "no articles".
        response.raise_for_status()
        articles = response.json().get('articles', [])
        if not articles:
            return neutral_series()

        news_df = pd.DataFrame(articles)
        news_df['publishedAt'] = pd.to_datetime(news_df['publishedAt']).dt.date
        # Missing/empty titles count as neutral.
        news_df['sentiment'] = news_df['title'].apply(
            lambda title: analyzer.polarity_scores(title)['compound'] if title else 0
        )
        daily_sentiment = news_df.groupby('publishedAt')['sentiment'].mean()
        daily_sentiment.index = pd.to_datetime(daily_sentiment.index)

        sentiment_series = daily_sentiment.reindex(all_days, fill_value=0.0)
        sentiment_series.name = series_name
        return sentiment_series
    except Exception as e:
        # Deliberate best effort: sentiment is an optional feature, so any
        # failure degrades to neutral scores rather than aborting training.
        print(f"  - Could not fetch news for {stock_ticker}: {e}")
        return neutral_series()

# --- 2. KerasTuner Model Builder Function ---
def build_model(hp, input_shape, num_outputs):
    """
    Build and compile one candidate network for the hyperparameter tuner.

    The architecture is LSTM -> Dropout -> LSTM -> Dropout -> Dense(relu) ->
    Dense(num_outputs), compiled with Adam and mean-squared-error loss. Layer
    widths, dropout rates and the learning rate are drawn from `hp`.

    hp: KerasTuner hyperparameter container used to sample the search space.
    input_shape: shape passed to the first LSTM layer.
    num_outputs: number of units in the final Dense layer.
    """
    # Sample the whole search space up front (same order as the layers use it).
    first_lstm_units = hp.Int('units_1', min_value=32, max_value=128, step=32)
    first_dropout = hp.Float('dropout_1', min_value=0.2, max_value=0.5, step=0.1)
    second_lstm_units = hp.Int('units_2', min_value=32, max_value=128, step=32)
    second_dropout = hp.Float('dropout_2', min_value=0.2, max_value=0.5, step=0.1)
    hidden_dense_units = hp.Int('units_3', min_value=16, max_value=64, step=16)
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    layer_stack = (
        # First recurrent layer keeps the full sequence for the next LSTM.
        LSTM(units=first_lstm_units, return_sequences=True, input_shape=input_shape),
        Dropout(first_dropout),
        # Second recurrent layer collapses the sequence to its final state.
        LSTM(units=second_lstm_units, return_sequences=False),
        Dropout(second_dropout),
        Dense(units=hidden_dense_units, activation='relu'),
        # Output layer: one unit per predicted value.
        Dense(units=num_outputs),
    )

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)

    network.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
    )
    return network

def _make_windows(scaled_df, feature_cols, target_cols, time_step):
    """Turn a scaled frame into (X, y): `time_step`-row feature windows and the target row that follows each."""
    # Extract the arrays once instead of re-slicing the DataFrame per window.
    feature_values = scaled_df[feature_cols].to_numpy()
    target_values = scaled_df[target_cols].to_numpy()
    X = np.array([
        feature_values[end - time_step:end]
        for end in range(time_step, len(scaled_df))
    ])
    y = np.array(target_values[time_step:])
    return X, y


def tune_and_train_final_model(config, time_step=60):
    """Tune, retrain and save the LSTM model for one sector.

    Downloads closing prices, adds SMA-20 / RSI-14 indicators and daily news
    sentiment per target stock, derives daily returns as targets, runs a
    KerasTuner random search, retrains the best configuration on train +
    validation windows and writes the model and scaler to disk.

    Args:
        config: dict with the keys
            'sector_name'      - label used in log messages and the tuner project name,
            'feature_tickers'  - tickers passed to yf.download,
            'rename_map'       - column rename mapping applied to the price frame (may be empty),
            'target_stocks'    - columns whose returns are predicted,
            'model_save_path'  - destination for the trained model,
            'scaler_save_path' - destination for the fitted MinMaxScaler.
        time_step: number of past rows in each input window (default 60).

    Returns:
        None. The sector is skipped (with a message) when either split is too
        short to produce at least one window.
    """
    print(f"\n--- Starting FINAL HYPERPARAMETER TUNING for the {config['sector_name']} sector model ---")

    # --- 3. Load and Prepare Data (as before) ---
    print("Loading and preparing data...")
    start_date = '2015-01-01'
    end_date = pd.to_datetime('today').strftime('%Y-%m-%d')
    data = yf.download(config['feature_tickers'], start=start_date, end=end_date)
    df = data['Close'].copy()
    if config['rename_map']:
        df.rename(columns=config['rename_map'], inplace=True)
    df.ffill(inplace=True)

    print("Calculating technical indicators...")
    for stock in config['target_stocks']:
        df[f'{stock}_SMA_20'] = ta.sma(df[stock], length=20)
        df[f'{stock}_RSI_14'] = ta.rsi(df[stock], length=14)

    print("Fetching and adding sentiment scores...")
    for stock in config['target_stocks']:
        sentiment_data = get_sentiment_for_daterange(stock, start_date, end_date)
        df = df.join(sentiment_data)
        time.sleep(1)  # pause between successive news requests

    df.ffill(inplace=True)

    for stock in config['target_stocks']:
        df[f'{stock}_Return'] = df[stock].pct_change()
    # Drops the indicator warm-up rows and the first (undefined) return row.
    df.dropna(inplace=True)

    feature_cols = [col for col in df.columns if '_Return' not in col]
    target_cols = [f'{stock}_Return' for stock in config['target_stocks']]

    # Chronological split: the most recent 15% is held out for tuning.
    train_size = int(len(df) * 0.85)
    train_data = df.iloc[:train_size]
    val_data = df.iloc[train_size:]  # Use the test set as a validation set for tuning

    # Each split needs MORE than `time_step` rows: exactly `time_step` rows
    # yields zero windows and would crash on X_train.shape[1] / empty validation.
    if len(train_data) <= time_step or len(val_data) <= time_step:
        print(f"Skipping {config['sector_name']} due to insufficient data.")
        return

    # Fit the scaler on the training split only, then apply it to both splits.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(train_data)

    scaled_train_df = pd.DataFrame(
        scaler.transform(train_data), columns=df.columns, index=train_data.index
    )
    X_train, y_train = _make_windows(scaled_train_df, feature_cols, target_cols, time_step)

    scaled_val_df = pd.DataFrame(
        scaler.transform(val_data), columns=df.columns, index=val_data.index
    )
    X_val, y_val = _make_windows(scaled_val_df, feature_cols, target_cols, time_step)

    # --- 4. Run the Hyperparameter Search ---
    print("Starting hyperparameter search...")
    # NOTE(review): without overwrite=True KerasTuner reloads any earlier
    # results stored under directory/project_name — confirm that resuming a
    # previous search is intended when the data has changed.
    tuner = kt.RandomSearch(
        lambda hp: build_model(
            hp,
            input_shape=(X_train.shape[1], X_train.shape[2]),
            num_outputs=len(config['target_stocks']),
        ),
        objective='val_loss',
        max_trials=10,  # Number of different model combinations to test
        executions_per_trial=1,
        directory='keras_tuner_dir',
        project_name=f"{config['sector_name']}_tuning"
    )

    # Search with fewer epochs for speed
    tuner.search(X_train, y_train, epochs=25, validation_data=(X_val, y_val))

    # --- 5. Get the Best Model and Retrain It ---
    print("Search complete. Retraining the best model on the full dataset...")
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_model = tuner.hypermodel.build(best_hps)

    # Combine train and val data to retrain on the full dataset
    full_X = np.concatenate((X_train, X_val))
    full_y = np.concatenate((y_train, y_val))

    best_model.fit(full_X, full_y, epochs=75, batch_size=32, verbose=0)  # Retrain for more epochs

    # --- 6. Save the Final, Optimized Artifacts ---
    best_model.save(config['model_save_path'])
    joblib.dump(scaler, config['scaler_save_path'])
    print(f"Successfully trained and saved BEST model to {config['model_save_path']}")
    print(f"Successfully saved BEST scaler to {config['scaler_save_path']}")
    print(f"Optimal hyperparameters: {best_hps.values}")


if __name__ == '__main__':
    # One entry per sector: a display name, the companies whose returns are
    # predicted, and index tickers mapped to the column names they are renamed to.
    SECTOR_DEFINITIONS = [
        dict(
            sector_name='IT',
            companies=['TCS.NS', 'INFY.NS', 'WIPRO.NS', 'HCLTECH.NS'],
            indices={'^CNXIT': 'Nifty_IT_Index', '^NSEI': 'Nifty_50_Index'},
        ),
        dict(
            sector_name='Auto',
            companies=['TATAMOTORS.NS', 'MARUTI.NS', 'M&M.NS', 'BAJAJ-AUTO.NS'],
            indices={'^CNXAUTO': 'Nifty_Auto_Index'},
        ),
        dict(
            sector_name='Banking',
            companies=['HDFCBANK.NS', 'ICICIBANK.NS', 'SBIN.NS', 'KOTAKBANK.NS'],
            indices={'^NSEBANK': 'Nifty_Bank_Index'},
        ),
        dict(
            sector_name='FMCG',
            companies=['HINDUNILVR.NS', 'ITC.NS', 'NESTLEIND.NS', 'BRITANNIA.NS'],
            indices={'^CNXFMCG': 'Nifty_FMCG_Index'},
        ),
        dict(
            sector_name='Pharma',
            companies=['SUNPHARMA.NS', 'CIPLA.NS', 'DRREDDY.NS', 'DIVISLAB.NS'],
            indices={'^CNXPHARMA': 'Nifty_Pharma_Index'},
        ),
    ]

    # Tune and train the sectors one after another.
    for sector_config in SECTOR_DEFINITIONS:
        # Lower-cased sector name is used in the artifact file names.
        sector_name_safe = sector_config['sector_name'].lower()
        # Download list: the companies first, then the index tickers.
        feature_tickers = [*sector_config['companies'], *sector_config['indices']]

        job_config = dict(
            sector_name=sector_config['sector_name'],
            target_stocks=sector_config['companies'],
            feature_tickers=feature_tickers,
            rename_map=sector_config['indices'],
            model_save_path=f'best_model_{sector_name_safe}.keras',
            scaler_save_path=f'best_scaler_{sector_name_safe}.save',
        )

        tune_and_train_final_model(job_config)

    print("\n--- All models have been tuned and trained successfully! ---")


Trial 10 Complete [00h 00m 29s]
val_loss: 0.0027795617934316397

Best val_loss So Far: 0.0027773799374699593
Total elapsed time: 00h 04m 56s
Search complete. Retraining the best model on the full dataset...
Successfully trained and saved BEST model to best_model_pharma.keras
Successfully saved BEST scaler to best_scaler_pharma.save
Optimal hyperparameters: {'units_1': 96, 'dropout_1': 0.30000000000000004, 'units_2': 64, 'dropout_2': 0.2, 'units_3': 32, 'learning_rate': 0.001}

--- All models have been tuned and trained successfully! ---
