In [32]:
# Cell 1: Imports and setup
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import os
import pickle
import ta
import time
from datetime import datetime, timedelta
from pandas.tseries.offsets import BDay
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Setup logging: ERROR-and-above records are written to download_errors.log
logging.basicConfig(filename='download_errors.log', level=logging.ERROR)

# Exchange trading holidays for 2025 (India, NSE/BSE equity segment), as
# 'YYYY-MM-DD' strings. Weekends are handled separately by the caller, so a
# holiday that falls on a weekend (e.g. Republic Day 2025) is harmless here.
# NOTE(review): dates follow the published 2025 exchange calendar; re-check
# against the exchange circular if it is revised during the year.
holidays_2025 = [
    '2025-01-26',  # Republic Day (a Sunday in 2025)
    '2025-02-26',  # Mahashivratri
    '2025-03-14',  # Holi
    '2025-03-31',  # Id-Ul-Fitr (Ramadan Eid)
    '2025-04-10',  # Mahavir Jayanti (was mislabelled as Good Friday)
    '2025-04-14',  # Ambedkar Jayanti
    '2025-04-18',  # Good Friday
    '2025-05-01',  # Maharashtra Day
    '2025-08-15',  # Independence Day
    '2025-08-27',  # Ganesh Chaturthi
    '2025-10-02',  # Gandhi Jayanti / Dussehra
    '2025-10-21',  # Diwali (Laxmi Pujan)
    '2025-10-22',  # Diwali (Balipratipada)
    '2025-11-05',  # Guru Nanak Jayanti
    '2025-12-25',  # Christmas
]

In [34]:
# Check if market is closed
def is_market_closed(date, holidays=None):
    """Return True when ``date`` is a weekend day or a listed holiday.

    Args:
        date: Any object exposing ``weekday()`` and ``strftime()``
            (e.g. ``datetime.datetime`` or ``pandas.Timestamp``).
        holidays: Optional container of ``'YYYY-MM-DD'`` strings to treat as
            closed days. When omitted, the module-level ``holidays_2025``
            list is used, which matches the previous behaviour.

    Returns:
        bool: True for Saturday/Sunday or a date found in ``holidays``.
    """
    # weekday() numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
    if date.weekday() >= 5:
        return True
    if holidays is None:
        holidays = holidays_2025
    return date.strftime('%Y-%m-%d') in holidays

# Download stock data and compute 27 indicators
def get_stock_data(ticker, start_date, end_date, retries=5):
    for attempt in range(retries):
        try:
            stock_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
            if stock_data.empty:
                raise ValueError(f"No data returned for {ticker}")
            # Ensure flat column structure
            df = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
            df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']  # Flatten columns
            
            # Trend Indicators
            df['EMA_50'] = ta.trend.EMAIndicator(df['Close'], window=50).ema_indicator()
            df['SMA_200'] = ta.trend.SMAIndicator(df['Close'], window=200).sma_indicator()
            df['MACD'] = ta.trend.MACD(df['Close']).macd_diff()
            df['EMA_20'] = ta.trend.EMAIndicator(df['Close'], window=20).ema_indicator()
            ichimoku = ta.trend.IchimokuIndicator(df['High'], df['Low'])
            df['Ichimoku_Tenkan'] = ichimoku.ichimoku_conversion_line()
            df['Ichimoku_Kijun'] = ichimoku.ichimoku_base_line()
            df['Ichimoku_Senkou_A'] = ichimoku.ichimoku_a()
            df['Ichimoku_Senkou_B'] = ichimoku.ichimoku_b()
            df['Parabolic_SAR'] = ta.trend.PSARIndicator(df['High'], df['Low'], df['Close']).psar()
            
            # Momentum Indicators
            df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
            stochastic = ta.momentum.StochasticOscillator(df['High'], df['Low'], df['Close'], window=14)
            df['Stochastic_K'] = stochastic.stoch()
            df['Stochastic_D'] = stochastic.stoch_signal()
            df['Williams_R'] = ta.momentum.WilliamsRIndicator(df['High'], df['Low'], df['Close'], lbp=14).williams_r()
            df['CCI'] = ta.trend.CCIIndicator(df['High'], df['Low'], df['Close'], window=20).cci()
            df['ROC'] = ta.momentum.ROCIndicator(df['Close'], window=12).roc()
            df['MOM'] = df['Close'].diff(10)  # Replaced MomentumIndicator with pandas diff
            df['Ultimate_Oscillator'] = ta.momentum.UltimateOscillator(df['High'], df['Low'], df['Close']).ultimate_oscillator()
            
            # Volatility Indicators
            bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
            df['BB_Middle'] = bb.bollinger_mavg()
            df['ATR'] = ta.volatility.AverageTrueRange(df['High'], df['Low'], df['Close'], window=14).average_true_range()
            df['Std_Dev'] = df['Close'].rolling(window=20).std()
            keltner = ta.volatility.KeltnerChannel(df['High'], df['Low'], df['Close'], window=20)
            df['Keltner_Middle'] = keltner.keltner_channel_mband()
            
            # Volume Indicators
            df['OBV'] = ta.volume.OnBalanceVolumeIndicator(df['Close'], df['Volume']).on_balance_volume()
            df['VWAP'] = (df['Close'] * df['Volume']).cumsum() / df['Volume'].cumsum()
            df['CMF'] = ta.volume.ChaikinMoneyFlowIndicator(df['High'], df['Low'], df['Close'], df['Volume'], window=20).chaikin_money_flow()
            df['AD'] = ta.volume.AccDistIndexIndicator(df['High'], df['Low'], df['Close'], df['Volume']).acc_dist_index()
            
            # Support/Resistance Indicators
            df['Pivot_S1'] = df['Close'].shift(1) - (df['High'].shift(1) - df['Low'].shift(1))
            df['Pivot_R1'] = df['Close'].shift(1) + (df['High'].shift(1) - df['Low'].shift(1))
            high_52w = df['High'].rolling(window=252).max().shift(1)
            low_52w = df['Low'].rolling(window=252).min().shift(1)
            df['Fibonacci_50'] = low_52w + 0.5 * (high_52w - low_52w)
            
            df = df.dropna()
            return df[['Close', 'Open', 'High', 'Low', 'Volume', 'EMA_50', 'SMA_200', 'MACD', 'EMA_20',
                       'Ichimoku_Tenkan', 'Ichimoku_Kijun', 'Ichimoku_Senkou_A', 'Ichimoku_Senkou_B', 'Parabolic_SAR',
                       'RSI', 'Stochastic_K', 'Stochastic_D', 'Williams_R', 'CCI', 'ROC', 'MOM', 'Ultimate_Oscillator',
                       'BB_Middle', 'ATR', 'Std_Dev', 'Keltner_Middle', 'OBV', 'VWAP', 'CMF', 'AD', 'Pivot_S1', 'Pivot_R1', 'Fibonacci_50']]
        except Exception as e:
            logging.error(f"Attempt {attempt+1} failed for {ticker}: {str(e)}")
            if attempt < retries - 1:
                time.sleep(3)
            else:
                bse_ticker = ticker.replace('.NS', '.BO')
                logging.error(f"Retrying with BSE ticker {bse_ticker}")
                try:
                    stock_data = yf.download(bse_ticker, start=start_date, end=end_date, progress=False)
                    if stock_data.empty:
                        raise ValueError(f"No data returned for {bse_ticker}")
                    df = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
                    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
                    df['EMA_50'] = ta.trend.EMAIndicator(df['Close'], window=50).ema_indicator()
                    df['SMA_200'] = ta.trend.SMAIndicator(df['Close'], window=200).sma_indicator()
                    df['MACD'] = ta.trend.MACD(df['Close']).macd_diff()
                    df['EMA_20'] = ta.trend.EMAIndicator(df['Close'], window=20).ema_indicator()
                    ichimoku = ta.trend.IchimokuIndicator(df['High'], df['Low'])
                    df['Ichimoku_Tenkan'] = ichimoku.ichimoku_conversion_line()
                    df['Ichimoku_Kijun'] = ichimoku.ichimoku_base_line()
                    df['Ichimoku_Senkou_A'] = ichimoku.ichimoku_a()
                    df['Ichimoku_Senkou_B'] = ichimoku.ichimoku_b()
                    df['Parabolic_SAR'] = ta.trend.PSARIndicator(df['High'], df['Low'], df['Close']).psar()
                    df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
                    stochastic = ta.momentum.StochasticOscillator(df['High'], df['Low'], df['Close'], window=14)
                    df['Stochastic_K'] = stochastic.stoch()
                    df['Stochastic_D'] = stochastic.stoch_signal()
                    df['Williams_R'] = ta.momentum.WilliamsRIndicator(df['High'], df['Low'], df['Close'], lbp=14).williams_r()
                    df['CCI'] = ta.trend.CCIIndicator(df['High'], df['Low'], df['Close'], window=20).cci()
                    df['ROC'] = ta.momentum.ROCIndicator(df['Close'], window=12).roc()
                    df['MOM'] = df['Close'].diff(10)  # Replaced MomentumIndicator
                    df['Ultimate_Oscillator'] = ta.momentum.UltimateOscillator(df['High'], df['Low'], df['Close']).ultimate_oscillator()
                    bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
                    df['BB_Middle'] = bb.bollinger_mavg()
                    df['ATR'] = ta.volatility.AverageTrueRange(df['High'], df['Low'], df['Close'], window=14).average_true_range()
                    df['Std_Dev'] = df['Close'].rolling(window=20).std()
                    keltner = ta.volatility.KeltnerChannel(df['High'], df['Low'], df['Close'], window=20)
                    df['Keltner_Middle'] = keltner.keltner_channel_mband()
                    df['OBV'] = ta.volume.OnBalanceVolumeIndicator(df['Close'], df['Volume']).on_balance_volume()
                    df['VWAP'] = (df['Close'] * df['Volume']).cumsum() / df['Volume'].cumsum()
                    df['CMF'] = ta.volume.ChaikinMoneyFlowIndicator(df['High'], df['Low'], df['Close'], df['Volume'], window=20).chaikin_money_flow()
                    df['AD'] = ta.volume.AccDistIndexIndicator(df['High'], df['Low'], df['Close'], df['Volume']).acc_dist_index()
                    df['Pivot_S1'] = df['Close'].shift(1) - (df['High'].shift(1) - df['Low'].shift(1))
                    df['Pivot_R1'] = df['Close'].shift(1) + (df['High'].shift(1) - df['Low'].shift(1))
                    high_52w = df['High'].rolling(window=252).max().shift(1)
                    low_52w = df['Low'].rolling(window=252).min().shift(1)
                    df['Fibonacci_50'] = low_52w + 0.5 * (high_52w - low_52w)
                    df = df.dropna()
                    return df[['Close', 'Open', 'High', 'Low', 'Volume', 'EMA_50', 'SMA_200', 'MACD', 'EMA_20',
                               'Ichimoku_Tenkan', 'Ichimoku_Kijun', 'Ichimoku_Senkou_A', 'Ichimoku_Senkou_B', 'Parabolic_SAR',
                               'RSI', 'Stochastic_K', 'Stochastic_D', 'Williams_R', 'CCI', 'ROC', 'MOM', 'Ultimate_Oscillator',
                               'BB_Middle', 'ATR', 'Std_Dev', 'Keltner_Middle', 'OBV', 'VWAP', 'CMF', 'AD', 'Pivot_S1', 'Pivot_R1', 'Fibonacci_50']]
                except Exception as e:
                    logging.error(f"BSE fallback failed for {bse_ticker}: {str(e)}")
                    return pd.DataFrame()
            time.sleep(2)

# Prepare data for LSTM
def prepare_data(data, look_back=100):
    """Scale ``data`` to the [0, 1] range and cut it into sliding windows.

    For every row index ``i`` from ``look_back`` to the end, one sample is
    built from the ``look_back`` scaled rows that precede row ``i``; its
    target is the scaled value in column 0 of row ``i``.

    Args:
        data: 2-D table of features accepted by ``MinMaxScaler.fit_transform``.
        look_back: Number of preceding rows in each input window.

    Returns:
        tuple: ``(X, y, scaler)`` where ``X`` holds the stacked windows, ``y``
        the column-0 targets and ``scaler`` the fitted ``MinMaxScaler``.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(data)

    # Each index marks both the (exclusive) end of a window and its target row.
    target_rows = range(look_back, len(scaled))
    X = np.array([scaled[end - look_back:end] for end in target_rows])
    y = np.array([scaled[end, 0] for end in target_rows])  # Predict Close price
    return X, y, scaler

# Build LSTM model
def build_model(look_back, n_features):
    """Create and compile the two-layer LSTM regressor.

    Args:
        look_back: Number of time steps in each input window.
        n_features: Number of features per time step.

    Returns:
        A compiled ``Sequential`` model: LSTM(50, sequences) -> Dropout(0.2)
        -> LSTM(50) -> Dropout(0.2) -> Dense(1), using the Adam optimizer and
        mean-squared-error loss.
    """
    stack = [
        LSTM(units=50, return_sequences=True, input_shape=(look_back, n_features)),
        Dropout(0.2),
        LSTM(units=50),
        Dropout(0.2),
        Dense(units=1),
    ]
    model = Sequential(stack)
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Predict future prices
def predict_future_prices(model, scaler, last_sequence, start_date, end_date, look_back=100):
    """Roll the model forward one calendar day at a time.

    Every calendar day from ``start_date`` to ``end_date`` (inclusive) yields
    one output row. On days where ``is_market_closed`` is true the price is
    ``None`` and the input window is left untouched. On open days the model
    is asked for the next scaled value, which is converted back to a price
    and then appended to the window (column 0 = prediction, every other
    feature = 0) while the oldest step is dropped.

    Args:
        model: Object with ``predict(array, verbose=0)``.
        scaler: Fitted scaler providing ``inverse_transform``.
        last_sequence: Scaled feature rows; the final ``look_back`` rows seed
            the window.
        start_date: First calendar day to emit.
        end_date: Last calendar day to emit.
        look_back: Window length in rows.

    Returns:
        pandas.DataFrame with columns ``Date``, ``Day``, ``Predicted_Price``
        and ``Market_Status``.
    """
    window = last_sequence[-look_back:].reshape(1, look_back, -1)
    forecast = {'Date': [], 'Day': [], 'Predicted_Price': [], 'Market_Status': []}

    day = start_date
    while day <= end_date:
        closed = is_market_closed(day)

        if closed:
            price = None
        else:
            scaled_pred = model.predict(window, verbose=0)
            # The scaler expects all feature columns, so pad the single
            # predicted column with zeros and read column 0 back out.
            filler = np.zeros((scaled_pred.shape[0], window.shape[2] - 1))
            price = scaler.inverse_transform(
                np.concatenate([scaled_pred, filler], axis=1)
            )[0, 0]

            # Slide the window: drop the oldest step, append the new one.
            next_step = np.zeros((1, window.shape[2]))
            next_step[0, 0] = scaled_pred[0, 0]
            window = np.roll(window, -1, axis=1)
            window[0, -1] = next_step

        forecast['Date'].append(day)
        forecast['Day'].append(day.strftime('%A'))
        forecast['Predicted_Price'].append(price)
        forecast['Market_Status'].append('Closed' if closed else 'Open')

        day += timedelta(days=1)  # Iterate over all calendar days

    return pd.DataFrame(forecast)

In [36]:
# Unified list of stocks (test set)
stocks = ['RELIANCE.BO', 'TCS.BO', 'HDFCBANK.BO', 'INFY.BO', 'ITC.BO']

# Define date ranges
start_date = '2015-04-21'
end_date = '2025-04-25'  # Past-only date
future_start = datetime(2025, 1, 1)
future_end = datetime(2025, 12, 31)
look_back = 100

# Create directories for outputs
os.makedirs('models', exist_ok=True)
os.makedirs('predictions', exist_ok=True)

failed_stocks = []

for ticker in stocks:
    print(f"Processing {ticker}...")
    # Fetch OHLCV data together with the technical indicators
    data = get_stock_data(ticker, start_date, end_date)
    if data.empty:
        print(f"No data for {ticker}")
        failed_stocks.append(ticker)
        continue

    # At least one full window plus one target row is required; otherwise
    # prepare_data yields no samples and X.shape[2] below would fail.
    if len(data) <= look_back:
        print(f"Not enough data for {ticker}: {len(data)} rows, need more than {look_back}")
        failed_stocks.append(ticker)
        continue

    # Prepare data for LSTM (chronological 80/20 split, no shuffling)
    X, y, scaler = prepare_data(data, look_back)
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Build and train LSTM model
    model = build_model(look_back, X.shape[2])
    early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=20, batch_size=32, callbacks=[early_stopping], verbose=1)

    # Save model and scaler
    # NOTE(review): Keras documents model.save() as its persistence route;
    # pickling is kept so existing artifact names stay the same - confirm it
    # works with the installed TensorFlow/Keras version.
    with open(f'models/{ticker}_model.pkl', 'wb') as f:
        pickle.dump(model, f)
    with open(f'models/{ticker}_scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)

    # Predict future prices for 2025.
    # The forecast is rolled forward from the last downloaded row, so its
    # first step corresponds to the day after that row. Starting earlier
    # would attach predictions to dates that precede the data they came from.
    last_observed = pd.Timestamp(data.index[-1]).tz_localize(None)
    forecast_start = max(future_start, (last_observed + timedelta(days=1)).to_pydatetime())
    last_sequence = scaler.transform(data.values)
    future_preds = predict_future_prices(
        model, scaler, last_sequence, forecast_start, future_end, look_back=look_back
    )
    future_preds.to_csv(f'predictions/{ticker}_2025_predictions.csv', index=False)

    # Plot actual vs. predicted prices for test set.
    # The scaler was fitted on every feature column, so the single Close
    # column is padded with zeros before inverse_transform and read back
    # from column 0.
    predictions = model.predict(X_test, verbose=0)
    predictions = scaler.inverse_transform(
        np.concatenate([predictions, np.zeros((predictions.shape[0], X_test.shape[2]-1))], axis=1)
    )[:, 0]
    y_test_scaled = scaler.inverse_transform(
        np.concatenate([y_test.reshape(-1, 1), np.zeros((y_test.shape[0], X_test.shape[2]-1))], axis=1)
    )[:, 0]

    plt.figure(figsize=(10, 5))
    plt.plot(data.index[-len(y_test_scaled):], y_test_scaled, label='Actual Prices')
    plt.plot(data.index[-len(predictions):], predictions, label='Predicted Prices')
    plt.title(f'{ticker} Stock Price Prediction')
    plt.xlabel('Date')
    plt.ylabel('Price (INR)')
    plt.legend()
    plt.savefig(f'predictions/{ticker}_plot.png')
    plt.close()

if failed_stocks:
    print(f"Failed to process {len(failed_stocks)} stocks: {failed_stocks}")
    print("Check download_errors.log for details.")

Processing RELIANCE.BO...
No data for RELIANCE.BO
Processing TCS.BO...
No data for TCS.BO
Processing HDFCBANK.BO...
No data for HDFCBANK.BO
Processing INFY.BO...
No data for INFY.BO
Processing ITC.BO...
No data for ITC.BO
Failed to process 5 stocks: ['RELIANCE.BO', 'TCS.BO', 'HDFCBANK.BO', 'INFY.BO', 'ITC.BO']
Check download_errors.log for details.


In [38]:
# Dump the error log written by the logging setup so failures can be inspected.
with open('download_errors.log', 'r') as log_file:
    log_text = log_file.read()
print(log_text)

2025-04-26 10:10:23,418 - 
1 Failed download:
2025-04-26 10:10:23,418 - ['RELIANCE.BO']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
2025-04-26 10:10:23,418 - Attempt 1 failed for RELIANCE.BO: No data returned for RELIANCE.BO
2025-04-26 10:10:28,433 - 
1 Failed download:
2025-04-26 10:10:28,433 - ['RELIANCE.BO']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
2025-04-26 10:10:28,433 - Attempt 2 failed for RELIANCE.BO: No data returned for RELIANCE.BO
2025-04-26 10:10:33,448 - 
1 Failed download:
2025-04-26 10:10:33,449 - ['RELIANCE.BO']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
2025-04-26 10:10:33,449 - Attempt 3 failed for RELIANCE.BO: No data returned for RELIANCE.BO
2025-04-26 10:10:38,463 - 
1 Failed download:
2025-04-26 10:10:38,463 - ['RELIANCE.BO']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
2025-04-26 10:10:38,463 - Attempt 4 failed for RELIANCE.BO: No data returned for RELIANCE.BO
2025-04-26 10:10:43,477 - 
1

In [42]:
import yfinance as yf
# Minimal reproduction: fetch one ticker over a short range, bypassing the
# retry/indicator pipeline, and show the first rows of whatever comes back.
stock_data = yf.download(
    'RELIANCE.BO',
    start='2024-01-01',
    end='2025-04-25',
    progress=False,
)
preview = stock_data.head()
print(preview)

Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []
