In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import talib
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
import yfinance as yf
import talib
import numpy as np

# Download necessary NLTK data
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\pande\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:

def get_stock_data(ticker):
    # Fetch all available historical data
    data = yf.download(ticker, progress=False)
    
    # Ensure the Close column is a 1D array before passing to TA-Lib
    close_prices = data['Close'].astype(float).values.flatten()

    data['RSI'] = talib.RSI(close_prices, timeperiod=14)
    data['MA_10'] = talib.SMA(close_prices, timeperiod=10)
    data['MA_30'] = talib.SMA(close_prices, timeperiod=30)
    data['MA_50'] = talib.SMA(close_prices, timeperiod=50)
    data['MA_200'] = talib.SMA(close_prices, timeperiod=200)

    upper, middle, lower = talib.BBANDS(close_prices, timeperiod=20)
    data['Upper_Band'] = upper
    data['Lower_Band'] = lower

    data.dropna(inplace=True)
    return data

# Example Usage
 # Display the first few rows


In [3]:
def preprocess_lstm_data(df, columns=['Close'], time_step=60):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df[columns])
    X, y = [], []
    for i in range(time_step, len(scaled_data)):
        X.append(scaled_data[i-time_step:i])
        y.append(scaled_data[i, 0])
    return np.array(X), np.array(y), scaler

# Build LSTM Model with Optimizations
def build_stacked_lstm_model(input_shape):
    model = Sequential([
        LSTM(128, return_sequences=True, input_shape=input_shape),
        BatchNormalization(),
        Dropout(0.2),
        LSTM(64, return_sequences=True),
        BatchNormalization(),
        Dropout(0.2),
        LSTM(32),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [4]:
def train_lstm_model(X_train, y_train,X_test, y_test ,model_name,epoch):
    model = build_stacked_lstm_model((X_train.shape[1], X_train.shape[2]))
    lr_scheduler = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, min_lr=3.1250e-05, verbose=1)
    history = model.fit(X_train, y_train, epochs=epoch, batch_size=32,validation_data=(X_test, y_test), verbose=1, callbacks=[lr_scheduler])
    model.save(f"preduction_forcaste.keras")
    plt.figure(figsize=(10,5))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
    return model

# Sentiment Analysis
def get_sentiment_score(text):
    sia = SentimentIntensityAnalyzer()
    score = sia.polarity_scores(text)['compound']
    return score

In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def forecast_ensemble(ticker, forecast_days=30, epoch=50):
    df = get_stock_data(ticker)
    time_step = 60
    
    # Ensure the correct columns are selected for feature engineering (Only 'Close' for indices)
    feature_columns = ['Close']
    
    # Preprocess data
    X, y, scaler = preprocess_lstm_data(df, columns=feature_columns, time_step=time_step)
    X_train, y_train = X[:-forecast_days], y[:-forecast_days]
    X_test, y_test = X[-forecast_days:], y[-forecast_days:]

    try:
        model = load_model(f"preduction_forcaste.keras")
    except:
        model = train_lstm_model(X_train, y_train, X_test, y_test, ticker, epoch)

    predictions = model.predict(X_test)

    # Create an empty array for inverse transform with correct shape
    dummy_array = np.zeros((predictions.shape[0], len(feature_columns)))  # (30, 1)
    dummy_array[:, 0] = predictions[:, 0]  # Fill only the Close column

    # Inverse transform using the full shape
    predictions = scaler.inverse_transform(dummy_array)[:, 0]  # Extract only the Close column

    # Inverse transform actual y values
    y_actual = np.zeros((y_test.shape[0], len(feature_columns)))
    y_actual[:, 0] = y_test  # Fill only the Close column
    y_actual = scaler.inverse_transform(y_actual)[:, 0]  # Extract only Close column

    # Sentiment Adjustment
    headline = f"{ticker} stock market update"
    sentiment_score = get_sentiment_score(headline)
    sentiment_adjustment = 1 + (sentiment_score * 0.03)
    adjusted_preds = predictions * sentiment_adjustment

    # Calculate Evaluation Metrics
    rmse = np.sqrt(mean_squared_error(y_actual, adjusted_preds))
    mae = mean_absolute_error(y_actual, adjusted_preds)
    mape = np.mean(np.abs((y_actual - adjusted_preds) / y_actual)) * 100
    r2 = r2_score(y_actual, adjusted_preds)

    print("RMSE:", rmse)
    print("MAE:", mae)
    print("MAPE:", mape)
    print("R^2 Score:", r2)

    # Plot Results
    last_dates = df.index[-forecast_days:]
    plt.figure(figsize=(12, 6))
    plt.plot(last_dates, y_actual, label='Actual')
    
    plt.plot(last_dates, adjusted_preds, label='Ensemble Forecast (Sentiment)', linestyle='--')
    plt.title(f"{ticker} - Enhanced Ensemble Forecast ({forecast_days} days), {epoch} Epochs")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.legend()
    plt.grid(True)
    plt.show()


In [6]:
df = get_stock_data("RELIANCE.NS")  # Replace with your stock ticker
df 

YF.download() has changed argument auto_adjust default to True


Price,Close,High,Low,Open,Volume,RSI,MA_10,MA_30,MA_50,MA_200,Upper_Band,Lower_Band
Ticker,RELIANCE.NS,RELIANCE.NS,RELIANCE.NS,RELIANCE.NS,RELIANCE.NS,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1996-10-04,3.448852,3.476885,3.368373,3.449757,180091079,39.823197,3.476161,3.634709,3.682388,3.700528,3.757615,3.347981
1996-10-07,3.381938,3.456992,3.264384,3.456992,272128046,36.037561,3.462959,3.621296,3.673273,3.700440,3.731014,3.337778
1996-10-08,3.396405,3.436192,3.283372,3.365660,196707309,37.422563,3.448491,3.609389,3.666472,3.700557,3.704961,3.331820
1996-10-09,3.440715,3.472364,3.386459,3.436193,175357589,41.594181,3.435922,3.595464,3.657954,3.700767,3.681073,3.332107
1996-10-10,3.532949,3.544704,3.414491,3.430768,194458201,49.187508,3.438182,3.585818,3.652692,3.701596,3.663105,3.338409
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-28,1275.099976,1295.750000,1269.000000,1280.000000,18147129,57.700779,1268.424988,1234.701668,1245.083999,1360.224517,1319.400084,1164.914918
2025-04-01,1252.599976,1277.900024,1249.300049,1264.599976,12099648,50.443446,1269.799988,1235.918335,1244.088999,1359.195640,1319.661711,1169.903291
2025-04-02,1251.150024,1255.550049,1243.900024,1247.550049,10142590,50.006949,1271.029993,1237.048336,1243.003000,1358.149921,1315.633761,1181.921244
2025-04-03,1248.699951,1251.800049,1233.050049,1233.050049,7434366,49.231717,1271.184985,1237.841667,1242.503000,1357.030660,1306.828568,1199.406430


In [None]:
forecast_ensemble("RELIANCE.NS",epoch=250)

  super().__init__(**kwargs)


Epoch 1/250
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 177ms/step - loss: 0.0313 - val_loss: 0.2347 - learning_rate: 0.0010
Epoch 2/250
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 152ms/step - loss: 0.0039 - val_loss: 0.1131 - learning_rate: 0.0010
Epoch 3/250
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 155ms/step - loss: 0.0020 - val_loss: 0.0096 - learning_rate: 0.0010
Epoch 4/250
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 150ms/step - loss: 0.0015 - val_loss: 4.8885e-04 - learning_rate: 0.0010
Epoch 5/250
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 153ms/step - loss: 0.0010 - val_loss: 0.0012 - learning_rate: 0.0010
Epoch 6/250
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 151ms/step - loss: 0.0011 - val_loss: 4.6898e-04 - learning_rate: 0.0010
Epoch 7/250
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 144ms/step - loss: 8.843

In [None]:
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import holidays
from datetime import datetime, timedelta

# Load your data
df = pd.read_csv(r'C:\GIT REPOS\Final-Sem-Project\DATA\nse_stock_data\RELIANCE.CSV', skiprows=2,
                 names=['Date', 'Close', 'High', 'Low', 'Open', 'Volume'])

# Initialize and fit the scaler on the last known data
scaler = MinMaxScaler(feature_range=(0, 1))
last_known_data = df[['Close']].values
scaler.fit(last_known_data)  # Fit the scaler with the data

# Get Indian holidays
indian_holidays = holidays.India()

def get_next_trading_day(current_date):
    next_day = current_date + timedelta(days=1)
    while next_day.weekday() >= 5 or next_day in indian_holidays:
        next_day += timedelta(days=1)
    return next_day

def forecast_beyond_data(model, scaler, last_known_data, steps=30):
    predictions = []
    current_data = last_known_data[-60:].reshape(1, 60, 1)
    current_date = pd.to_datetime(df.index[-1])

    for _ in range(steps):
        next_day = get_next_trading_day(current_date)
        predicted_value = model.predict(current_data)[0, 0]
        # Inverse transform using the fitted scaler
        predicted_value = scaler.inverse_transform([[predicted_value]])[0, 0]
        predictions.append((next_day.strftime('%Y-%m-%d'), predicted_value))

        # Update the input for the next prediction
        scaled_prediction = scaler.transform(np.array([[predicted_value]]).reshape(1, -1))
        current_data = np.append(current_data[:, 1:, :], [[scaled_prediction]], axis=1)
        current_date = next_day

    forecast_df = pd.DataFrame(predictions, columns=['Date', 'Forecasted_Close'])
    return forecast_df

# Example Usage
model = load_model("preduction_forcaste.keras")
forecast_df = forecast_beyond_data(model, scaler, last_known_data)
print(forecast_df)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 3 dimension(s) and the array at index 1 has 4 dimension(s)