<a href="https://colab.research.google.com/github/RubhavSaini/G_colab/blob/main/Prj_stockmrk_pred.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance scikit-learn xgboost tensorflow joblib



In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
import ta

In [None]:
def create_sequences(data, time_step=60):
    X, y = [], []
    for i in range(len(data) - time_step):
        X.append(data[i:i + time_step])
        y.append(data[i + time_step][0])
    return np.array(X), np.array(y)

In [None]:
def evaluate_model(name, y_true, y_pred):
  non_zero_idx = y_true != 0
    safe_y_true = y_true[non_zero_idx]
    safe_y_pred = y_pred[non_zero_idx]

    mape = np.mean(np.abs((safe_y_true - safe_y_pred) / safe_y_true)) * 100

    return {
        "model": model_name,
        "MAE": mean_absolute_error(y_true, y_pred),
        "MSE": mean_squared_error(y_true, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        "R2": r2_score(y_true, y_pred),
        "MAPE": mape
    }


In [None]:
def train_and_save_all_models(ticker):
    df = yf.download(ticker, start="2015-01-01", end="2025-06-30")
    df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()

   # Calculate indicators
    close_series = df['Close'].squeeze()
    df['RSI'] = ta.momentum.RSIIndicator(close=close_series, window=14).rsi()
    df['SMA'] = ta.trend.SMAIndicator(close=close_series, window=20).sma_indicator()
    df['EMA'] = ta.trend.EMAIndicator(close=close_series, window=20).ema_indicator()
    macd = ta.trend.MACD(close=close_series)
    df['MACD'] = macd.macd()
    df['MACD_signal'] = macd.macd_signal()
    df['MACD_diff'] = macd.macd_diff()
    df.dropna(inplace=True)

   # Select the 7 features
    feature_cols = ['Close', 'RSI', 'SMA', 'EMA', 'MACD', 'MACD_signal', 'MACD_diff']
    df = df[feature_cols]

  #  # Scale and reshape
  #   scaler = MinMaxScaler()
  #   scaled = scaler.fit_transform(df)

  #   X, y = create_sequences(scaled)
  #   X = X.reshape(X.shape[0], X.shape[1], len(feature_cols))

  #   train_size = int(len(X)*0.8)
  #   X_train, X_test = X[:train_size], X[train_size:]
  #   y_train, y_test = y[:train_size], y[train_size:]

  #   X_flat_train = X_train.reshape(X_train.shape[0], -1)
  #   X_flat_test = X_test.reshape(X_test.shape[0], -1)

  #   results = []
  #   early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

  #   # GRU
  #   model_gru = Sequential([
  #   GRU(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),

  #   Dropout(0.2), GRU(50), Dropout(0.2), Dense(1)])
  #   model_gru.compile(optimizer='adam', loss='mean_squared_error')
  #   model_gru.summary()
  #   model_gru.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop], verbose=0)
  #   y_pred_gru = scaler.inverse_transform(model_gru.predict(X_test))
  #   y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
  #   results.append({"ticker": ticker, **evaluate_model("GRU", y_test_rescaled, y_pred_gru)})
  #   model_gru.save(f"{ticker.split('.')[0].lower()}_gru.h5")
  #   # model_gru.save(f"{ticker.split('.')[0].lower()}_gru.h5")

  #   # LSTM
  #   model_lstm = Sequential([
  #   LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
  #   Dropout(0.2), LSTM(50), Dropout(0.2), Dense(1)])
  #   model_lstm.compile(optimizer='adam', loss='mean_squared_error')
  #   model_lstm.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop], verbose=0)
  #   y_pred_lstm = scaler.inverse_transform(model_lstm.predict(X_test))
  #   results.append({"ticker": ticker, **evaluate_model("LSTM", y_test_rescaled, y_pred_lstm)})

  #   # BiLSTM
  #   model_bilstm = Sequential([
  #   Bidirectional(LSTM(50, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])),
  #   Dropout(0.2), Bidirectional(LSTM(50)), Dropout(0.2), Dense(1)])
  #   model_bilstm.compile(optimizer='adam', loss='mean_squared_error')
  #   model_bilstm.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop], verbose=0)
  #   y_pred_bilstm = scaler.inverse_transform(model_bilstm.predict(X_test))
  #   results.append({"ticker": ticker, **evaluate_model("BiLSTM", y_test_rescaled, y_pred_bilstm)})

  #   # Random Forest
  #   rf = RandomForestRegressor()
  #   rf.fit(X_flat_train, y_train)
  #   y_pred_rf = scaler.inverse_transform(rf.predict(X_flat_test).reshape(-1, 1))
  #   results.append({"ticker": ticker, **evaluate_model("Random Forest", y_test_rescaled, y_pred_rf)})

  #   # XGBoost
  #   xgb = XGBRegressor()
  #   xgb.fit(X_flat_train, y_train)
  #   y_pred_xgb = scaler.inverse_transform(xgb.predict(X_flat_test).reshape(-1, 1))
  #   results.append({"ticker": ticker, **evaluate_model("XGBoost", y_test_rescaled, y_pred_xgb)})

  #   return results


    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)
    sequence_length = 60
    X, y = [], []
    for i in range(sequence_length, len(scaled_data)):
        X.append(scaled_data[i-sequence_length:i])
        y.append(scaled_data[i, 0])  # Predict 'Close'

    X = np.array(X)
    y = np.array(y)

    # Train-test split
    split = int(0.8 * len(X))
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    # Flatten for RF and XGBoost
    X_flat_train = X_train.reshape(X_train.shape[0], -1)
    X_flat_test = X_test.reshape(X_test.shape[0], -1)

    # Rescale y_test for metrics
    y_test_rescaled = scaler.inverse_transform(
        np.concatenate([np.zeros((len(y_test), df.shape[1]-1)), y_test.reshape(-1, 1)], axis=1)
    )[:, -1]

    results = []
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # GRU
    model_gru = Sequential([
        GRU(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(0.2),
        GRU(50),
        Dropout(0.2),
        Dense(1)
    ])
    model_gru.compile(optimizer='adam', loss='mean_squared_error')
    model_gru.fit(X_train, y_train, epochs=20, batch_size=32,
                  validation_data=(X_test, y_test), callbacks=[early_stop], verbose=0)
    y_pred_gru = model_gru.predict(X_test)
    y_pred_gru = scaler.inverse_transform(
        np.concatenate([np.zeros((len(y_pred_gru), df.shape[1]-1)), y_pred_gru], axis=1)
    )[:, -1]
    results.append({"ticker": ticker, **evaluate_model("GRU", y_test_rescaled, y_pred_gru)})
    model_gru.save(f"{ticker.split('.')[0].lower()}_gru.h5")

    # LSTM
    model_lstm = Sequential([
        LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(0.2),
        LSTM(50),
        Dropout(0.2),
        Dense(1)
    ])
    model_lstm.compile(optimizer='adam', loss='mean_squared_error')
    model_lstm.fit(X_train, y_train, epochs=20, batch_size=32,
                   validation_data=(X_test, y_test), callbacks=[early_stop], verbose=0)
    y_pred_lstm = model_lstm.predict(X_test)
    y_pred_lstm = scaler.inverse_transform(
        np.concatenate([np.zeros((len(y_pred_lstm), df.shape[1]-1)), y_pred_lstm], axis=1)
    )[:, -1]
    results.append({"ticker": ticker, **evaluate_model("LSTM", y_test_rescaled, y_pred_lstm)})
    # model_lstm.save(f"{ticker.split('.')[0].lower()}_lstm.h5")

    # BiLSTM
    model_bilstm = Sequential([
        Bidirectional(LSTM(50, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(0.2),
        Bidirectional(LSTM(50)),
        Dropout(0.2),
        Dense(1)
    ])
    model_bilstm.compile(optimizer='adam', loss='mean_squared_error')
    model_bilstm.fit(X_train, y_train, epochs=20, batch_size=32,
                     validation_data=(X_test, y_test), callbacks=[early_stop], verbose=0)
    y_pred_bilstm = model_bilstm.predict(X_test)
    y_pred_bilstm = scaler.inverse_transform(
        np.concatenate([np.zeros((len(y_pred_bilstm), df.shape[1]-1)), y_pred_bilstm], axis=1)
    )[:, -1]
    results.append({"ticker": ticker, **evaluate_model("BiLSTM", y_test_rescaled, y_pred_bilstm)})
    # model_bilstm.save(f"{ticker.split('.')[0].lower()}_bilstm.h5")

    # Random Forest
    rf = RandomForestRegressor()
    rf.fit(X_flat_train, y_train)
    y_pred_rf = rf.predict(X_flat_test)
    y_pred_rf = scaler.inverse_transform(
        np.concatenate([np.zeros((len(y_pred_rf), df.shape[1]-1)), y_pred_rf.reshape(-1, 1)], axis=1)
    )[:, -1]
    results.append({"ticker": ticker, **evaluate_model("Random Forest", y_test_rescaled, y_pred_rf)})

    # XGBoost
    xgb = XGBRegressor()
    xgb.fit(X_flat_train, y_train)
    y_pred_xgb = xgb.predict(X_flat_test)
    y_pred_xgb = scaler.inverse_transform(
        np.concatenate([np.zeros((len(y_pred_xgb), df.shape[1]-1)), y_pred_xgb.reshape(-1, 1)], axis=1)
    )[:, -1]
    results.append({"ticker": ticker, **evaluate_model("XGBoost", y_test_rescaled, y_pred_xgb)})

    return results

In [None]:
# def train_and_save_all_models(ticker):
#     df = yf.download(ticker, start="2015-01-01", end="2025-6-30")
#     df = df[['Close']].dropna()
#     scaler = MinMaxScaler()
#     scaled = scaler.fit_transform(df)

#     X, y = create_sequences(scaled)
#     X = X.reshape(X.shape[0], X.shape[1], 1)
#     train_size = int(len(X)*0.8)
#     X_train, X_test = X[:train_size], X[train_size:]
#     y_train, y_test = y[:train_size], y[train_size:]

#     X_flat_train = X_train.reshape(X_train.shape[0], -1)
#     X_flat_test = X_test.reshape(X_test.shape[0], -1)

#     results = []

#     # GRU
#     model_gru = Sequential([
#     GRU(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
#     Dropout(0.2), GRU(50), Dropout(0.2), Dense(1)])
#     model_gru.compile(optimizer='adam', loss='mean_squared_error')
#     model_gru.summary()
#     model_gru.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=0)
#     y_pred_gru = scaler.inverse_transform(model_gru.predict(X_test))
#     y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
#     results.append({"ticker": ticker, **evaluate_model("GRU", y_test_rescaled, y_pred_gru)})
#     model_gru.save(f"{ticker.split('.')[0].lower()}_gru.h5")
#  # LSTM
#     model_lstm = Sequential([
#     LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
#     Dropout(0.2), LSTM(50), Dropout(0.2), Dense(1)])
#     model_lstm.compile(optimizer='adam', loss='mean_squared_error')
#     model_lstm.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=0)
#     y_pred_lstm = scaler.inverse_transform(model_lstm.predict(X_test))
#     results.append({"ticker": ticker, **evaluate_model("LSTM", y_test_rescaled, y_pred_lstm)})

#     # BiLSTM
#     model_bilstm = Sequential([
#     Bidirectional(LSTM(50, return_sequences=True), input_shape=(X_train.shape[1], 1)),
#     Dropout(0.2), Bidirectional(LSTM(50)), Dropout(0.2), Dense(1)])
#     model_bilstm.compile(optimizer='adam', loss='mean_squared_error')
#     model_bilstm.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=0)
#     y_pred_bilstm = scaler.inverse_transform(model_bilstm.predict(X_test))
#     results.append({"ticker": ticker, **evaluate_model("BiLSTM", y_test_rescaled, y_pred_bilstm)})

#     # Random Forest
#     rf = RandomForestRegressor()
#     rf.fit(X_flat_train, y_train)
#     y_pred_rf = scaler.inverse_transform(rf.predict(X_flat_test).reshape(-1, 1))
#     results.append({"ticker": ticker, **evaluate_model("Random Forest", y_test_rescaled, y_pred_rf)})

#     # XGBoost
#     xgb = XGBRegressor()
#     xgb.fit(X_flat_train, y_train)
#     y_pred_xgb = scaler.inverse_transform(xgb.predict(X_flat_test).reshape(-1, 1))
#     results.append({"ticker": ticker, **evaluate_model("XGBoost", y_test_rescaled, y_pred_xgb)})

#     return results


In [None]:
stocks = ["RELIANCE.NS", "TCS.NS", "ADANIPORTS.NS", "INFY.NS", "ITC.NS"]
all_results = []
for stock in stocks:
    all_results += train_and_save_all_models(stock)

df = pd.DataFrame(all_results)
df.to_csv("all_model_results.csv", index=False)

  df = yf.download(ticker, start="2015-01-01", end="2025-06-30")
[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step


  df = yf.download(ticker, start="2015-01-01", end="2025-06-30")
[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 65ms/step


  df = yf.download(ticker, start="2015-01-01", end="2025-06-30")
[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 64ms/step


  df = yf.download(ticker, start="2015-01-01", end="2025-06-30")
[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step


  df = yf.download(ticker, start="2015-01-01", end="2025-06-30")
[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 63ms/step


In [None]:
from google.colab import files

In [None]:
import zipfile

with zipfile.ZipFile("models.zip", "w") as zipf:
    for ticker in stocks:
        zipf.write(f"{ticker.split('.')[0].lower()}_gru.h5")
    zipf.write("all_model_results.csv")

In [None]:
# zipf.open("models.zip")

ValueError: Attempt to use ZIP archive that was already closed

In [None]:
files.download("models.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
all_model_results.csv

NameError: name 'all_model_results' is not defined