<a href="https://colab.research.google.com/github/Dimple626/21Projects-ML-DL-GenAI-/blob/main/Project_13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("ashishjangra27/nifty-50-25-yrs-data")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/ashishjangra27/nifty-50-25-yrs-data?dataset_version_number=1...


100%|██████████| 90.9k/90.9k [00:00<00:00, 408kB/s]

Extracting files...
Path to dataset files: /root/.cache/kagglehub/datasets/ashishjangra27/nifty-50-25-yrs-data/versions/1





In [3]:
import os
print(os.listdir(path))


['data.csv']


In [4]:
import os

folder = "/root/.cache/kagglehub/datasets/ashishjangra27/nifty-50-25-yrs-data/versions/1"
print(os.listdir(folder))


['data.csv']


In [5]:
import os

print("Path:", path)  # Print actual download path
print("Files:", os.listdir(path))  # List all files in this folder


Path: /root/.cache/kagglehub/datasets/ashishjangra27/nifty-50-25-yrs-data/versions/1
Files: ['data.csv']


In [6]:
import pandas as pd

df = pd.read_csv("/root/.cache/kagglehub/datasets/ashishjangra27/nifty-50-25-yrs-data/versions/1/data.csv")
print(df.head())




         Date     Open     High      Low   Close
0  2000-01-03  1482.15  1592.90  1482.15  1592.2
1  2000-01-04  1594.40  1641.95  1594.40  1638.7
2  2000-01-05  1634.55  1635.50  1555.05  1595.8
3  2000-01-06  1595.80  1639.00  1595.80  1617.6
4  2000-01-07  1616.60  1628.25  1597.20  1613.3


In [7]:
# --- 1. IMPORTS ---
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, GRU, LSTM, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf

# --- 2. LOAD DATA ---
df = pd.read_csv("/root/.cache/kagglehub/datasets/ashishjangra27/nifty-50-25-yrs-data/versions/1/data.csv")
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values('Date', inplace=True)
data = df['High'].values.reshape(-1,1)

# --- 3. FEATURE SCALING ---
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

# --- 4. DATA PREP FUNCTION ---
def create_data(data, window):
    X, y = [], []
    for i in range(window, len(data)):
        X.append(data[i-window:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

# --- 5. MODELING FUNCTIONS ---

def get_knn_results(X_train, y_train, X_test, y_test):
    model = KNeighborsRegressor(n_neighbors=5)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    return mae, rmse, model


def get_dl_results(model_fn, X_train, y_train, X_test, y_test, epochs):
    # Reshape for RNN input
    X_train_rnn = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_rnn = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    model = model_fn(X_train.shape[1])
    model.compile(optimizer='adam', loss='mse')
    early_stop = EarlyStopping(monitor='val_loss', patience=5)
    model.fit(X_train_rnn, y_train, epochs=epochs, batch_size=32, validation_split=0.1, callbacks=[early_stop], verbose=0)
    y_pred = model.predict(X_test_rnn, verbose=0)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # THIS IS THE FIX
    return mae, rmse, model




def get_rnn(input_shape):
    model = Sequential([SimpleRNN(32, input_shape=(input_shape,1)), Dense(1)])
    return model

def get_gru(input_shape):
    model = Sequential([GRU(32, input_shape=(input_shape,1)), Dense(1)])
    return model

def get_lstm(input_shape):
    model = Sequential([LSTM(32, input_shape=(input_shape,1)), Dense(1)])
    return model

def get_bilstm(input_shape):
    model = Sequential([Bidirectional(LSTM(32), input_shape=(input_shape,1)), Dense(1)])
    return model

# --- 6. MAIN LOOP ---
results = []
trained_models = {}

windows = [30, 60, 90]
epochs = 50

for win in windows:
    print(f'Window: {win}')
    X, y = create_data(data, win)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # --- KNN ---
    mae, rmse, model = get_knn_results(X_train, y_train, X_test, y_test)
    results.append({'Model':f'KNN-{win}','MAE':mae,'RMSE':rmse})
    trained_models[f'KNN-{win}'] = model

    # --- RNN ---
    mae, rmse, model = get_dl_results(get_rnn, X_train, y_train, X_test, y_test, epochs)
    results.append({'Model':f'RNN-{win}','MAE':mae,'RMSE':rmse})
    trained_models[f'RNN-{win}'] = model

    # --- GRU ---
    mae, rmse, model = get_dl_results(get_gru, X_train, y_train, X_test, y_test, epochs)
    results.append({'Model':f'GRU-{win}','MAE':mae,'RMSE':rmse})
    trained_models[f'GRU-{win}'] = model

    # --- LSTM ---
    mae, rmse, model = get_dl_results(get_lstm, X_train, y_train, X_test, y_test, epochs)
    results.append({'Model':f'LSTM-{win}','MAE':mae,'RMSE':rmse})
    trained_models[f'LSTM-{win}'] = model

    # --- Bidirectional LSTM ---
    mae, rmse, model = get_dl_results(get_bilstm, X_train, y_train, X_test, y_test, epochs)
    results.append({'Model':f'BiLSTM-{win}','MAE':mae,'RMSE':rmse})
    trained_models[f'BiLSTM-{win}'] = model

# --- 7. SAVE RESULTS ---
results_df = pd.DataFrame(results)
results_df.to_csv('models.csv', index=False)  # Metrics Table

joblib.dump(trained_models, 'trainedmodels.joblib')   # Trained models

# --- 8. DISPLAY RESULTS ---
print(results_df)


Window: 30


  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


Window: 60


  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


Window: 90


  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)
  super().__init__(**kwargs)


        Model       MAE      RMSE
0      KNN-30  0.249845  0.291081
1      RNN-30  0.029698  0.036268
2      GRU-30  0.008146  0.009792
3     LSTM-30  0.010234  0.012157
4   BiLSTM-30  0.006288  0.008119
5      KNN-60  0.254196  0.293988
6      RNN-60  0.017162  0.023953
7      GRU-60  0.005363  0.007117
8     LSTM-60  0.009536  0.011869
9   BiLSTM-60  0.007847  0.009962
10     KNN-90  0.270985  0.310210
11     RNN-90  0.020650  0.029075
12     GRU-90  0.019792  0.023216
13    LSTM-90  0.014204  0.016416
14  BiLSTM-90  0.023322  0.025913
