In [1]:
import os

In [2]:
# --- Data folders (absolute Windows paths on drive X:) ---
raw_data_folder = r"X:\AGDrive\ADA-Timeseries project\Raw_Data"
predicted_data_folder = r"X:\AGDrive\ADA-Timeseries project\Pred_Data"
lagging_data_folder = r"X:\AGDrive\ADA-Timeseries project\Lag_Data"

# --- Ten NSE indices, written as Yahoo Finance symbols ---
nse_indices = [
    "^NSEI",                   # Nifty 50
    "^NSEBANK",                # Nifty Bank
    "^CNXIT",                  # Nifty IT
    "^CNXPHARMA",              # Nifty Pharma
    "^CNXFMCG",                # Nifty FMCG
    "^CNXAUTO",                # Nifty Auto
    "^CNXMETAL",               # Nifty Metal
    "^CNXREALTY",              # Nifty Realty
    "^CNXENERGY",              # Nifty Energy
    "NIFTY_FIN_SERVICE.NS",    # Nifty Financial Services
]

# --- Ten NSE stocks (chosen by popularity / large-cap) ---
nse_stocks = [
    "RELIANCE.NS", "TCS.NS",
    "SUNPHARMA.NS", "ICICIBANK.NS",
    "INFY.NS", "SBIN.NS",
    "BHARTIARTL.NS", "ITC.NS",
    "LT.NS", "HINDUNILVR.NS",
]

# --- Full ticker list: every index first, then every stock ---
all_tickers = [*nse_indices, *nse_stocks]

In [3]:
from update_data_util import fetch_and_update_stock_data, update_predictions_file
# --- Refresh the raw data of every ticker in all_tickers ---
# Each ticker is handled independently: a failure is printed and the loop
# moves on to the next symbol instead of aborting the whole run.
for ticker in all_tickers:
    print(f"\n{'='*80}", f"üìä Processing: {ticker}", sep="\n")
    try:
        fetch_and_update_stock_data(
            ticker_symbol=ticker,
            data_dir=raw_data_folder,
        )
    except Exception as err:
        print(f"‚ùå Error fetching {ticker}: {err}")


üìä Processing: ^NSEI
üìà Updating existing data for NIFTY_50...
Last record in file is from: 2025-11-12 10:15:00+05:30
‚úÖ Data updated from 2025-11-12 11:15:00+05:30 to 2025-11-17 15:15:00+05:30
üìä Added 26 new records

üìä Processing: ^NSEBANK
üìà Updating existing data for NIFTY_BANK...
Last record in file is from: 2025-11-12 10:15:00+05:30
‚úÖ Data updated from 2025-11-12 11:15:00+05:30 to 2025-11-17 15:15:00+05:30
üìä Added 26 new records

üìä Processing: ^CNXIT
üìà Updating existing data for NIFTY_IT...
Last record in file is from: 2025-11-12 10:15:00+05:30
‚úÖ Data updated from 2025-11-12 11:15:00+05:30 to 2025-11-17 15:15:00+05:30
üìä Added 26 new records

üìä Processing: ^CNXPHARMA
üìà Updating existing data for NIFTY_PHARMA...
Last record in file is from: 2025-11-12 10:15:00+05:30
‚úÖ Data updated from 2025-11-12 11:15:00+05:30 to 2025-11-17 15:15:00+05:30
üìä Added 26 new records

üìä Processing: ^CNXFMCG
üìà Updating existing data for NIFTY_FMCG...
Last rec

In [6]:
# Yahoo Finance ticker -> base name used to build that equity's CSV file names
# (the later cells build "<basename>_data.csv", "<basename>_predictions_xgboost.csv", ...).

# Indices
_index_basenames = {
    "^NSEI": "NIFTY_50",
    "^NSEBANK": "NIFTY_BANK",
    "^CNXIT": "NIFTY_IT",
    "^CNXPHARMA": "NIFTY_PHARMA",
    "^CNXFMCG": "NIFTY_FMCG",
    "^CNXAUTO": "NIFTY_AUTO",
    "^CNXMETAL": "NIFTY_METAL",
    "^CNXREALTY": "NIFTY_REALTY",
    "^CNXENERGY": "NIFTY_ENERGY",
    "NIFTY_FIN_SERVICE.NS": "NIFTY_FIN_SERVICE",
}

# Stocks (some base names carry a trailing "." or an "&"; they are kept verbatim)
_stock_basenames = {
    "RELIANCE.NS": "RELIANCE_INDUSTRIES_LTD",
    "TCS.NS": "TATA_CONSULTANCY_SERV_LT",
    "SUNPHARMA.NS": "SUN_PHARMACEUTICAL_IND_L",
    "ICICIBANK.NS": "ICICI_BANK_LTD.",
    "INFY.NS": "INFOSYS_LIMITED",
    "SBIN.NS": "STATE_BANK_OF_INDIA",
    "BHARTIARTL.NS": "BHARTI_AIRTEL_LIMITED",
    "ITC.NS": "ITC_LTD",
    "LT.NS": "LARSEN_&_TOUBRO_LTD.",
    "HINDUNILVR.NS": "HINDUSTAN_UNILEVER_LTD.",
}

# Merge with indices first so the key order stays: indices, then stocks.
ticker_to_basename = {**_index_basenames, **_stock_basenames}

# Ticker list derived from the mapping, in the mapping's key order.
all_tickers = list(ticker_to_basename)

# -----------------  XGBoost ---------------------

In [7]:
def update_all_predictions(model_suffix):
    """Refresh the prediction CSV of every equity in ``ticker_to_basename``.

    For each base name, the raw file ``<basename>_data.csv`` inside
    ``raw_data_folder`` is handed to ``update_predictions_file`` together with
    the output path ``<basename>_predictions_<model_suffix>.csv`` inside
    ``predicted_data_folder``. A failure for one equity is printed and the
    loop continues with the next one.

    Args:
        model_suffix: Tag placed in the output file name (e.g. ``"xgboost"``).

    NOTE(review): ``update_predictions_file`` receives only the two paths, so
    from this cell the model is visible only in the output file name --
    confirm the helper really produces predictions for the intended model.
    """
    print(f"\n{'='*80}")
    print("üîÆ Generating predictions for all equities...")

    # Only the base names are needed here; the ticker symbols are not used.
    for basename in ticker_to_basename.values():
        print(f"---> Processing {basename}")

        try:
            # Input: the raw data file of this equity.
            raw_file_path = os.path.join(raw_data_folder, f"{basename}_data.csv")

            # Output: "<basename>_predictions_<model_suffix>.csv".
            pred_file_name = f"{basename}_predictions_{model_suffix}.csv"
            pred_file_path = os.path.join(predicted_data_folder, pred_file_name)

            update_predictions_file(raw_file_path, pred_file_path)
            print(f"    ‚úÖ Successfully updated: {pred_file_name}")

        except Exception as e:
            # Best effort: report the failure and keep going with the other equities.
            print(f"    ‚ùå Error processing prediction for {basename}: {e}")

    print("\nAll processing complete.")


update_all_predictions("xgboost")


üîÆ Generating predictions for all equities...
---> Processing NIFTY_50
Appended 2 new entries to X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_50_predictions_xgboost.csv
    ‚úÖ Successfully updated: NIFTY_50_predictions_xgboost.csv
---> Processing NIFTY_BANK
Appended 2 new entries to X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_BANK_predictions_xgboost.csv
    ‚úÖ Successfully updated: NIFTY_BANK_predictions_xgboost.csv
---> Processing NIFTY_IT
Appended 2 new entries to X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_IT_predictions_xgboost.csv
    ‚úÖ Successfully updated: NIFTY_IT_predictions_xgboost.csv
---> Processing NIFTY_PHARMA
Appended 2 new entries to X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_PHARMA_predictions_xgboost.csv
    ‚úÖ Successfully updated: NIFTY_PHARMA_predictions_xgboost.csv
---> Processing NIFTY_FMCG
Appended 2 new entries to X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_FMCG_predictions_xgboost.csv
    ‚úÖ Successfully updated: NIFTY_F

# ---------------- Random Forest ------------------------

In [None]:
# print(f"\n{'='*80}")
# print(f"üîÆ Generating predictions for all equities...")

# for ticker, basename in ticker_to_basename.items():
#     print(f"---> Processing {basename}")
    
#     try:
#         # 1. Construct the path to the raw data file (Input)
#         raw_file_name = f"{basename}_data.csv"
#         raw_file_path = os.path.join(raw_data_folder, raw_file_name)
        
#         # 2. Construct the path for the new prediction file (Output)
#         # Follows your "Equity+prediction+rf" format
#         pred_file_name = f"{basename}_predictions_rf.csv"
#         pred_file_path = os.path.join(predicted_data_folder, pred_file_name)
        
#         # 3. Call the function (as you assumed)
#         update_predictions_file(raw_file_path, pred_file_path)
#         print(f"    ‚úÖ Successfully updated: {pred_file_name}")
    
#     except Exception as e:
#         print(f"    ‚ùå Error processing prediction for {basename}: {e}")

# print("\nAll processing complete.")


üîÆ Generating predictions for all equities...
---> Processing NIFTY_50
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_50_predictions_rf.csv with all data.
    ‚úÖ Successfully updated: NIFTY_50_predictions_rf.csv
---> Processing NIFTY_BANK
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_BANK_predictions_rf.csv with all data.
    ‚úÖ Successfully updated: NIFTY_BANK_predictions_rf.csv
---> Processing NIFTY_IT
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_IT_predictions_rf.csv with all data.
    ‚úÖ Successfully updated: NIFTY_IT_predictions_rf.csv
---> Processing NIFTY_PHARMA
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_PHARMA_predictions_rf.csv with all data.
    ‚úÖ Successfully updated: NIFTY_PHARMA_predictions_rf.csv
---> Processing NIFTY_FMCG
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_FMCG_predictions_rf.csv with all data.
    ‚úÖ Successfully update

# ------------------ LSTM --------------

In [None]:
# print(f"\n{'='*80}")
# print(f"üîÆ Generating predictions for all equities...")

# for ticker, basename in ticker_to_basename.items():
#     print(f"---> Processing {basename}")
    
#     try:
#         # 1. Construct the path to the raw data file (Input)
#         raw_file_name = f"{basename}_data.csv"
#         raw_file_path = os.path.join(raw_data_folder, raw_file_name)
        
#         # 2. Construct the path for the new prediction file (Output)
#         # Follows your "Equity+prediction+lstm" format
#         pred_file_name = f"{basename}_predictions_lstm.csv"
#         pred_file_path = os.path.join(predicted_data_folder, pred_file_name)
        
#         # 3. Call the function (as you assumed)
#         update_predictions_file(raw_file_path, pred_file_path)
#         print(f"    ‚úÖ Successfully updated: {pred_file_name}")
    
#     except Exception as e:
#         print(f"    ‚ùå Error processing prediction for {basename}: {e}")

# print("\nAll processing complete.")


üîÆ Generating predictions for all equities...
---> Processing NIFTY_50
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_50_predictions_lstm.csv with all data.
    ‚úÖ Successfully updated: NIFTY_50_predictions_lstm.csv
---> Processing NIFTY_BANK
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_BANK_predictions_lstm.csv with all data.
    ‚úÖ Successfully updated: NIFTY_BANK_predictions_lstm.csv
---> Processing NIFTY_IT
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_IT_predictions_lstm.csv with all data.
    ‚úÖ Successfully updated: NIFTY_IT_predictions_lstm.csv
---> Processing NIFTY_PHARMA
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_PHARMA_predictions_lstm.csv with all data.
    ‚úÖ Successfully updated: NIFTY_PHARMA_predictions_lstm.csv
---> Processing NIFTY_FMCG
Created a new file at X:\AGDrive\ADA-Timeseries project\Pred_Data\NIFTY_FMCG_predictions_lstm.csv with all data.
    ‚úÖ S

In [9]:
import os
import pandas as pd
from lags_util import create_all_advanced_lags

# --- Folder locations (same values as in the configuration cell) ---
predicted_data_folder = r"X:\AGDrive\ADA-Timeseries project\Pred_Data"
lagging_data_folder = r"X:\AGDrive\ADA-Timeseries project\Lag_Data"


def _strip_index_tz(frame):
    """Make ``frame``'s index timezone-naive in place when it carries a timezone."""
    if frame.index.tz is not None:
        frame.index = frame.index.tz_localize(None)


# --- Build one lag-feature CSV per equity ---

print(f"\n{'='*80}")
print(f"üìà Generating lag features for all equities...")
print(f"{'='*80}")

# The ticker symbols are irrelevant here; only the base names (e.g. "NIFTY_50") are used.
for basename in ticker_to_basename.values():
    print(f"---> Processing {basename}")

    # Input: the XGBoost prediction file written by the prediction cell.
    pred_file_name = f"{basename}_predictions_xgboost.csv"
    pred_file_path = os.path.join(predicted_data_folder, pred_file_name)

    # Output: the lag file inside the Lag_Data folder.
    lag_file_name = f"{basename}_lagged.csv"
    lag_file_path = os.path.join(lagging_data_folder, lag_file_name)

    try:
        # Load the predictions, indexed by their 'Datetime' column.
        data = pd.read_csv(
            pred_file_path,
            parse_dates=['Datetime'],
            index_col='Datetime',
        )
        _strip_index_tz(data)

        # Lags are built from the 'ActualPrice' column only.
        data_df = data[['ActualPrice']].copy()

        # Compute the lag features and make sure the result's index is timezone-naive too.
        full_lagged = create_all_advanced_lags(data_df)
        _strip_index_tz(full_lagged)

        # Persist the lagged frame.
        full_lagged.to_csv(lag_file_path)
        print(f"    ‚úÖ Successfully created: {lag_file_name}")

    except FileNotFoundError:
        print(f"    ‚ö†Ô∏è  Skipping {basename}: Prediction file not found at {pred_file_path}")
    except Exception as e:
        # Any other failure is reported and the loop continues with the next equity.
        print(f"    ‚ùå Error processing lags for {basename}: {e}")

print("\nAll lag feature generation complete.")


üìà Generating lag features for all equities...
---> Processing NIFTY_50
    ‚úÖ Successfully created: NIFTY_50_lagged.csv
---> Processing NIFTY_BANK
    ‚úÖ Successfully created: NIFTY_BANK_lagged.csv
---> Processing NIFTY_IT
    ‚úÖ Successfully created: NIFTY_IT_lagged.csv
---> Processing NIFTY_PHARMA
    ‚úÖ Successfully created: NIFTY_PHARMA_lagged.csv
---> Processing NIFTY_FMCG
    ‚úÖ Successfully created: NIFTY_FMCG_lagged.csv
---> Processing NIFTY_AUTO
    ‚úÖ Successfully created: NIFTY_AUTO_lagged.csv
---> Processing NIFTY_METAL
    ‚úÖ Successfully created: NIFTY_METAL_lagged.csv
---> Processing NIFTY_REALTY
    ‚úÖ Successfully created: NIFTY_REALTY_lagged.csv
---> Processing NIFTY_ENERGY
    ‚úÖ Successfully created: NIFTY_ENERGY_lagged.csv
---> Processing NIFTY_FIN_SERVICE
    ‚úÖ Successfully created: NIFTY_FIN_SERVICE_lagged.csv
---> Processing RELIANCE_INDUSTRIES_LTD
    ‚úÖ Successfully created: RELIANCE_INDUSTRIES_LTD_lagged.csv
---> Processing TATA_CONSULTANCY_SE