In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build every file path below the Kaggle input directory, then print
# them one per line in the order os.walk yields them.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import datetime
import json
import os
import pickle

import numpy as np
import pandas as pd
import requests
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model

In [16]:
import requests
import pandas as pd

def fetch_calendar_data(api_key=None, country="IN", year=2025):
    """Fetch holidays from the Calendarific API and return them as a DataFrame.

    Parameters
    ----------
    api_key : str, optional
        Calendarific API key. When omitted (or empty), the
        ``CALENDARIFIC_API_KEY`` environment variable is used so the secret
        does not have to be written into the notebook.
    country : str
        Value sent as the ``country`` query parameter.
    year : int
        Value sent as the ``year`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per holiday with columns ``date`` (datetime, time and
        timezone stripped; NaT when missing or unparseable), ``name`` and
        ``type`` (first entry of the holiday's type list, or None).
        An empty DataFrame is returned, after printing a message, when no key
        is available, the request fails, the status is not 200, the payload
        is malformed, or no holidays are listed.
    """
    api_key = api_key or os.environ.get("CALENDARIFIC_API_KEY")
    if not api_key:
        print("Error fetching API data: no API key provided (set CALENDARIFIC_API_KEY).")
        return pd.DataFrame()

    try:
        # `params` handles URL encoding; `timeout` keeps a stalled connection
        # from hanging the notebook indefinitely.
        response = requests.get(
            "https://calendarific.com/api/v2/holidays",
            params={"api_key": api_key, "country": country, "year": year},
            timeout=30,
        )
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"Error fetching API data: {type(exc).__name__}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Error fetching API data: {response.status_code}")
        return pd.DataFrame()

    try:
        payload = response.json()
    except ValueError:
        print("Error fetching API data: response was not valid JSON")
        return pd.DataFrame()

    # Walk the payload defensively so an unexpected shape yields the
    # "no holidays" path instead of a KeyError/TypeError.
    body = payload.get("response") if isinstance(payload, dict) else None
    holidays = body.get("holidays") if isinstance(body, dict) else None

    if not holidays:
        print(f"No holidays found for {country} in {year}.")
        return pd.DataFrame()

    records = []
    for h in holidays:
        iso = (h.get("date") or {}).get("iso")
        types = h.get("type")
        records.append({
            # Safe conversion: strip time & timezone first, keeping YYYY-MM-DD.
            "date": iso.split("T")[0] if isinstance(iso, str) else None,
            "name": h.get("name", None),
            "type": types[0] if types else None,
        })

    df = pd.DataFrame(records)
    df["date"] = pd.to_datetime(df["date"], errors="coerce")

    print(f"Successfully fetched {len(df)} holidays for {country} in {year}.")
    return df


In [17]:
# The API key is read from the CALENDARIFIC_API_KEY environment variable so the
# secret is never stored in the notebook or its saved outputs.
df_holidays = fetch_calendar_data(
    api_key=os.environ.get("CALENDARIFIC_API_KEY"), country="IN", year=2025
)
print(df_holidays.head())

Successfully fetched 78 holidays for IN in 2025.
        date                       name              type
0 2025-01-01             New Year's Day  Optional holiday
1 2025-01-02       Last day of Hanukkah        Observance
2 2025-01-06  Guru Govind Singh Jayanti  Optional holiday
3 2025-01-13                      Lohri        Observance
4 2025-01-14                     Pongal          Hinduism


In [30]:
# Locations of the previously trained model artefacts in the Kaggle input dataset
LGB_PATH = "/kaggle/input/pkl-files/lgb_model.pkl"
LSTM_PATH = "/kaggle/input/pkl-files/lstm_model.h5"

# Restore the pickled LightGBM model.
# NOTE: unpickling executes code embedded in the file; only load trusted artefacts.
with open(LGB_PATH, mode="rb") as model_file:
    lgb_model = pickle.load(model_file)

# Restore the LSTM model; compile=False loads it without compiling it
lstm_model = load_model(LSTM_PATH, compile=False)

print("Both models loaded successfully!")


Both models loaded successfully!


In [31]:
# Prepare Training Data
# The API key is read from the CALENDARIFIC_API_KEY environment variable so the
# secret is never stored in the notebook.
api_key = os.environ.get("CALENDARIFIC_API_KEY")
calendar_df = fetch_calendar_data(api_key)

if not calendar_df.empty:
    # Calendar features derived from the holiday date.
    calendar_df['month'] = calendar_df['date'].dt.month
    calendar_df['day'] = calendar_df['date'].dt.day
    calendar_df['is_weekend'] = calendar_df['date'].dt.weekday >= 5  # Sat/Sun
    calendar_df['holiday_flag'] = 1  # every fetched row is a holiday

    # Placeholder target: uniform random values, not real observations.
    # A seeded generator makes Restart & Run All reproduce the same split,
    # model and metrics instead of changing on every execution.
    rng = np.random.default_rng(42)
    calendar_df['target'] = rng.uniform(100, 500, size=len(calendar_df))

    X = calendar_df[['month', 'day', 'is_weekend', 'holiday_flag']]
    y = calendar_df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
else:
    # X_train/X_test/y_train/y_test stay undefined here; the later cells
    # guard on calendar_df.empty for that reason.
    print("⚠️ Skipping retraining — API returned no data.")


Successfully fetched 78 holidays for IN in 2025.


In [32]:
import lightgbm as lgb

if not calendar_df.empty:
    # Regression objective tracked with RMSE; verbosity=-1 silences LightGBM logs.
    params = dict(objective="regression", metric="rmse", verbosity=-1)

    # The evaluation set references the training set.
    lgb_train = lgb.Dataset(X_train, label=y_train)
    lgb_eval = lgb.Dataset(X_test, label=y_test, reference=lgb_train)

    # Up to 100 boosting rounds, stopping once the validation score has not
    # improved for 10 consecutive rounds.
    # NOTE(review): X_test/y_test drive early stopping here and are also the
    # data scored by the evaluation cell, so those metrics are optimistic.
    updated_lgb = lgb.train(
        params=params,
        train_set=lgb_train,
        num_boost_round=100,
        valid_sets=[lgb_eval],
        callbacks=[lgb.early_stopping(stopping_rounds=10)],
    )

    print("LightGBM retrained successfully!")


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[41]	valid_0's rmse: 124.191
LightGBM retrained successfully!


In [35]:
# Evaluate and Store Metadata
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data with common regression metrics.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(X_test)``.
    X_test : array-like
        Features passed straight to ``model.predict``.
    y_test : array-like
        True target values, compared position by position with the predictions.

    Returns
    -------
    dict
        Plain Python floats keyed by ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"R2"``,
        ``"MAPE (%)"`` and ``"WAPE (%)"``. MAPE is averaged over the samples
        whose true value is non-zero (NaN when there are none); WAPE is NaN
        when the absolute targets sum to zero.
    """
    y_true = np.asarray(y_test, dtype=float)
    y_pred = np.asarray(model.predict(X_test), dtype=float)

    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    abs_err = np.abs(y_true - y_pred)

    # Mean Absolute Percentage Error: a zero target would divide by zero and
    # turn the whole metric into inf, so those samples are left out.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(abs_err[nonzero] / np.abs(y_true[nonzero])) * 100
    else:
        mape = float("nan")

    # Weighted Absolute Percentage Error: total error over total absolute target.
    total_abs_target = np.sum(np.abs(y_true))
    if total_abs_target > 0:
        wape = np.sum(abs_err) / total_abs_target * 100
    else:
        wape = float("nan")

    # Cast to built-in floats so the result serialises cleanly (e.g. to JSON).
    return {
        "MAE": float(mae),
        "MSE": float(mse),
        "RMSE": float(rmse),
        "R2": float(r2),
        "MAPE (%)": float(mape),
        "WAPE (%)": float(wape)
    }

if not calendar_df.empty:
    # Score the retrained booster on the held-out split.
    metrics = evaluate_model(updated_lgb, X_test, y_test)

    # The version tag is the local wall-clock time at which this cell runs.
    metadata = dict(
        version=datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
        model_type="LightGBM",
        source="Calendarific API",
        metrics=metrics,
    )

    # Write the run description to the working directory as JSON.
    with open("model_metadata.json", "w") as meta_file:
        json.dump(metadata, meta_file, indent=4)

    print("Metadata saved: model_metadata.json")
    print("Metrics:", metrics)


Metadata saved: model_metadata.json
Metrics: {'MAE': 109.07909399509623, 'MSE': 15423.473113036995, 'RMSE': 124.19127631616077, 'R2': -0.008891582349324523, 'MAPE (%)': 44.04330293261669, 'WAPE (%)': 35.781157781846154}


In [36]:
# Persist the retrained LightGBM booster to the working directory.
# Skipped when the API returned no data, since no model was retrained.
if not calendar_df.empty:
    with open("updated_lgb_model.pkl", mode="wb") as model_file:
        pickle.dump(updated_lgb, model_file)
    print("Retrained LightGBM model saved as 'updated_lgb_model.pkl'")


Retrained LightGBM model saved as 'updated_lgb_model.pkl'
