In [6]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split

# Creating lag features for time series forecasting, including Exchange Rate
def create_lag_features(df, lag):
    df['new_Exchange_Rate'] = df['Exchange Rate'].shift(lag)
    return df

df = pd.read_csv('CombinedBeansPrice.csv', parse_dates=['Date'])


In [7]:
# Assuming you have a DataFrame 'df' with columns 'Kandy', 'Exchange_Rate' and the index is 'Date'
df = df.reset_index()

# Store the best lag and its corresponding MAPE
best_lag = None
best_mape = float('inf')

# Loop through lags from 7 to 30
for lag in range(7, 31):
    # Generate lag features for the current lag value
    df_lag = df.copy()
    df_lag = create_lag_features(df_lag, lag)

    # Drop rows with NaN values generated due to lagging
    df_lag = df_lag.dropna()

    # Features and labels
    X = df_lag[['new_Exchange_Rate']]
    y_kandy = df_lag['Kandy']

    # Split the dataset into training and test sets
    X_train, X_test, y_kandy_train, y_kandy_test = train_test_split(
        X, y_kandy, test_size=0.2, shuffle=False
    )

    # Train LGBM model for Kandy
    kandy_model = lgb.LGBMRegressor()
    kandy_model.fit(X_train, y_kandy_train)

    # Make predictions
    kandy_predictions = kandy_model.predict(X_test)

    # Calculate MAPE for Kandy
    mape_kandy = mean_absolute_percentage_error(y_kandy_test, kandy_predictions)

    # Check if the current lag gives the best MAPE
    if mape_kandy < best_mape:
        best_mape = mape_kandy
        best_lag = lag

    print(f'Lag: {lag}, MAPE: {mape_kandy:.4f}')

# Output the best lag and corresponding MAPE
print(f'Best Lag: {best_lag}, Best MAPE: {best_mape:.4f}')


KeyError: 'Exchange Rate'