In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load data
df_a = pd.read_csv('bean-interpolated.csv', parse_dates=['Date'], index_col='Date')
df_weather_kandy = pd.read_csv('Weather_Kandy.csv', parse_dates=['Date'], index_col='Date')

# Merge the dataframes on the 'Date' column
df_merged = df_a.merge(df_weather_kandy, on='Date', how='inner')

# Define function to create lagged features
def add_lagged_features(df, lag_days):
    for lag in range(1, lag_days + 1):
        df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
    return df

# Function to create datasets with a given look-back period
def create_dataset(data, look_back=1):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back)])
        y.append(data[i + look_back, 0])  # Predicting price column only
    return np.array(X), np.array(y)

# Function to train model and calculate MAPE for a given lag
def train_and_evaluate_model(df, look_back, lag):
    # Prepare lagged data
    df_lagged = add_lagged_features(df.copy(), lag)
    df_lagged.dropna(inplace=True)
    
    # Separate scalers for full data and price column
    scaler_full = MinMaxScaler(feature_range=(0, 1))
    scaler_price = MinMaxScaler(feature_range=(0, 1))

    # Fit scalers
    df_lagged_scaled_full = scaler_full.fit_transform(df_lagged)
    df_lagged_scaled_price = scaler_price.fit_transform(df_lagged[['Bean_Kandy_price']])
    
    # Create training and testing datasets
    train_size = int(len(df_lagged_scaled_full) * 0.8)
    train, test = df_lagged_scaled_full[:train_size], df_lagged_scaled_full[train_size:]
    
    # Use look_back for LSTM input
    X_train, y_train = create_dataset(train, look_back)
    X_test, y_test = create_dataset(test, look_back)
    
    # Ensure the data has the correct shape (samples, time steps, features)
    num_features = df_lagged.shape[1]  # Number of columns in the lagged dataframe
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], num_features))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], num_features))
    
    # Build the LSTM model
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(look_back, num_features)))
    model.add(LSTM(units=50))
    model.add(Dense(units=1))
    
    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=0)
    
    # Predict using the trained model
    test_predictions = model.predict(X_test)
    
    # Invert predictions using only the price scaler
    test_predictions_price = scaler_price.inverse_transform(test_predictions)
    y_test_actual_price = scaler_price.inverse_transform(y_test.reshape(-1, 1))
    
    # Calculate MAPE
    y_test_actual_flat = y_test_actual_price.flatten()
    test_predictions_flat = test_predictions_price.flatten()
    percentage_errors = np.abs((y_test_actual_flat - test_predictions_flat) / y_test_actual_flat) * 100

    # Calculate mean percentage error
    mape = np.mean(percentage_errors)
    

    
    return mape

# Parameters
look_back = 60
max_lag = 120
best_mape = float('inf')
best_lag = 0

# Iterate over different lag values to find the best MAPE
for lag in range(0, max_lag + 1,7):
    print(1)
    mape = train_and_evaluate_model(df_merged, look_back, lag)
    print(f'Lag: {lag}, MAPE: {mape:.2f}%')
    if mape < best_mape:
        best_mape = mape
        best_lag = lag

print(f'\nBest lag: {best_lag} with MAPE: {best_mape:.2f}%')


1


  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step
Lag: 0, MAPE: 11.36%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Lag: 7, MAPE: 11.65%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step
Lag: 14, MAPE: 11.82%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step
Lag: 21, MAPE: 11.54%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 81ms/step
Lag: 28, MAPE: 13.45%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step
Lag: 35, MAPE: 14.84%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step
Lag: 42, MAPE: 15.52%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 134ms/step
Lag: 49, MAPE: 14.50%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step
Lag: 56, MAPE: 14.95%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step
Lag: 63, MAPE: 15.58%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 64ms/step
Lag: 70, MAPE: 14.30%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step
Lag: 77, MAPE: 14.77%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step
Lag: 84, MAPE: 15.22%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 61ms/step
Lag: 91, MAPE: 20.64%
1


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step
Lag: 98, MAPE: 22.93%
1


  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step
Lag: 105, MAPE: 17.08%
1


  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step
Lag: 112, MAPE: 18.22%
1


  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shift(lag)
  df[f'rain_sum_lag_{lag}'] = df['rain_sum'].shi

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 81ms/step
Lag: 119, MAPE: 18.51%

Best lag: 0 with MAPE: 11.36%


: 