In [1]:
import os
import pandas as pd
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pickle

In [2]:
# Directory holding the per-city split CSVs.
# File names follow <City>_train.csv / <City>_val.csv / <City>_test.csv,
# e.g. Colombo_train.csv, Colombo_val.csv, Colombo_test.csv.
input_folder = 'city_wise_data_splits'

# Directory that receives the pickled VAR results.
# Created up front; exist_ok=True makes re-running this cell harmless.
model_folder = 'trained_var_models'
os.makedirs(model_folder, exist_ok=True)

In [3]:
# Cities to model, processed in this order (edit the list to add or drop cities).
# Each entry is used verbatim to build the CSV file names, so the spelling must
# match the files on disk (e.g. "Nuwara_Eliya", "Kilinochchi1").
cities = [
    "Colombo",
    "Gampaha",
    "Kalutara",
    "Kandy",
    "Matale",
    "Nuwara_Eliya",
    "Galle",
    "Matara",
    "Hambantota",
    "Jaffna",
    "Kilinochchi1",
    "Mannar",
    "Vavuniya",
    "Mullaitivu",
    "Batticaloa",
    "Ampara",
    "Trincomalee",
    "Kurunegala",
    "Puttalam",
    "Anuradhapura",
    "Polonnaruwa",
    "Badulla",
    "Moneragala",
    "Ratnapura",
    "Kegalle",
    "Welimada",
    "Bandarawela",
]

In [4]:
def evaluate_model(true_df, pred_df):
    """Print MAE, RMSE and R2 for each column of ``true_df`` versus ``pred_df``.

    Args:
        true_df: DataFrame of observed values; its columns drive the loop.
        pred_df: DataFrame of predictions; must contain every column of
            ``true_df``. Values are compared by position, not by index label.

    Returns:
        None. The metrics are written to stdout only.
    """
    # (label, scorer) pairs in print order; RMSE is the square root of sklearn's MSE.
    scorers = (
        ("MAE", mean_absolute_error),
        ("RMSE", lambda actual, predicted: np.sqrt(mean_squared_error(actual, predicted))),
        ("R2", r2_score),
    )

    print("Evaluation metrics:")
    for column in true_df.columns:
        actual, predicted = true_df[column].values, pred_df[column].values
        print(f"  {column}:")
        for label, scorer in scorers:
            # Labels are left-padded to 4 characters so the '=' signs line up.
            print(f"    {label:<4} = {scorer(actual, predicted):.4f}")

In [5]:
def _load_split(csv_path):
    """Read one split CSV and return it sorted by, and indexed on, its 'date' column."""
    return pd.read_csv(csv_path, parse_dates=['date']).sort_values('date').set_index('date')


# Series the VAR model is fitted on; both the train and the val split must provide them.
required_cols = ['temperature', 'rainfall', 'wind_speed']

# Upper bound on the lag order considered during AIC-based order selection.
max_lags = 15

# Fit, validate and persist one VAR model per city. Every failure is reported
# and the loop moves on, so a single bad city cannot abort the whole batch.
for city in cities:
    city_safe = city.replace(' ', '_').replace('[', '').replace(']', '')
    try:
        # Expected files: <city>_train.csv, <city>_val.csv, <city>_test.csv
        train_path, val_path, test_path = (
            os.path.join(input_folder, f"{city_safe}_{split}.csv")
            for split in ('train', 'val', 'test')
        )

        # All three splits must exist. The test split is only checked for
        # presence: it is not needed for fitting or validation, so it is
        # intentionally not read here.
        if not all(os.path.isfile(path) for path in (train_path, val_path, test_path)):
            print(f"[SKIP] Missing train/val/test files for city: {city}")
            continue

        train_df = _load_split(train_path)
        val_df = _load_split(val_path)

        # Check both frames that are actually used, so a missing column is
        # reported as a clear skip instead of surfacing later as a KeyError.
        if not all(col in train_df.columns for col in required_cols):
            print(f"[SKIP] Train CSV missing required columns for city: {city}")
            continue
        if not all(col in val_df.columns for col in required_cols):
            print(f"[SKIP] Val CSV missing required columns for city: {city}")
            continue

        train_data = train_df[required_cols]
        val_data = val_df[required_cols]

        # NOTE(review): the captured output shows statsmodels `_init_dates`
        # warnings; presumably the date index carries no frequency. Forecasts
        # below are re-indexed with val_data.index, so this looks cosmetic -
        # confirm before setting an explicit frequency.
        model = VAR(train_data)
        lag_order = model.select_order(maxlags=max_lags).aic
        print(f"\nTraining VAR model for city: {city}")
        print(f"Selected lag order (AIC): {lag_order}")

        if lag_order < 1:
            # With 0 lags the slice values[-0:] would silently pass the entire
            # training history as the forecast seed instead of the last
            # `lag_order` rows, so fall back to the smallest usable order.
            lag_order = 1
            print("AIC selected 0 lags; falling back to lag order 1")

        results = model.fit(lag_order)

        # Forecast the whole validation horizon in one shot, seeded with the
        # last `lag_order` training observations.
        forecast_vals = results.forecast(train_data.values[-lag_order:], steps=len(val_data))
        forecast_df = pd.DataFrame(forecast_vals, index=val_data.index, columns=val_data.columns)

        print(f"Validation results for city: {city}")
        evaluate_model(val_data, forecast_df)

        # Persist the fitted results object. Only unpickle these files from a
        # trusted location: pickle.load can execute arbitrary code.
        model_filename = os.path.join(model_folder, f"{city_safe}_var_model.pkl")
        with open(model_filename, 'wb') as f:
            pickle.dump(results, f)
        print(f"Saved VAR model for {city} at {model_filename}")

    except Exception as e:
        # Deliberate best-effort: report the failure and continue with the next city.
        print(f"[ERROR] Failed processing city {city}: {e}")


  self._init_dates(dates, freq)



Training VAR model for city: Colombo
Selected lag order (AIC): 12
Validation results for city: Colombo
Evaluation metrics:
  temperature:
    MAE  = 0.7010
    RMSE = 0.8694
    R2   = 0.0253
  rainfall:
    MAE  = 6.6587
    RMSE = 10.4273
    R2   = 0.0049
  wind_speed:
    MAE  = 3.6303
    RMSE = 4.6319
    R2   = -0.1807
Saved VAR model for Colombo at trained_var_models\Colombo_var_model.pkl

Training VAR model for city: Gampaha
Selected lag order (AIC): 15


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Validation results for city: Gampaha
Evaluation metrics:
  temperature:
    MAE  = 0.6947
    RMSE = 0.8695
    R2   = 0.1235
  rainfall:
    MAE  = 6.7506
    RMSE = 10.1815
    R2   = 0.0047
  wind_speed:
    MAE  = 2.9539
    RMSE = 3.6597
    R2   = -0.0189
Saved VAR model for Gampaha at trained_var_models\Gampaha_var_model.pkl

Training VAR model for city: Kalutara
Selected lag order (AIC): 15
Validation results for city: Kalutara
Evaluation metrics:
  temperature:
    MAE  = 0.6976
    RMSE = 0.8481
    R2   = -0.0197
  rainfall:
    MAE  = 6.2365
    RMSE = 8.6355
    R2   = 0.0017
  wind_speed:
    MAE  = 3.3297
    RMSE = 4.3035
    R2   = -0.0398
Saved VAR model for Kalutara at trained_var_models\Kalutara_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Kandy
Selected lag order (AIC): 14
Validation results for city: Kandy
Evaluation metrics:
  temperature:
    MAE  = 0.7699
    RMSE = 0.9921
    R2   = 0.0640
  rainfall:
    MAE  = 5.3347
    RMSE = 7.7746
    R2   = -0.0133
  wind_speed:
    MAE  = 4.3912
    RMSE = 5.3513
    R2   = -0.2846
Saved VAR model for Kandy at trained_var_models\Kandy_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Matale
Selected lag order (AIC): 11
Validation results for city: Matale
Evaluation metrics:
  temperature:
    MAE  = 0.9107
    RMSE = 1.1217
    R2   = -0.1201
  rainfall:
    MAE  = 6.2952
    RMSE = 12.1299
    R2   = -0.0029
  wind_speed:
    MAE  = 3.5569
    RMSE = 4.3191
    R2   = -0.0002
Saved VAR model for Matale at trained_var_models\Matale_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Nuwara_Eliya
Selected lag order (AIC): 15
Validation results for city: Nuwara_Eliya
Evaluation metrics:
  temperature:
    MAE  = 1.3752
    RMSE = 1.5609
    R2   = -2.2738
  rainfall:
    MAE  = 7.0286
    RMSE = 10.8165
    R2   = 0.0023
  wind_speed:
    MAE  = 5.2490
    RMSE = 6.9381
    R2   = -0.1430
Saved VAR model for Nuwara_Eliya at trained_var_models\Nuwara_Eliya_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Galle
Selected lag order (AIC): 14
Validation results for city: Galle
Evaluation metrics:
  temperature:
    MAE  = 0.6739
    RMSE = 0.8240
    R2   = 0.0076
  rainfall:
    MAE  = 5.9029
    RMSE = 11.1011
    R2   = -0.0055
  wind_speed:
    MAE  = 4.8611
    RMSE = 5.6870
    R2   = 0.0043
Saved VAR model for Galle at trained_var_models\Galle_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Matara
Selected lag order (AIC): 15
Validation results for city: Matara
Evaluation metrics:
  temperature:
    MAE  = 0.6409
    RMSE = 0.8015
    R2   = -0.2515
  rainfall:
    MAE  = 4.5422
    RMSE = 6.5453
    R2   = -0.0152
  wind_speed:
    MAE  = 6.7456
    RMSE = 8.1951
    R2   = -0.3008
Saved VAR model for Matara at trained_var_models\Matara_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Hambantota
Selected lag order (AIC): 14
Validation results for city: Hambantota
Evaluation metrics:
  temperature:
    MAE  = 0.8530
    RMSE = 1.0453
    R2   = -0.0695
  rainfall:
    MAE  = 3.0217
    RMSE = 5.0970
    R2   = -0.0088
  wind_speed:
    MAE  = 5.4917
    RMSE = 6.3874
    R2   = -0.0644
Saved VAR model for Hambantota at trained_var_models\Hambantota_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Jaffna
Selected lag order (AIC): 11
Validation results for city: Jaffna
Evaluation metrics:
  temperature:
    MAE  = 1.3073
    RMSE = 1.5454
    R2   = -0.1179
  rainfall:
    MAE  = 4.3886
    RMSE = 9.1613
    R2   = -0.0072
  wind_speed:
    MAE  = 6.4473
    RMSE = 7.6863
    R2   = -0.2143
Saved VAR model for Jaffna at trained_var_models\Jaffna_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Kilinochchi1
Selected lag order (AIC): 12
Validation results for city: Kilinochchi1
Evaluation metrics:
  temperature:
    MAE  = 1.6597
    RMSE = 1.8773
    R2   = -0.1264
  rainfall:
    MAE  = 4.7253
    RMSE = 14.3677
    R2   = -0.0059
  wind_speed:
    MAE  = 4.9426
    RMSE = 6.1028
    R2   = -0.1281
Saved VAR model for Kilinochchi1 at trained_var_models\Kilinochchi1_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Mannar
Selected lag order (AIC): 10
Validation results for city: Mannar
Evaluation metrics:
  temperature:
    MAE  = 1.3559
    RMSE = 1.5483
    R2   = -0.0988
  rainfall:
    MAE  = 3.7389
    RMSE = 10.3312
    R2   = -0.0009
  wind_speed:
    MAE  = 6.2393
    RMSE = 7.6418
    R2   = -0.0018
Saved VAR model for Mannar at trained_var_models\Mannar_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Vavuniya
Selected lag order (AIC): 12
Validation results for city: Vavuniya
Evaluation metrics:
  temperature:
    MAE  = 1.5202
    RMSE = 1.7423
    R2   = -0.1171
  rainfall:
    MAE  = 4.6797
    RMSE = 9.8307
    R2   = -0.0044
  wind_speed:
    MAE  = 4.3996
    RMSE = 5.5695
    R2   = -0.0222
Saved VAR model for Vavuniya at trained_var_models\Vavuniya_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Mullaitivu
Selected lag order (AIC): 12
Validation results for city: Mullaitivu
Evaluation metrics:
  temperature:
    MAE  = 1.4247
    RMSE = 1.6586
    R2   = -0.0866
  rainfall:
    MAE  = 4.6899
    RMSE = 11.5486
    R2   = -0.0053
  wind_speed:
    MAE  = 4.0468
    RMSE = 5.0236
    R2   = -0.0413
Saved VAR model for Mullaitivu at trained_var_models\Mullaitivu_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Batticaloa
Selected lag order (AIC): 10
Validation results for city: Batticaloa
Evaluation metrics:
  temperature:
    MAE  = 1.4957
    RMSE = 1.7309
    R2   = -0.1048
  rainfall:
    MAE  = 4.7875
    RMSE = 7.3626
    R2   = -0.0275
  wind_speed:
    MAE  = 3.3990
    RMSE = 4.4663
    R2   = -0.0082
Saved VAR model for Batticaloa at trained_var_models\Batticaloa_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Ampara
Selected lag order (AIC): 7
Validation results for city: Ampara
Evaluation metrics:
  temperature:
    MAE  = 1.6533
    RMSE = 1.8780
    R2   = -0.1154
  rainfall:
    MAE  = 5.3958
    RMSE = 10.2769
    R2   = -0.0122
  wind_speed:
    MAE  = 2.6788
    RMSE = 3.5713
    R2   = 0.0042
Saved VAR model for Ampara at trained_var_models\Ampara_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Trincomalee
Selected lag order (AIC): 12
Validation results for city: Trincomalee
Evaluation metrics:
  temperature:
    MAE  = 1.3792
    RMSE = 1.6099
    R2   = -0.0693
  rainfall:
    MAE  = 5.1166
    RMSE = 11.5743
    R2   = -0.0073
  wind_speed:
    MAE  = 4.7820
    RMSE = 5.6557
    R2   = -0.1460
Saved VAR model for Trincomalee at trained_var_models\Trincomalee_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Kurunegala
Selected lag order (AIC): 11
Validation results for city: Kurunegala
Evaluation metrics:
  temperature:
    MAE  = 0.8594
    RMSE = 1.0983
    R2   = 0.0792
  rainfall:
    MAE  = 5.7792
    RMSE = 9.8259
    R2   = 0.0017
  wind_speed:
    MAE  = 3.3863
    RMSE = 4.1482
    R2   = -0.0220
Saved VAR model for Kurunegala at trained_var_models\Kurunegala_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Puttalam
Selected lag order (AIC): 13
Validation results for city: Puttalam
Evaluation metrics:
  temperature:
    MAE  = 0.8723
    RMSE = 1.0801
    R2   = -0.0288
  rainfall:
    MAE  = 4.8470
    RMSE = 8.7332
    R2   = 0.0029
  wind_speed:
    MAE  = 4.6915
    RMSE = 5.8787
    R2   = -0.1123
Saved VAR model for Puttalam at trained_var_models\Puttalam_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Anuradhapura
Selected lag order (AIC): 15
Validation results for city: Anuradhapura
Evaluation metrics:
  temperature:
    MAE  = 1.2214
    RMSE = 1.4406
    R2   = -0.1246
  rainfall:
    MAE  = 4.6641
    RMSE = 8.1966
    R2   = -0.0023
  wind_speed:
    MAE  = 4.6697
    RMSE = 5.6832
    R2   = -0.0036
Saved VAR model for Anuradhapura at trained_var_models\Anuradhapura_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Polonnaruwa
Selected lag order (AIC): 11
Validation results for city: Polonnaruwa
Evaluation metrics:
  temperature:
    MAE  = 1.7287
    RMSE = 1.9675
    R2   = -0.1258
  rainfall:
    MAE  = 4.8588
    RMSE = 8.0903
    R2   = -0.0110
  wind_speed:
    MAE  = 5.0709
    RMSE = 6.1391
    R2   = -0.0185
Saved VAR model for Polonnaruwa at trained_var_models\Polonnaruwa_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Badulla
Selected lag order (AIC): 10
Validation results for city: Badulla
Evaluation metrics:
  temperature:
    MAE  = 1.2366
    RMSE = 1.5092
    R2   = -0.1550
  rainfall:
    MAE  = 6.1774
    RMSE = 11.6113
    R2   = -0.0089
  wind_speed:
    MAE  = 2.2964
    RMSE = 2.7962
    R2   = -0.2321
Saved VAR model for Badulla at trained_var_models\Badulla_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Moneragala
Selected lag order (AIC): 10
Validation results for city: Moneragala
Evaluation metrics:
  temperature:
    MAE  = 1.4654
    RMSE = 1.7567
    R2   = -0.0923
  rainfall:
    MAE  = 5.4562
    RMSE = 8.0981
    R2   = -0.0023
  wind_speed:
    MAE  = 3.9344
    RMSE = 5.0601
    R2   = -0.1012
Saved VAR model for Moneragala at trained_var_models\Moneragala_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Ratnapura
Selected lag order (AIC): 13
Validation results for city: Ratnapura
Evaluation metrics:
  temperature:
    MAE  = 0.7418
    RMSE = 0.9173
    R2   = -0.3470
  rainfall:
    MAE  = 9.7322
    RMSE = 15.7686
    R2   = -0.0379
  wind_speed:
    MAE  = 3.0067
    RMSE = 3.5109
    R2   = -0.0734
Saved VAR model for Ratnapura at trained_var_models\Ratnapura_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Kegalle
Selected lag order (AIC): 11
Validation results for city: Kegalle
Evaluation metrics:
  temperature:
    MAE  = 0.8567
    RMSE = 1.0897
    R2   = 0.1270
  rainfall:
    MAE  = 6.7086
    RMSE = 10.4154
    R2   = -0.0033
  wind_speed:
    MAE  = 3.0564
    RMSE = 3.7712
    R2   = -0.0684
Saved VAR model for Kegalle at trained_var_models\Kegalle_var_model.pkl


  self._init_dates(dates, freq)



Training VAR model for city: Welimada
Selected lag order (AIC): 11
Validation results for city: Welimada
Evaluation metrics:
  temperature:
    MAE  = 1.2548
    RMSE = 1.5623
    R2   = -0.4132
  rainfall:
    MAE  = 6.0776
    RMSE = 9.3048
    R2   = -0.0373
  wind_speed:
    MAE  = 2.7674
    RMSE = 3.6372
    R2   = -0.0125
Saved VAR model for Welimada at trained_var_models\Welimada_var_model.pkl

Training VAR model for city: Bandarawela
Selected lag order (AIC): 10
Validation results for city: Bandarawela
Evaluation metrics:
  temperature:
    MAE  = 1.1917
    RMSE = 1.4661
    R2   = -0.5732
  rainfall:
    MAE  = 6.0720
    RMSE = 9.6158
    R2   = -0.0215
  wind_speed:
    MAE  = 2.6813
    RMSE = 3.3794
    R2   = 0.0053
Saved VAR model for Bandarawela at trained_var_models\Bandarawela_var_model.pkl


  self._init_dates(dates, freq)
