In [247]:
# Check if running in Google Colab
if 'google.colab' in str(get_ipython()):
    from google.colab import drive
    drive.mount('/content/drive')

    # Define the base path for your data directory for Colab
    basePath = '/content/drive/MyDrive'

else:
    # Running locally or in a different environment
    basePath = '..'

In [248]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

In [249]:
df = pd.read_csv(f'{basePath}/data/processed/grouped_data2.csv')
only_req_cols = ['COUNTRY', 'DATE', 'TAVG', 'TMIN', 'TMAX', 'PRCP']
trimmed_df = df[only_req_cols].to_csv(f'{basePath}/data/processed/trimmedData.csv')

###**Essential Fns**

In [250]:
def filter_and_select_data(df, column_name, start_year):
    """Selects columns and filters data from a starting year."""
    selected_columns = ['COUNTRY', 'DATE', column_name]
    df_filtered = df[selected_columns].copy()
    return df_filtered[df_filtered['DATE'] >= start_year]

In [251]:
def get_sufficient_countries(df, column_name, completeness_threshold):
    """Returns a list of countries with data completeness >= a given threshold."""
    completeness = df.groupby('COUNTRY')[column_name].apply(lambda x: x.count() / len(x))
    return completeness[completeness >= completeness_threshold].index.tolist()

In [252]:
def split_data_by_year(df, train_end_year):
    """Splits a DataFrame into training and testing sets based on year."""
    df_train = df[df['DATE'] <= train_end_year].copy()
    df_test = df[df['DATE'] > train_end_year].copy()
    return df_train, df_test

In [253]:
def handle_missing_values(df, column_name):
    """Fills missing values in a DataFrame column using a sequence of
    interpolation, backfill, and forward fill, grouped by country.
    """
    df[column_name] = df.groupby('COUNTRY')[column_name].transform(lambda x: x.interpolate())
    df[column_name] = df.groupby('COUNTRY')[column_name].transform(lambda x: x.bfill())
    df[column_name] = df.groupby('COUNTRY')[column_name].transform(lambda x: x.ffill())
    return df

In [254]:
def ensure_consecutive_years(df, column_name, start_year, end_year):
    """
    Ensures a DataFrame has a complete time series of years for each country
    up to a specified end_year. Fills in missing years and interpolates data.
    """
    processed_dfs = []
    unique_countries = df['COUNTRY'].unique()
    for country in unique_countries:
        country_data = df[df['COUNTRY'] == country]

        all_years = list(range(start_year, end_year + 1))
        country_years_df = pd.DataFrame({'COUNTRY': country, 'DATE': all_years})

        merged_country_df = pd.merge(country_years_df, country_data, on=['COUNTRY', 'DATE'], how='left')
        merged_country_df[column_name] = merged_country_df[column_name].interpolate(limit_direction='both')
        processed_dfs.append(merged_country_df)

    return pd.concat(processed_dfs, ignore_index=True)

In [255]:
def filter_by_common_countries(df_train, df_test, column_name):
    """
    Finds countries with at least one non-missing value in both training and
    testing DataFrames and filters both to include only those countries.

    Returns:
        tuple: A tuple containing the filtered training and testing DataFrames.
    """
    # Find countries in training data with at least one non-missing value
    countries_with_train_data = df_train.dropna(subset=[column_name])['COUNTRY'].unique()

    # Find countries in testing data with at least one non-missing value
    countries_with_test_data = df_test.dropna(subset=[column_name])['COUNTRY'].unique()

    # Find the intersection of these two lists
    common_countries = set(countries_with_train_data).intersection(set(countries_with_test_data))

    df_train_filtered = df_train[df_train['COUNTRY'].isin(common_countries)].copy()
    df_test_filtered = df_test[df_test['COUNTRY'].isin(common_countries)].copy()

    return df_train_filtered, df_test_filtered

In [256]:
def prepare_column_data_pipeline(df, column_name, start_year=1980, train_end_year=2010, completeness_threshold=0.8):
    """
    Main pipeline function to prepare and clean time series data for a specific column.
    """
    # Step 1: Filter and select data
    df_filtered = filter_and_select_data(df, column_name, start_year)

    # Step 2: Get a list of countries with sufficient data
    sufficient_countries = get_sufficient_countries(df_filtered, column_name, completeness_threshold)
    df_filtered = df_filtered[df_filtered['COUNTRY'].isin(sufficient_countries)].copy()

    # Step 3: Split into training and testing sets
    df_train, df_test = split_data_by_year(df_filtered, train_end_year)

    #Adding just to check::::
    df_train = ensure_consecutive_years(df_train, column_name, start_year = start_year, end_year = train_end_year)
    df_test = ensure_consecutive_years(df_test, column_name, train_end_year + 1, 2025)

    # Step 4: Handle missing values in both sets
    df_train = handle_missing_values(df_train, column_name)
    df_test = handle_missing_values(df_test, column_name)

    # Step 5: Find and filter for common countries (with non-missing data)
    # This is the corrected line
    df_train, df_test = filter_by_common_countries(df_train, df_test, column_name)

    # Step 6: Ensure a complete, consecutive time series
    df_train_completed = ensure_consecutive_years(df_train, column_name, start_year=start_year, end_year=train_end_year)
    df_test_completed = ensure_consecutive_years(df_test, column_name, train_end_year + 1, 2025)

    return df_train_completed, df_test_completed

###**Training Stuff**

In [257]:
def find_best_arima_order(time_series, arima_orders):
    """
    Finds the best ARIMA model order for a single time series based on AIC.

    Args:
        time_series (pd.Series): The time series data for a single country.
        arima_orders (list of tuples): A list of (p, d, q) orders to test.

    Returns:
        tuple: A tuple containing the best model fit object and its order.
               Returns (None, None) if no model can be fitted.
    """
    best_aic = np.inf
    best_order = None
    best_model_fit = None

    for order in arima_orders:
        try:
            model = ARIMA(time_series, order=order, freq='YS')
            model_fit = model.fit()

            # Get the AIC
            aic = model_fit.aic

            # Update best AIC and order if current model is better
            if aic < best_aic:
                best_aic = aic
                best_order = order
                best_model_fit = model_fit
                print(f"  Order: {order}, AIC: {aic}")

        except Exception as e:
            continue

    return best_model_fit, best_order

In [258]:
def train_models_for_countries(df, column_name, arima_orders):
    """
    Trains the best ARIMA model for each country in the DataFrame.

    Args:
        df (pd.DataFrame): The input DataFrame containing country data.
        column_name (str): The column containing the time series data.
        arima_orders (list of tuples): A list of (p, d, q) orders to test.

    Returns:
        dict: A dictionary where keys are country names and values are dictionaries
              containing the 'best_model' object and its 'best_order' and 'best_aic'.
    """
    best_arima_models = {}
    unique_countries = df['COUNTRY'].unique()

    for country in unique_countries:
        # Filter data for the current country
        country_data = df[df['COUNTRY'] == country]

        # Set the 'DATE' column as the index and convert it to a DatetimeIndex
        time_series = country_data.set_index(pd.to_datetime(country_data['DATE'], format='%Y'))[column_name]

        # Use the dedicated function to find the best model for this country
        best_model, best_order = find_best_arima_order(time_series, arima_orders)

        # Store the best model and its details
        if best_model is not None:
            best_arima_models[country] = {
                'best_model': best_model,
                'best_order': best_order,
                'best_aic': best_model.aic
            }
        else:
            print(f"Warning: No best model found for {country}. Skipping.")

    return best_arima_models

In [259]:
def save_trained_models(models_dict, models_dir):
    """
    Saves a dictionary of trained models to individual files in a specified directory.

    #Most probably:
    Args:
        models_dict (dict): A dictionary of trained model objects. Keys are country names.
                            Values should contain the model object under the 'best_model' key.
        models_dir (str): The directory path where the models should be saved.
    """
    
    if not os.path.exists(models_dir):
        os.makedirs(models_dir)

    for country, model_info in models_dict.items():
        best_model = model_info.get('best_model')

        if best_model is not None:
            # Create a filename by replacing spaces with underscores
            # sanitized_country_name = sanitize_country_name(country)
            sanitized_country_name = country
            filename = f"{sanitized_country_name}_arima_model.joblib"
            filepath = os.path.join(models_dir, filename)

            joblib.dump(best_model, filepath)
        else:
            print(f"Warning: No model found for {country}, skipping save.")

###**Prediction and Errors**

In [260]:
def load_trained_models(models_dir):
    """
    Loads all trained ARIMA model objects from a specified directory.
    """
    loaded_models = {}

    if not os.path.exists(models_dir):
        print(f"Directory not found: {models_dir}")
        return loaded_models

    for filename in os.listdir(models_dir):
        if filename.endswith('_arima_model.joblib'):
            filepath = os.path.join(models_dir, filename)
            try:
                # The key in our dictionary will be the sanitized name
                sanitized_country_name = filename.replace('_arima_model.joblib', '')

                model = joblib.load(filepath)
                loaded_models[sanitized_country_name] = model
            except Exception as e:
                print(f"Warning: Could not load model from {filepath}. Error: {e}")

    if not loaded_models:
        print(f"No models found in directory: {models_dir}")
    else:
        print(f"Successfully loaded {len(loaded_models)} models.")

    return loaded_models

In [261]:
def predict_single_country(model_fit, n_forecast_steps: int = 10):
    """
    Makes a prediction for a specified number of future steps using a trained ARIMA model.

    Args:
        model_fit (SARIMAXResultsWrapper): The fitted ARIMA model object.
        n_forecast_steps (int): The number of time steps (e.g., years) to forecast into the future.

    Returns:
        pd.Series: A pandas Series containing the forecasted values.
                   The index of the Series will be the corresponding years.
                   Returns None if an error occurs.
    """
    try:
        # Use the get_forecast method to predict
        # This method is more robust than predict() for out-of-sample forecasting
        forecast_result = model_fit.get_forecast(steps=n_forecast_steps)
        
        # The predicted_mean attribute gives the forecasted values as a pandas Series
        predictions = forecast_result.predicted_mean
        
        return predictions
        
    except Exception as e:
        print(f"An error occurred during prediction: {e}")
        return None

In [262]:
def evaluate_single_country(actual_data, predictions):
    """
    Calculates key error metrics for a single country's predictions.

    Args:
        actual_data (pd.Series): The actual time series data.
        predictions (pd.Series): The predicted time series data.

    Returns:
        dict: A dictionary of calculated error metrics (MSE, RMSE, MAE).
    """
    mse = mean_squared_error(actual_data, predictions)
    rmse = math.sqrt(mse)
    mae = mean_absolute_error(actual_data, predictions)

    return {'MSE': mse, 'RMSE': rmse, 'MAE': mae}

In [263]:
def predict_and_evaluate_all_models(models_dir, df_test, column_name):
    """
    Orchestrates the prediction and evaluation process for all countries.

    Args:
        models_dir (str): The directory containing the saved trained models.
        df_test (pd.DataFrame): The cleaned DataFrame with testing data.
        column_name (str): The name of the column to predict.

    Returns:
        dict: A dictionary where keys are country names and values are their
              calculated error metrics.
    """
    all_country_errors = {}

    # Load the models using our modular function
    loaded_models = load_trained_models(models_dir)

    if not loaded_models:
        print("No models were loaded. Cannot proceed with prediction.")
        return all_country_errors

    print("\nStarting prediction and evaluation...")

    # Loop through each country for which we have a trained model
    for country, model in loaded_models.items():
        # sanitized_country_name = sanitize_country_name(country)

        # if sanitized_country_name not in loaded_models:
        #     print(f"Skipping {country}: No model found for sanitized name '{sanitized_country_name}'.")
        #     continue

        # Get actual data for this country from the test set
        country_test_data = df_test[df_test['COUNTRY'] == country].copy()

        # Ensure the test data has a proper DatetimeIndex for alignment
        country_test_data.set_index(pd.to_datetime(country_test_data['DATE'], format='%Y'), inplace=True)
        actual_data = country_test_data[column_name]

        # Get the start and end years for prediction from the actual data
        start_year = actual_data.index.min().year
        end_year = actual_data.index.max().year

        try:
            # Use our modular function to make predictions
            predictions = predict_single_country(model, n_forecast_steps=15)

            print(f"INSIDE predict_and_evaluate_fn")
            print(f"PReditctions for {country} is {predictions}")

            # Use our modular function to evaluate the predictions
            errors = evaluate_single_country(actual_data, predictions)

            all_country_errors[country] = errors
            print(f"Processed {country}: RMSE = {errors['RMSE']:.2f}, MAE = {errors['MAE']:.2f}")

        except Exception as e:
            print(f"Error processing {country}: {e}")

    print("\nPrediction and evaluation complete.")
    return all_country_errors

###**Combining Testing and Training for FINAL training**

In [264]:
def combine_data_for_final_training(df_train, df_test):
    """
    Combines the cleaned training and testing DataFrames into a single DataFrame,
    and sorts the result for clean time series analysis.

    Args:
        df_train (pd.DataFrame): The cleaned DataFrame with training data.
        df_test (pd.DataFrame): The cleaned DataFrame with testing data.

    Returns:
        pd.DataFrame: A single DataFrame containing data from both periods,
                      sorted by country and date.
    """
    df_combined = pd.concat([df_train, df_test], ignore_index=True)

    # Sort the data by country and then by date for a clean, final product
    df_combined_sorted = df_combined.sort_values(by=['COUNTRY', 'DATE']).reset_index(drop=True)

    df_combined_sorted.set_index(pd.to_datetime(df_combined_sorted['DATE'], format='%Y'), inplace=True)
    
    return df_combined_sorted

In [265]:
def train_final_models(df_combined, column_name, arima_orders):
    """
    Trains the final ARIMA models on the complete dataset for each country.

    This function reuses our verified training function.

    Args:
        df_combined (pd.DataFrame): The DataFrame containing the complete time series data.
        column_name (str): The column containing the time series data.
        arima_orders (list of tuples): A list of (p, d, q) orders to test.

    Returns:
        dict: A dictionary of final trained models for each country.
    """
    print(f"Starting final model training for column: {column_name}")
    final_models = train_models_for_countries(df_combined, column_name, arima_orders)
    print("Final model training complete.")
    return final_models

In [266]:
def make_predictions(models_dir, df_combined, column_name, number_of_years: int = 10):
    """
    Args:
        models_dir (str): The directory containing the saved trained models. For our case it will be the same as above as 
        it overwrites when training on the large dataset..
        .....
        .....
    """
    all_predicted_dfs = {}

    loaded_models = load_trained_models(models_dir)

    if not loaded_models:
        print("No models were loaded. Cannot proceed with predictions.")
        return all_predicted_dfs

    print(f"\nStarting Predictions")

    # Loop through each country for which we have a trained model
    for country, model in loaded_models.items():

        # country_test_data = df_combined[df_combined['COUNTRY'] == country].copy()

        # # Ensure the test data has a proper DatetimeIndex for alignment
        # country_test_data.set_index(pd.to_datetime(country_test_data['DATE'], format='%Y'), inplace=True)

        try:
            # Use our modular function to make predictions
            predictions = predict_single_country(model, n_forecast_steps=number_of_years)

            print(f"INSIDE \n\n\n predict fn")
            print(f"PReditctions for {country} is {predictions}")

            if predictions is not None:
                all_predicted_dfs[country] = predictions

        except Exception as e:
            print(f"Error processing {country}: {e}")

    print("\nPrediction complete.")
    return all_predicted_dfs

In [267]:
###Testing Till this::
filepath = f'{basePath}/data/processed/trimmedData.csv'
df = pd.read_csv(filepath)
column_name = 'TAVG'
models_dir = f'{basePath}/modelsNew/trainedModels'
arima_orders = [(1, 0, 0), (1, 1, 1)]
df_train, df_test = prepare_column_data_pipeline(df, column_name, start_year=1980, train_end_year=2010, completeness_threshold=0.8)
best_models_dict = train_models_for_countries(df_train, column_name, arima_orders)
save_trained_models(best_models_dict, models_dir)

loaded_models = load_trained_models(models_dir)


# 4. Predict and evaluate the models
evaluation_results = predict_and_evaluate_all_models(models_dir, df_test, column_name)
print("Evaluation Results:", evaluation_results)

  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 41.73681201623895


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 35.3591108367197


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 155.21525101953569


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 144.2628724422125


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 29.55476966081685


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 92.61848760264718


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 99.91519526885268


  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 1, 1), AIC: 99.58509657630998


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 28.538430153800082


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 24.448297041367976


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 72.83214283160525
  Order: (1, 1, 1), AIC: 70.59432253201801


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 50.04542352567869


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 44.80325094975927
  Order: (1, 0, 0), AIC: 86.31504417102755


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 1.7827763808590689


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 88.14926170922186
  Order: (1, 1, 1), AIC: 87.68442861659247


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 71.14086802638523


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 67.76443027379098


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 73.68939886315349


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 131.53571519715072


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: -13.763892049092906


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 77.49388031955432


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 57.57063160087742
  Order: (1, 1, 1), AIC: 52.47682162166616


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 76.58616810728358
  Order: (1, 1, 1), AIC: 73.02290945954164


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 0, 0), AIC: 78.87145134962132
  Order: (1, 1, 1), AIC: 78.8573104040611
  Order: (1, 0, 0), AIC: 73.4551802324824


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 71.46204208398329
  Order: (1, 0, 0), AIC: -16.663000739405156


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: -17.225905676319627
  Order: (1, 0, 0), AIC: 72.29403469841094


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 77.16147589798553


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: -5.8361036342615975
  Order: (1, 0, 0), AIC: 39.104213224671014


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 30.950392214119375


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 69.09053415969784
  Order: (1, 1, 1), AIC: 65.55353438865878


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 0, 0), AIC: 70.11396038833942


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 0, 0), AIC: 107.5205503420513


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 0, 0), AIC: 97.33564295300854


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 107.86079180240833
  Order: (1, 1, 1), AIC: 105.90668299356808
  Order: (1, 0, 0), AIC: 70.15677421369783


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 1, 1), AIC: 68.3264724633053
  Order: (1, 0, 0), AIC: 61.263467165262945


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 86.12087465919787
  Order: (1, 1, 1), AIC: 84.8076609016616


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 41.850603928903624


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 37.303788059239466
  Order: (1, 1, 1), AIC: 36.88908512311481
Successfully loaded 36 models.
Successfully loaded 36 models.

Starting prediction and evaluation...
INSIDE predict_and_evaluate_fn
PReditctions for Marshall Islands is 2011-01-01    28.113183
2012-01-01    28.091912
2013-01-01    28.086700
2014-01-01    28.085423
2015-01-01    28.085110
2016-01-01    28.085033
2017-01-01    28.085015
2018-01-01    28.085010
2019-01-01    28.085009
2020-01-01    28.085009
2021-01-01    28.085009
2022-01-01    28.085009
2023-01-01    28.085009
2024-01-01    28.085009
2025-01-01    28.085009
Freq: YS-JAN, Name: predicted_mean, dtype: float64
Processed Marshall Islands: RMSE = 0.37, MAE = 0.34
INSIDE predict_and_evaluate_fn
PReditctions for France is 2011-01-01    12.516511
2012-01-01    12.613578
2013-01-01    12.638602
2014-01-01    12.645054
2015-01-01    12.646717
2016-01-01    12.647145
2017-01-01    12.647256
2018-01-01    12.647285
2019-01-01    12.647292
2020-01

In [268]:
df_combined = combine_data_for_final_training(df_train, df_test)
models_dir = f'{basePath}/modelsNew/trainedModels'
column_name = 'TAVG'
latest_models_dict = train_final_models(df_combined, column_name, arima_orders)
save_trained_models(latest_models_dict, models_dir)

# Assign the result of make_predictions to a new variable
country_predictions = make_predictions(models_dir, df_combined, column_name, number_of_years=10)

df_final_pred = []
# Iterate over the new variable containing the predictions dictionary
for country, predictions in country_predictions.items():
    # Add a check to ensure 'predictions' is a pandas Series
    if isinstance(predictions, pd.Series):
        # Repeat the country name for each prediction
        df_curr = pd.DataFrame({'COUNTRY': [country] * len(predictions), 'DATE': predictions.index, column_name: predictions.values})
        df_final_pred.append(df_curr)
    else:
        print(f"Skipping {country}: Predictions are not a pandas Series.")


df_final_pred = pd.concat(df_final_pred, ignore_index=True)
df_final_pred

Starting final model training for column: TAVG


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 49.51521340571164


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 43.96904353817026


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 216.18532628758152


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 209.8313383389377
  Order: (1, 0, 0), AIC: 61.31000113531107


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 53.556066286404054
  Order: (1, 0, 0), AIC: 129.09963968592098


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 126.12079250526509
  Order: (1, 0, 0), AIC: 138.77562769779246


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 138.26573818765212


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 111.61176619423185


  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 1, 1), AIC: 108.6221855947157


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 100.00106683768075


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 92.48683460801719
  Order: (1, 0, 0), AIC: 61.16426722754099


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 50.494801376901115
  Order: (1, 0, 0), AIC: 128.41213003621266


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 123.14180172829737


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 4.378638293830727


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 1.5792286074985693
  Order: (1, 0, 0), AIC: 133.56020986592313


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 125.40908082438338
  Order: (1, 0, 0), AIC: 111.20624130590497


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 98.61460062715834


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 114.27242064801534


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 106.65624772150258


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 223.84257480683672
  Order: (1, 1, 1), AIC: 221.30014233729793
  Order: (1, 0, 0), AIC: -2.7605535421391085


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 109.80785023064351
  Order: (1, 1, 1), AIC: 101.50007150497282


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 85.50317803909671


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 71.42752791861834
  Order: (1, 0, 0), AIC: 112.2804502322374


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 104.32365294004292


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 102.10218137628506


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 109.82336368261952


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 100.1687334904766
  Order: (1, 0, 0), AIC: -10.707443021251535


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 1, 1), AIC: -14.94265693946975
  Order: (1, 0, 0), AIC: 109.23118404151757


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 103.0595605775273
  Order: (1, 0, 0), AIC: 104.62444061486393


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: -21.09485053236117


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 49.54036157163743
  Order: (1, 1, 1), AIC: 46.92469538511926
  Order: (1, 0, 0), AIC: 45.09995932082619


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 44.07263118916261


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 94.80433869686763


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 83.41357602902005


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 99.70343263953396


  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 1, 1), AIC: 94.55106475877277


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 157.48486315889568


  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


  Order: (1, 0, 0), AIC: 127.88140238934307


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 156.4649289695365
  Order: (1, 1, 1), AIC: 155.20473507323248


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 108.57775244444363
  Order: (1, 1, 1), AIC: 106.62340419830997
  Order: (1, 0, 0), AIC: 106.8648279582218


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 95.30491044109048
  Order: (1, 0, 0), AIC: 118.46616595082959


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 110.58270215385308
  Order: (1, 0, 0), AIC: 65.51049652001328


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 60.11414463036688


  self._init_dates(dates, freq)


  Order: (1, 0, 0), AIC: 59.22177715846311


  self._init_dates(dates, freq)


  Order: (1, 1, 1), AIC: 53.241895116665646
Final model training complete.
Successfully loaded 36 models.

Starting Predictions
INSIDE 


 predict fn
PReditctions for Marshall Islands is 2026-01-01    28.579253
2027-01-01    28.536104
2028-01-01    28.520684
2029-01-01    28.515174
2030-01-01    28.513205
2031-01-01    28.512502
2032-01-01    28.512250
2033-01-01    28.512161
2034-01-01    28.512128
2035-01-01    28.512117
Freq: YS-JAN, Name: predicted_mean, dtype: float64
INSIDE 


 predict fn
PReditctions for France is 2026-01-01    14.221819
2027-01-01    14.223238
2028-01-01    14.223262
2029-01-01    14.223263
2030-01-01    14.223263
2031-01-01    14.223263
2032-01-01    14.223263
2033-01-01    14.223263
2034-01-01    14.223263
2035-01-01    14.223263
Freq: YS-JAN, Name: predicted_mean, dtype: float64
INSIDE 


 predict fn
PReditctions for Iceland is 2026-01-01    5.993728
2027-01-01    5.994546
2028-01-01    5.994439
2029-01-01    5.994453
2030-01-01    5.994451
2031-01-01    5.9

Unnamed: 0,COUNTRY,DATE,TAVG
0,Marshall Islands,2026-01-01,28.579253
1,Marshall Islands,2027-01-01,28.536104
2,Marshall Islands,2028-01-01,28.520684
3,Marshall Islands,2029-01-01,28.515174
4,Marshall Islands,2030-01-01,28.513205
...,...,...,...
355,Austria,2031-01-01,6.699083
356,Austria,2032-01-01,6.698869
357,Austria,2033-01-01,6.698805
358,Austria,2034-01-01,6.698786


In [269]:
df_final_pred.to_csv(f'{basePath}/data/processed/finalPredictions.csv', index=False)

In [270]:
df_final_pred['COUNTRY'].nunique()

36

###**Top n Riskiest countries + Plotting**