### Step 1: Import Libraries and Load Data

In [29]:
%pip install statsmodels

Note: you may need to restart the kernel to use updated packages.


In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.regime_switching.markov_switching import MarkovSwitching
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Brent price table; the preview printed further down shows `Date` and `Price` columns.
brent_prices = pd.read_csv("../data/cleaned_data.csv")  
# The three indicator files are read with skiprows=4, i.e. the first four lines
# of each file are discarded before the header row is parsed
# (presumably a metadata preamble in the raw exports -- confirm against the CSVs).
exchange_rates = pd.read_csv("../data/data_collected/Exchange_rates.csv", skiprows=4)
inflation = pd.read_csv("../data/data_collected/inflation.csv", skiprows=4)
gdp_growth = pd.read_csv("../data/data_collected/GDP_growth.csv", skiprows=4)


In [35]:
# Print the first five rows of the Brent price table as a quick load check.
print(brent_prices.head())

         Date  Price
0  1987-05-20  18.63
1  1987-05-21  18.45
2  1987-05-22  18.55
3  1987-05-25  18.60
4  1987-05-26  18.63


### Step 2: Inspect Datasets and Convert Dates

In [31]:
# Print the first five rows of each raw indicator table, in this order:
# GDP growth, exchange rates, inflation. The GDP output below shows the wide
# layout: one row per country/aggregate and one column per year.
print(gdp_growth.head())
print(exchange_rates.head())
print(inflation.head())


                  Country Name Country Code         Indicator Name  \
0                        Aruba          ABW  GDP growth (annual %)   
1  Africa Eastern and Southern          AFE  GDP growth (annual %)   
2                  Afghanistan          AFG  GDP growth (annual %)   
3   Africa Western and Central          AFW  GDP growth (annual %)   
4                       Angola          AGO  GDP growth (annual %)   

      Indicator Code  1960      1961      1962      1963      1964      1965  \
0  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
1  NY.GDP.MKTP.KD.ZG   NaN  0.460106  7.868013  5.616400  4.668135  5.138990   
2  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   
3  NY.GDP.MKTP.KD.ZG   NaN  1.873455  3.707643  7.145784  5.406403  4.102491   
4  NY.GDP.MKTP.KD.ZG   NaN       NaN       NaN       NaN       NaN       NaN   

   ...      2015      2016      2017      2018      2019       2020  \
0  ... -0.623626  1.719625 

### Add economic data preprocessing function and merge datasets

In [33]:
import pandas as pd
import numpy as np

def preprocess_economic_data(file_path, country, data_type):
    """Reshape a wide indicator CSV into a year-indexed, country-per-column frame.

    Parameters
    ----------
    file_path : str or path-like
        CSV to read. The first four lines are skipped before the header row.
    country : str
        Exact ``Country Name`` to keep, or ``'All'`` to keep every row.
    data_type : str
        Label used only in the printed preview heading.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Year``, one column per ``Country Name``; columns that
        contain no values at all are removed.

    Side effects
    ------------
    Prints a heading and the first rows of the result.
    """
    raw = pd.read_csv(file_path, skiprows=4)

    # 'All' is the sentinel for "no country filter".
    selected = raw if country == 'All' else raw[raw['Country Name'] == country]

    # Remove the descriptive columns (tolerating their absence) ...
    metadata_columns = ['Country Code', 'Indicator Name', 'Indicator Code']
    trimmed = selected.drop(columns=metadata_columns, errors='ignore')

    # ... and any auto-named "Unnamed..." columns, so that only the country
    # name and the per-year columns are left to melt.
    is_unnamed = trimmed.columns.str.contains('^Unnamed')
    trimmed = trimmed.loc[:, ~is_unnamed]

    # Wide -> long: one row per (country, year) pair.
    long_form = trimmed.melt(
        id_vars=['Country Name'],
        var_name='Year',
        value_name='Value',
    )

    # Column labels that are not numeric become NaN and are discarded.
    long_form = long_form.assign(
        Year=lambda frame: pd.to_numeric(frame['Year'], errors='coerce')
    )
    long_form = long_form.dropna(subset=['Year'])

    # Long -> wide again, this time with years as rows and countries as columns.
    by_year = long_form.pivot(index='Year', columns='Country Name', values='Value')
    by_year = by_year.dropna(axis=1, how='all')

    print(f"DataFrame after preprocessing ({data_type}):")
    print(by_year.head())

    return by_year

# Relative paths to the three raw indicator CSV files.
gdp_growth_path = '../data/data_collected/GDP_growth.csv'
inflation_path = '../data/data_collected/inflation.csv'
exchange_rate_path = '../data/data_collected/Exchange_rates.csv'

# One entry per dataset: (file path, country filter, label).
# The 'All' filter makes preprocess_economic_data keep every country.
datasets = [
    (gdp_growth_path, 'All', 'GDP Growth Rates'),
    (inflation_path, 'All', 'Inflation Rates'),
    (exchange_rate_path, 'All', 'Exchange Rates')
]


# Preprocess each dataset once and store the result under its label.
cleaned_dataframes = {}
for path, country, data_type in datasets:
    cleaned_dataframes[data_type] = preprocess_economic_data(path, country, data_type)

# Print the head of every cleaned frame again under its label
# (preprocess_economic_data has already printed one preview per call above).
for data_type, df in cleaned_dataframes.items():
    print(f"\n{data_type} (All Country):")
    print(df.head())

DataFrame after preprocessing (GDP Growth Rates):
Country Name  Afghanistan  Africa Eastern and Southern  \
Year                                                     
1960                  NaN                          NaN   
1961                  NaN                     0.460106   
1962                  NaN                     7.868013   
1963                  NaN                     5.616400   
1964                  NaN                     4.668135   

Country Name  Africa Western and Central  Albania    Algeria  American Samoa  \
Year                                                                           
1960                                 NaN      NaN        NaN             NaN   
1961                            1.873455      NaN -13.605441             NaN   
1962                            3.707643      NaN -19.685042             NaN   
1963                            7.145784      NaN  34.313729             NaN   
1964                            5.406403      NaN   5.839413   

### Merge with Brent Oil

In [36]:
def load_and_transform_economic_data(filepath, target_country, indicator_type):
    """Load one country's series from a wide indicator CSV as a year-indexed frame.

    Processing is best-effort: any exception raised along the way is caught,
    reported with ``print`` and signalled to the caller by returning ``None``.

    Parameters
    ----------
    filepath : str or path-like
        CSV to read. The first four lines are skipped before the header row.
    target_country : str
        Exact value to match in the ``Country Name`` column.
    indicator_type : str
        Label used only in the printed messages.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by ``Year`` with one column per matching
        ``Country Name``, containing only rows that have a value.
        ``None`` if processing failed (the error is printed).
    """
    try:
        data = pd.read_csv(filepath, skiprows=4)

        data = data[data['Country Name'] == target_country]

        # errors='ignore': a file that lacks one of these descriptive columns
        # is still usable. Without it the resulting KeyError was swallowed by
        # the handler below and the whole dataset silently came back as None.
        data = data.drop(
            columns=['Country Code', 'Indicator Name', 'Indicator Code'],
            errors='ignore',
        )

        # Discard auto-named "Unnamed..." columns so only year columns are melted.
        data = data.loc[:, ~data.columns.str.contains('^Unnamed')]

        # Wide -> long: one row per (country, year) pair.
        data = data.melt(id_vars=['Country Name'], var_name='Year', value_name='Value')

        # Non-numeric column labels become NaN years; drop those together with
        # years that have no observation.
        data['Year'] = pd.to_numeric(data['Year'], errors='coerce')
        data = data.dropna(subset=['Year', 'Value'])

        data = data.pivot(index='Year', columns='Country Name', values='Value')

        data = data.dropna(how='all')

        print(f"Processed data for {indicator_type}:\n", data.head())

        return data
    except Exception as err:
        # Deliberately best-effort: report the problem and return None so the
        # notebook keeps running; callers must check for None.
        print(f"Error encountered during {indicator_type} processing: {err}")
        return None
