## Importing Relevant Libraries

In [1]:
import pandas as pd
import numpy as np
import os
from IPython.display import display

## Set working directory for the Excel data file

In [2]:
#Folder that holds the Excel workbook. Set the IFS_DATA_DIR environment variable to
#run the notebook on another machine; the default keeps the original location.
DATA_DIR = os.environ.get('IFS_DATA_DIR', 'C:/Users/nayrb/Downloads/')
os.chdir(DATA_DIR)
print(os.getcwd())

C:\Users\nayrb\Downloads


### Cleaning and formatting data

In [3]:
#Importing the data
filename = "International_Financial_Statistics.xlsx"
df = pd.read_excel(filename, na_filter = False,header=0)

#Cleaning the data
#Transpose so that the sheet's column labels become the row index
df = df.transpose()
#The second row of the transposed sheet supplies the column names
df.columns = df.iloc[1]
df = df.rename_axis('Date')
#Keep only the rows whose label has the form "Mon YYYY" (e.g. "Jan 1960")
df = df[df.index.str.match(r'^[A-Z][a-z]{2} \d{4}$')]

#Discard the first two remaining rows: the second one by label, then the first one by position
df = df.drop(df.index[1],axis = 0)
df = df.iloc[1:, 0:].apply(pd.to_numeric) #Changing the data to numeric datatype

#Separating germany and USA data from the dataframes
#.copy() gives each country its own frame, so the columns inserted in later cells
#are never written into a slice of df
df_germany = df.iloc[:,0:4].copy()
df_usa = df.iloc[:,4:8].copy()

#Changing the index to date time format
#Each frame parses its own labels, so the two conversions do not depend on each other
df_germany.index = pd.to_datetime(df_germany.index, format='%b %Y')
df_usa.index = pd.to_datetime(df_usa.index, format='%b %Y')

### Constructing Germany Dataframe

In [4]:
#NOTE: every `loc` below is a column position at the moment of that insert, so the
#order of the statements determines the final column layout. DataFrame.insert raises
#if the column already exists, so re-run the cleaning cell before re-running this one.

#Inserting new column for monthly growth in nominal exchange rate
#(percentage change versus the previous row)

df_germany.insert(loc=2, column='Nominal_Monthly_Growth_Exchange_Rate', value = df_germany['Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate'].pct_change() * 100)

#Inserting new column for real exchange rate

#Real exchange rate = nominal exchange rate * Germany price level / US price level
#NOTE(review): the nominal rate column is labelled "US Dollar per Domestic Currency";
#an earlier version of this comment described it as marks/dollars - confirm the units.
germany_real_exchange_rate = df_germany['Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate'] * df_germany['Prices, Consumer Price Index, All items, Index'] /df_usa['Prices, Consumer Price Index, All items, Index']
df_germany.insert(loc=5, column='Real_Monthly_Exchange_Rate', value = germany_real_exchange_rate)

#Inserting new column for monthly growth in real exchange rate
df_germany.insert(loc=6, column='Real_Monthly_Growth_Exchange_Rate', value =df_germany['Real_Monthly_Exchange_Rate'].pct_change() * 100)

#Inserting new column for the monthly growth in industrial production
df_germany.insert(loc=1, column='Monthly_Growth_Industrial_Production', value = df_germany['Economic Activity, Industrial Production, Index'].pct_change() * 100)

#Inserting new column for the growth in industrial production versus 12 months ago
#(pct_change(12) compares each row with the row 12 positions earlier, so the first 12 rows are NaN)
df_germany.insert(loc=1, column='Yearly_Growth_Industrial_Production', value = df_germany['Economic Activity, Industrial Production, Index'].pct_change(12) * 100)

#Setting an index of the value of national reserves
#(first row = 100; rows where the ratio is missing are filled with 100)

df_germany.insert(loc=7, column='Index_National_Reserves', value = (df_germany['International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar'] / df_germany['International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar'].iloc[0] * 100).fillna(100))

### Constructing USA Dataframe

In [5]:
#NOTE: each `loc` is a column position at the moment of that insert, so statement order matters.
#DataFrame.insert raises if the column already exists, so re-run the cleaning cell before re-running this one.

#Inserting new column for the monthly inflation rate
#(percentage change of the CPI versus the previous row)
df_usa.insert(loc=4, column='Monthly_Inflation_Rate', value = df_usa["Prices, Consumer Price Index, All items, Index"].pct_change() * 100)

#Setting an index of the value of national reserves
#(first row = 100; rows where the ratio is missing are filled with 100)
df_usa.insert(loc=3, column='Index_National_Reserves', value = (df_usa['International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar'] / df_usa['International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar'].iloc[0] * 100).fillna(100))

In [6]:
#Let's view the Germany data frame (original series plus the derived columns inserted above)
df_germany

Unnamed: 1,"Economic Activity, Industrial Production, Index",Yearly_Growth_Industrial_Production,Monthly_Growth_Industrial_Production,"Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate",Nominal_Monthly_Growth_Exchange_Rate,"International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar","Prices, Consumer Price Index, All items, Index",Index_National_Reserves,Real_Monthly_Exchange_Rate,Real_Monthly_Growth_Exchange_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1960-03-01,32.225254,,,0.238095,,4966.456016,24.477068,100.000000,0.432245,
1960-04-01,34.310512,,6.470881,0.238095,0.000000,5236.120624,24.616929,105.429719,0.433241,0.230473
1960-05-01,33.749096,,-1.636279,0.238095,0.000000,5417.091847,24.686864,109.073589,0.434472,0.284096
1960-06-01,34.422795,,1.996198,0.238095,0.000000,5843.766907,24.686864,117.664727,0.433004,-0.337838
1960-07-01,32.369618,,-5.964585,0.238095,0.000000,6124.728538,24.686864,123.321912,0.433004,0.000000
...,...,...,...,...,...,...,...,...,...,...
1990-08-01,76.015669,6.017897,-3.590683,0.636659,4.405679,72425.738573,67.556702,1458.298198,0.712667,3.775943
1990-09-01,86.554241,5.514275,13.863684,0.637064,0.063706,73197.573621,67.766509,1473.839160,0.709406,-0.457573
1990-10-01,92.705752,6.152998,7.107116,0.656470,3.046019,75011.926830,68.256049,1510.371311,0.731883,3.168449
1990-11-01,89.521723,5.580779,-3.434553,0.672495,2.441157,76166.074709,68.116177,1533.610173,0.746535,2.002014


In [7]:
#Let's view the USA data frame (original series plus the derived columns inserted above)
df_usa


Unnamed: 1,"Economic Activity, Industrial Production, Index","Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate","International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar",Index_National_Reserves,"Prices, Consumer Price Index, All items, Index",Monthly_Inflation_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1960-03-01,26.883139,1.0,21344.700000,100.000000,13.482806,
1960-04-01,26.671183,1.0,21278.000000,99.687510,13.528666,0.340136
1960-05-01,26.609362,1.0,21234.300000,99.482776,13.528666,0.000000
1960-06-01,26.671183,1.0,21169.800000,99.180593,13.574526,0.338983
1960-07-01,25.063847,1.0,20999.400000,98.382268,13.574526,0.000000
...,...,...,...,...,...,...
1990-08-01,69.548201,1.0,78908.838357,369.688205,60.351608,0.920245
1990-09-01,70.078092,1.0,80024.166133,374.913520,60.856066,0.835866
1990-10-01,69.133119,1.0,82852.196532,388.162853,61.222946,0.602864
1990-11-01,67.340321,1.0,83059.402774,389.133615,61.360525,0.224719


## Identifying Outliers

In [8]:
#The code below identifies the outliers; setting them to NA is done by a separate function
def get_outliers_lists(df):
    """Collect the Tukey-fence outliers of every numeric column of ``df``.

    A value is an outlier when it lies below ``Q1 - 1.5 * IQR`` or above
    ``Q3 + 1.5 * IQR`` of its own column. Missing values are never flagged,
    because comparisons with NaN are False.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to scan. Non-numeric columns are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per outlier with the columns ``Outlier_Value`` and
        ``Outlier_Column``; the index holds the label of the row in ``df``
        where the outlier was found. Empty, but with the same two columns,
        when nothing is flagged.
    """
    # Collect one frame per column and concatenate once at the end, rather
    # than growing a DataFrame inside the loop.
    outlier_frames = []
    for column in df.select_dtypes(include=[np.number]).columns:
        column_data = df[column]
        Q1 = column_data.quantile(0.25)
        Q3 = column_data.quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        # One boolean mask yields both the values and their row labels
        is_outlier = ((column_data < lower_bound) | (column_data > upper_bound)).to_numpy()
        outlier_values = column_data[is_outlier].tolist()

        if outlier_values:  # Check if there are any outliers
            outlier_frames.append(
                pd.DataFrame(
                    {'Outlier_Value': outlier_values, 'Outlier_Column': column},
                    index=df.index[is_outlier],
                )
            )

    if not outlier_frames:
        # Keep the schema stable so callers can always rely on the columns
        return pd.DataFrame(columns=['Outlier_Value', 'Outlier_Column'])

    return pd.concat(outlier_frames)

**Explaining the function**

$$IQR = Interquartile\,range$$

Our criterion for an outlier is adopted from Tukey's fences method: a data point is an outlier if it lies more than 1.5 * IQR below the first quartile (Q1) or more than 1.5 * IQR above the third quartile (Q3), where IQR = Q3 - Q1.

This function goes through data from columns which are of the numerical datatype and classifies them based on the criteria above and adds them to a list named outlier_values.

The `if` statement then checks whether the column has any outliers and, if so, adds them to a dataframe whose index is the date of each outlier.

In [9]:
def replace_outliers_with_na(df, outliers_df):
    """Return a copy of ``df`` in which every listed outlier is set to NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is not modified.
    outliers_df : pandas.DataFrame
        One row per outlier. The row's index label identifies the row of
        ``df`` and its ``Outlier_Column`` entry identifies the column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the listed cells replaced by ``np.nan``.
    """
    cleaned = df.copy()

    # Blank out one (row label, column) cell per outlier record
    for row_label, record in outliers_df.iterrows():
        target_column = record['Outlier_Column']
        cleaned.loc[row_label, target_column] = np.nan

    return cleaned

**Explaining the function**

This function takes 2 arguments: the dataframe we want to modify and the dataframe of outliers (as returned by `get_outliers_lists`). We structured our code in this format because it allows us to edit the outlier dataframe before applying it. Some of the flagged values might not be "true" outliers, hence some discretion is available.

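This function iterates through every row of the outlier dataframe and sets the corresponding value to NA in a copy of the data; the original dataframe is left unchanged.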

In [10]:
#Creating a df of outliers for germany
#(one row per flagged value, indexed by the date on which it occurs)
germany_outliers = get_outliers_lists(df_germany)
germany_outliers

Unnamed: 0_level_0,Outlier_Value,Outlier_Column
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1968-06-01,21.671598,Yearly_Growth_Industrial_Production
1968-08-01,19.179229,Yearly_Growth_Industrial_Production
1968-11-01,17.929790,Yearly_Growth_Industrial_Production
1969-01-01,17.477204,Yearly_Growth_Industrial_Production
1969-02-01,17.892465,Yearly_Growth_Industrial_Production
...,...,...
1989-05-01,-4.537512,Real_Monthly_Growth_Exchange_Rate
1989-07-01,4.219568,Real_Monthly_Growth_Exchange_Rate
1989-10-01,4.445080,Real_Monthly_Growth_Exchange_Rate
1989-12-01,5.282682,Real_Monthly_Growth_Exchange_Rate


In [11]:
#Creating a df of outliers for USA
#(one row per flagged value, indexed by the date on which it occurs)
usa_outliers = get_outliers_lists(df_usa)
usa_outliers

Unnamed: 0_level_0,Outlier_Value,Outlier_Column
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1989-07-01,63462.210506,"International Reserves and Liquidity, Reserves..."
1989-08-01,62363.845992,"International Reserves and Liquidity, Reserves..."
1989-09-01,68417.495745,"International Reserves and Liquidity, Reserves..."
1989-10-01,70808.689361,"International Reserves and Liquidity, Reserves..."
1989-11-01,72559.751651,"International Reserves and Liquidity, Reserves..."
1989-12-01,74608.633269,"International Reserves and Liquidity, Reserves..."
1990-01-01,75506.361462,"International Reserves and Liquidity, Reserves..."
1990-02-01,74173.524868,"International Reserves and Liquidity, Reserves..."
1990-03-01,76303.02854,"International Reserves and Liquidity, Reserves..."
1990-04-01,76283.21061,"International Reserves and Liquidity, Reserves..."


Although there might be outliers in the data, it would be asinine to simply set these values to NA without understanding the circumstances behind them. Hence, we decided to extract the values into a dataframe and analyse them. Based on this, we realised that the period of outliers was from XX to YY.
We conclude that this was due to ZZ.
Hence, we decided to remove the data, as this was a result of {insert supply shock, etc. here}.

**TODO:** replace the placeholders XX, YY, ZZ and the text in braces with the actual findings.

In [12]:
#Creating a separate dataframe setting extreme values as NA for germany
#(df_germany itself is left unchanged; the function works on a copy)
df_modified_germany = replace_outliers_with_na(df_germany,germany_outliers)
df_modified_germany

Unnamed: 1,"Economic Activity, Industrial Production, Index",Yearly_Growth_Industrial_Production,Monthly_Growth_Industrial_Production,"Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate",Nominal_Monthly_Growth_Exchange_Rate,"International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar","Prices, Consumer Price Index, All items, Index",Index_National_Reserves,Real_Monthly_Exchange_Rate,Real_Monthly_Growth_Exchange_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1960-03-01,32.225254,,,0.238095,,4966.456016,24.477068,100.000000,0.432245,
1960-04-01,34.310512,,6.470881,0.238095,0.000000,5236.120624,24.616929,105.429719,0.433241,0.230473
1960-05-01,33.749096,,-1.636279,0.238095,0.000000,5417.091847,24.686864,109.073589,0.434472,0.284096
1960-06-01,34.422795,,1.996198,0.238095,0.000000,5843.766907,24.686864,117.664727,0.433004,-0.337838
1960-07-01,32.369618,,-5.964585,0.238095,0.000000,6124.728538,24.686864,123.321912,0.433004,0.000000
...,...,...,...,...,...,...,...,...,...,...
1990-08-01,76.015669,6.017897,-3.590683,0.636659,,72425.738573,67.556702,1458.298198,0.712667,
1990-09-01,86.554241,5.514275,13.863684,0.637064,0.063706,73197.573621,67.766509,1473.839160,0.709406,-0.457573
1990-10-01,92.705752,6.152998,7.107116,0.656470,3.046019,75011.926830,68.256049,1510.371311,0.731883,3.168449
1990-11-01,89.521723,5.580779,-3.434553,0.672495,2.441157,76166.074709,68.116177,1533.610173,0.746535,2.002014


In [13]:
#Creating a separate dataframe setting extreme values as NA for USA
#(df_usa itself is left unchanged; the function works on a copy)
df_modified_usa = replace_outliers_with_na(df_usa, usa_outliers)
df_modified_usa

Unnamed: 1,"Economic Activity, Industrial Production, Index","Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate","International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar",Index_National_Reserves,"Prices, Consumer Price Index, All items, Index",Monthly_Inflation_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1960-03-01,26.883139,1.0,21344.7,100.000000,13.482806,
1960-04-01,26.671183,1.0,21278.0,99.687510,13.528666,0.340136
1960-05-01,26.609362,1.0,21234.3,99.482776,13.528666,0.000000
1960-06-01,26.671183,1.0,21169.8,99.180593,13.574526,0.338983
1960-07-01,25.063847,1.0,20999.4,98.382268,13.574526,0.000000
...,...,...,...,...,...,...
1990-08-01,69.548201,1.0,,,60.351608,0.920245
1990-09-01,70.078092,1.0,,,60.856066,0.835866
1990-10-01,69.133119,1.0,,,61.222946,0.602864
1990-11-01,67.340321,1.0,,,61.360525,0.224719


## Interpolating Missing Data

In [14]:
#Creating function to interpolate data

def interpolate_missing_data(df):
    """Return a copy of ``df`` with the gaps in its numeric columns filled.

    Numeric columns that contain at least one missing value are filled by
    linear interpolation with ``limit_direction='both'``. Columns without
    missing values and non-numeric columns are returned as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the interpolated numeric columns.
    """
    filled = df.copy()

    numeric_columns = filled.select_dtypes(include=[np.number]).columns
    columns_with_gaps = [name for name in numeric_columns if filled[name].isnull().any()]

    for name in columns_with_gaps:
        filled[name] = filled[name].interpolate(method='linear', limit_direction='both')

    return filled

**Explaining Interpolating Function**

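This function creates a copy of the dataframe we pass in and then iterates through each numerical column that contains missing values. It fills the gaps with the interpolate() function using linear interpolation, so a missing value between two known data points is placed on the straight line joining the previous and next data points. Because `limit_direction='both'` is used, missing values at the very start or end of a column (where there is no data point on one side) are filled with the nearest available value, which is why such stretches appear as a constant in the outputs below.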

In [15]:
#Interpolating missing data for germany
#The result is kept under its own name so later cells can use it; df_modified_germany is not changed
df_interpolated_germany = interpolate_missing_data(df_modified_germany)
df_interpolated_germany

Unnamed: 1,"Economic Activity, Industrial Production, Index",Yearly_Growth_Industrial_Production,Monthly_Growth_Industrial_Production,"Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate",Nominal_Monthly_Growth_Exchange_Rate,"International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar","Prices, Consumer Price Index, All items, Index",Index_National_Reserves,Real_Monthly_Exchange_Rate,Real_Monthly_Growth_Exchange_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1960-03-01,32.225254,12.493778,6.470881,0.238095,0.000000,4966.456016,24.477068,100.000000,0.432245,0.230473
1960-04-01,34.310512,12.493778,6.470881,0.238095,0.000000,5236.120624,24.616929,105.429719,0.433241,0.230473
1960-05-01,33.749096,12.493778,-1.636279,0.238095,0.000000,5417.091847,24.686864,109.073589,0.434472,0.284096
1960-06-01,34.422795,12.493778,1.996198,0.238095,0.000000,5843.766907,24.686864,117.664727,0.433004,-0.337838
1960-07-01,32.369618,12.493778,-5.964585,0.238095,0.000000,6124.728538,24.686864,123.321912,0.433004,0.000000
...,...,...,...,...,...,...,...,...,...,...
1990-08-01,76.015669,6.017897,-3.590683,0.636659,1.376447,72425.738573,67.556702,1458.298198,0.712667,0.918934
1990-09-01,86.554241,5.514275,13.863684,0.637064,0.063706,73197.573621,67.766509,1473.839160,0.709406,-0.457573
1990-10-01,92.705752,6.152998,7.107116,0.656470,3.046019,75011.926830,68.256049,1510.371311,0.731883,3.168449
1990-11-01,89.521723,5.580779,-3.434553,0.672495,2.441157,76166.074709,68.116177,1533.610173,0.746535,2.002014


In [16]:
#Interpolating missing data for USA
#The result is kept under its own name so later cells can use it; df_modified_usa is not changed
df_interpolated_usa = interpolate_missing_data(df_modified_usa)
df_interpolated_usa

Unnamed: 1,"Economic Activity, Industrial Production, Index","Exchange Rates, US Dollar per Domestic Currency, Period Average, Rate","International Reserves and Liquidity, Reserves, Official Reserve Assets, US Dollar",Index_National_Reserves,"Prices, Consumer Price Index, All items, Index",Monthly_Inflation_Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1960-03-01,26.883139,1.0,21344.700000,100.000000,13.482806,0.340136
1960-04-01,26.671183,1.0,21278.000000,99.687510,13.528666,0.340136
1960-05-01,26.609362,1.0,21234.300000,99.482776,13.528666,0.000000
1960-06-01,26.671183,1.0,21169.800000,99.180593,13.574526,0.338983
1960-07-01,25.063847,1.0,20999.400000,98.382268,13.574526,0.000000
...,...,...,...,...,...,...
1990-08-01,69.548201,1.0,60503.412504,283.458716,60.351608,0.920245
1990-09-01,70.078092,1.0,60503.412504,283.458716,60.856066,0.835866
1990-10-01,69.133119,1.0,60503.412504,283.458716,61.222946,0.602864
1990-11-01,67.340321,1.0,60503.412504,283.458716,61.360525,0.224719


# Analysis

'C:\\Users\\nayrb\\Downloads'