# Usual Imports

In [135]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Read in GDP data

In [136]:
df_gdp = pd.read_csv('fake_data/fake_gdp.csv')
df_gdp.head()

Unnamed: 0,Country,Country_code,2017,2018,2019,2020,2021
0,Australia,AUS,69,14,13,70,86
1,Belgium,BEL,92,87,40,50,90
2,Canada,CAN,33,28,60,17,19
3,Zimbabwe,ZIM,68,17,12,79,51


## Set the index columns

In [137]:
df_gdp.set_index(["Country","Country_code"], inplace=True)

## Use unstack() and stack() to pivot those that are not index columns, and create a new df

In [138]:
new_gdp = df_gdp.unstack().stack(level=0)
new_gdp.head(10)

Unnamed: 0_level_0,Country_code,AUS,BEL,CAN,ZIM
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Australia,2017,69.0,,,
Australia,2018,14.0,,,
Australia,2019,13.0,,,
Australia,2020,70.0,,,
Australia,2021,86.0,,,
Belgium,2017,,92.0,,
Belgium,2018,,87.0,,
Belgium,2019,,40.0,,
Belgium,2020,,50.0,,
Belgium,2021,,90.0,,


## Reset the index

In [139]:
new_gdp.reset_index(inplace=True)
new_gdp.head()

Country_code,Country,level_1,AUS,BEL,CAN,ZIM
0,Australia,2017,69.0,,,
1,Australia,2018,14.0,,,
2,Australia,2019,13.0,,,
3,Australia,2020,70.0,,,
4,Australia,2021,86.0,,,


## Fename the Year column

In [140]:
new_gdp.rename(columns={'level_1': 'Year'}, inplace=True)

## Fill the new null values

In [141]:
new_gdp.fillna(0)

Country_code,Country,Year,AUS,BEL,CAN,ZIM
0,Australia,2017,69.0,0.0,0.0,0.0
1,Australia,2018,14.0,0.0,0.0,0.0
2,Australia,2019,13.0,0.0,0.0,0.0
3,Australia,2020,70.0,0.0,0.0,0.0
4,Australia,2021,86.0,0.0,0.0,0.0
5,Belgium,2017,0.0,92.0,0.0,0.0
6,Belgium,2018,0.0,87.0,0.0,0.0
7,Belgium,2019,0.0,40.0,0.0,0.0
8,Belgium,2020,0.0,50.0,0.0,0.0
9,Belgium,2021,0.0,90.0,0.0,0.0


## Sum the last four columns (those with the annual GDP data)

In [142]:
new_gdp['Total_GDP']= new_gdp.iloc[:, -4:-1].sum(axis=1)
new_gdp.head()

Country_code,Country,Year,AUS,BEL,CAN,ZIM,Total_GDP
0,Australia,2017,69.0,,,,69.0
1,Australia,2018,14.0,,,,14.0
2,Australia,2019,13.0,,,,13.0
3,Australia,2020,70.0,,,,70.0
4,Australia,2021,86.0,,,,86.0


## Drop the unneccessary columns

In [143]:
new_gdp.drop(columns=['AUS','BEL','CAN','ZIM'], inplace=True)
new_gdp.head(10)

Country_code,Country,Year,Total_GDP
0,Australia,2017,69.0
1,Australia,2018,14.0
2,Australia,2019,13.0
3,Australia,2020,70.0
4,Australia,2021,86.0
5,Belgium,2017,92.0
6,Belgium,2018,87.0
7,Belgium,2019,40.0
8,Belgium,2020,50.0
9,Belgium,2021,90.0


## Change the 'Year' from an object to int

In [144]:
new_gdp['Year']=new_gdp['Year'].astype(int)

## Read in the refugee data

In [145]:
df_ref = pd.read_csv('fake_data/fake_ref.csv')
df_ref.head(10)

Unnamed: 0,Country,Country_code,Year,Refugees,Displaced Persons
0,Australia,AUS,2017,851,372
1,Belgium,BEL,2017,380,998
2,Canada,CAN,2017,525,162
3,Zimbabwe,ZIM,2017,740,210
4,Australia,AUS,2018,544,728
5,Belgium,BEL,2018,94,244
6,Canada,CAN,2018,812,708
7,Zimbabwe,ZIM,2018,267,357
8,Australia,AUS,2019,92,941
9,Belgium,BEL,2019,986,199


## Merge the GDP data to the Refugee Data

In [149]:
combo_df = pd.merge(df_ref, new_gdp, how="left", on=['Country','Year'])

## Export combined dataframe

In [150]:
combo_df.to_csv('fake_data/combined_data.csv')