In [47]:
import pandas as pd
import numpy as np 
import matplotlib as mpl
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore')

In [48]:
df_inflows = pd.read_csv("..\Datasets\TotalFDIInflows_2016_2020_ValueUS$_byCountry.csv")
df_outflows = pd.read_csv("..\Datasets\TotalFDIOutflows_2016_2020_ValueUS$_byCountry.csv")


In [49]:
cols = ["Area Code (M49)", "Area", "Item", "Year", "Value", "Unit"]
df_inflows_trimmed = df_inflows[cols]
df_outflows_trimmed = df_outflows[cols]

df_outflows_trimmed

Unnamed: 0,Area Code (M49),Area,Item,Year,Value,Unit
0,4,Afghanistan,Total FDI outflows,2016,14.780000,millions
1,4,Afghanistan,Total FDI outflows,2017,10.800000,millions
2,4,Afghanistan,Total FDI outflows,2018,40.530000,millions
3,4,Afghanistan,Total FDI outflows,2019,25.946667,millions
4,4,Afghanistan,Total FDI outflows,2020,37.112627,millions
...,...,...,...,...,...,...
843,716,Zimbabwe,Total FDI outflows,2016,28.796690,millions
844,716,Zimbabwe,Total FDI outflows,2017,42.231841,millions
845,716,Zimbabwe,Total FDI outflows,2018,26.771877,millions
846,716,Zimbabwe,Total FDI outflows,2019,30.500000,millions


In [50]:
## Dealing with NA/Empty Cells

# Method 1 => Removing all empty rows
def remove_NaN_rows(df_in: pd.DataFrame, val_col_name: str):
    df_out = df_in.loc[df_in[val_col_name].notna(), :]
    return df_out

# Method 2 => Replacing NaN with mean value
def replace_NaN_with_mean(df_in: pd.DataFrame, val_col_name: str):
    ...

df_inflows_trimmed = remove_NaN_rows(df_in=df_inflows_trimmed, val_col_name="Value")
df_outflows_trimmed = remove_NaN_rows(df_in=df_outflows_trimmed, val_col_name="Value")
# df1 = replace_NaN_with_mean(df_in=df1, val_col_name="Value")

In [51]:
## Transforming Data
year_l = [2016, 2017, 2018, 2019, 2020]
"""
Steps:
    1) Generate dictionary: k == Year, v == list of values corresponding to the year
    2) Create 5 new cols for each Year
    3) Assign values into the 5 cols based on dictionary
    4) Drop unnecessary cols
    5) Shift values into a single row and remove extra rows
"""
def transform_data(df1: pd.DataFrame):
    

    d = {}
    for y in year_l:
        d[y] = df1.loc[df1["Year"] == y, "Value"]

    for j in range(len(year_l)):
        col_name = f"{df1['Item'][0]} {year_l[j]}"
        df1[col_name] = 0


    for k,v in d.items():
        df1.loc[v.index, f"{df1['Item'][0]} {str(k)}"] = v

    df1 = df1.drop(["Item", "Year", "Value"], axis=1, inplace=False)

    columns = df1.columns[-5:]
    df1[columns[1]] = df1[columns[1]].shift(periods=-1, fill_value="0")
    df1[columns[2]] = df1[columns[2]].shift(periods=-2, fill_value="0")
    df1[columns[3]] = df1[columns[3]].shift(periods=-3, fill_value="0")
    df1[columns[4]] = df1[columns[4]].shift(periods=-4, fill_value="0")
    df1 = df1.iloc[::5, 0:].reset_index(drop=True)

    return df1

In [52]:
df_inflows_transformed = transform_data(df1=df_inflows_trimmed)
df_outflows_transformed = transform_data(df1=df_outflows_trimmed)
df_outflows_transformed = df_outflows_transformed.drop(["Area", "Unit"], axis=1)
net_fdi_df = df_inflows_transformed.merge(df_outflows_transformed, on="Area Code (M49)", how="outer")

net_fdi_df

Unnamed: 0,Area Code (M49),Area,Unit,Total FDI inflows 2016,Total FDI inflows 2017,Total FDI inflows 2018,Total FDI inflows 2019,Total FDI inflows 2020,Total FDI outflows 2016,Total FDI outflows 2017,Total FDI outflows 2018,Total FDI outflows 2019,Total FDI outflows 2020
0,4,Afghanistan,millions,93.590000,53.39,119.44,38.533333,12.970492,14.780000,10.8,40.53,25.946667,37.112627
1,8,Albania,millions,1100.671891,1148.891224,1289.690235,1287.978184,1106.560092,64.200483,26.197174,82.56079,127.855221,89.433996
2,12,Algeria,millions,1636.299236,1232.341924,1466.084654,1381.811818,1125.414826,46.202384,-28.605242,845.268059,30.989854,15.518489
3,24,Angola,millions,-179.517619,-7397.295409,-6456.076413,-4098.478748,-1866.468113,273.005000,1352.005057,5.714168,-2349.425835,90.514469
4,660,Anguilla,millions,60.253704,53.534815,55.784444,125.088889,26.304077,-1.646667,-0.57963,-0.556296,-0.927531,-0.902742
...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,704,Viet Nam,millions,12600.000000,14100.0,15500.0,16120.0,15800.0,0.000000,0.0,0.0,0.0,0.0
197,887,Yemen,millions,-561.000000,-269.85,-282.098333,-370.982778,0.0,0.000000,0.0,0.0,0.0,0.0
198,894,Zambia,millions,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0
199,716,Zimbabwe,millions,0.000000,0.0,0.0,0.0,0,0.000000,0.0,0.0,0,0


In [54]:
## Finding the net FDI

net_fdi_df.columns
numeric_cols = ['Total FDI inflows 2016','Total FDI inflows 2017', 'Total FDI inflows 2018','Total FDI inflows 2019', 'Total FDI inflows 2020','Total FDI outflows 2016', 'Total FDI outflows 2017','Total FDI outflows 2018', 'Total FDI outflows 2019','Total FDI outflows 2020']
net_fdi_df[numeric_cols] = net_fdi_df[numeric_cols].apply(pd.to_numeric, errors="ignore")

for y in year_l:
    val = net_fdi_df[f"Total FDI inflows {y}"] - net_fdi_df[f"Total FDI outflows {y}"]
    net_fdi_df[f"Net FDI {y}"] = val

net_fdi_df = net_fdi_df.iloc[:-1, :]
net_fdi_df

Unnamed: 0,Area Code (M49),Area,Unit,Total FDI inflows 2016,Total FDI inflows 2017,Total FDI inflows 2018,Total FDI inflows 2019,Total FDI inflows 2020,Total FDI outflows 2016,Total FDI outflows 2017,Total FDI outflows 2018,Total FDI outflows 2019,Total FDI outflows 2020,Net FDI 2016,Net FDI 2017,Net FDI 2018,Net FDI 2019,Net FDI 2020
0,4,Afghanistan,millions,93.590000,53.390000,119.440000,38.533333,12.970492,14.780000,10.800000,40.530000,25.946667,37.112627,78.810000,42.590000,78.910000,12.586666,-24.142135
1,8,Albania,millions,1100.671891,1148.891224,1289.690235,1287.978184,1106.560092,64.200483,26.197174,82.560790,127.855221,89.433996,1036.471408,1122.694050,1207.129445,1160.122963,1017.126096
2,12,Algeria,millions,1636.299236,1232.341924,1466.084654,1381.811818,1125.414826,46.202384,-28.605242,845.268059,30.989854,15.518489,1590.096852,1260.947166,620.816595,1350.821964,1109.896337
3,24,Angola,millions,-179.517619,-7397.295409,-6456.076413,-4098.478748,-1866.468113,273.005000,1352.005057,5.714168,-2349.425835,90.514469,-452.522619,-8749.300466,-6461.790581,-1749.052913,-1956.982582
4,660,Anguilla,millions,60.253704,53.534815,55.784444,125.088889,26.304077,-1.646667,-0.579630,-0.556296,-0.927531,-0.902742,61.900371,54.114445,56.340740,126.016420,27.206819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,862,Venezuela (Bolivarian Republic of),millions,1068.000000,-68.000000,886.000000,934.000000,958.666667,0.000000,0.000000,0.000000,0.000000,0.000000,1068.000000,-68.000000,886.000000,934.000000,958.666667
196,704,Viet Nam,millions,12600.000000,14100.000000,15500.000000,16120.000000,15800.000000,0.000000,0.000000,0.000000,0.000000,0.000000,12600.000000,14100.000000,15500.000000,16120.000000,15800.000000
197,887,Yemen,millions,-561.000000,-269.850000,-282.098333,-370.982778,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-561.000000,-269.850000,-282.098333,-370.982778,0.000000
198,894,Zambia,millions,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
