In [1]:
import pandas as pd
from pandas import DataFrame
from typing import Set, Any


def remove_others(df: DataFrame, columns: Set[Any]):
    """Drop, in place, every column of ``df`` whose label is not in ``columns``.

    Args:
        df: Frame to trim. It is modified in place.
        columns: Set of column labels to keep. Labels that are not present
            in ``df`` are simply ignored.

    Returns:
        None. The caller's ``df`` object is mutated.
    """
    unwanted = {label for label in df.columns} - columns
    df.drop(columns=list(unwanted), inplace=True)


# Load the country-code lookup table from its CSV file.
df_Codes = pd.read_csv('Country codes.csv')

# Removing unwanted columns: keep only "alpha3" and "name".
# remove_others mutates df_Codes in place.
remove_others(df_Codes, {"alpha3", "name"})

# Rename both remaining columns in a single pass.
df_Codes = df_Codes.rename(
    columns={
        "alpha3": "Country code",
        "name": "Country Name",
    }
)
df_Codes


Unnamed: 0,Country code,Country Name
0,afg,Afghanistan
1,ala,Åland Islands
2,alb,Albania
3,dza,Algeria
4,asm,American Samoa
...,...,...
244,wlf,Wallis and Futuna
245,esh,Western Sahara
246,yem,Yemen
247,zmb,Zambia


In [2]:
# Load the world tax-rate CSV and give its columns readable names.
# "Country Name" matches the column of the same name in df_Codes.
df_Tax = pd.read_csv('World Tax rates data.csv').rename(
    columns={
        "country": "Country Name",
        "incomeTax": "Income Tax",
        "salesTax": "Sales Tax",
        "corpTax": "Corporation Tax",
    }
)

df_Tax


Unnamed: 0,Country Name,Income Tax,Sales Tax,Corporation Tax
0,Ivory Coast,60,18.0,25.0
1,Finland,56,24.0,20.0
2,Japan,55,10.0,30.0
3,Austria,55,20.0,25.0
4,Denmark,55,25.0,22.0
...,...,...,...,...
146,Bahrain,0,5.0,0.0
147,Brunei,0,,18.0
148,Bahamas,0,12.0,0.0
149,Cayman Islands,0,,0.0


In [35]:
from functools import reduce

# Outer-join the country-code table with the tax table on the country name,
# so rows that appear in only one of the two tables are kept (with NaN in
# the columns coming from the other table).
dfs = [df_Codes, df_Tax]
df_merged = pd.merge(*dfs, on=['Country Name'], how='outer')


In [36]:
df_merged
# Display the merged frame to check the outer join worked: rows with NaN in
# "Country code" or in the tax columns are the ones that found no match.

Unnamed: 0,Country code,Country Name,Income Tax,Sales Tax,Corporation Tax
0,afg,Afghanistan,20.0,10.0,20.0
1,ala,Åland Islands,,,
2,alb,Albania,23.0,20.0,15.0
3,dza,Algeria,35.0,19.0,26.0
4,asm,American Samoa,,,
...,...,...,...,...,...
263,,Russia,13.0,20.0,20.0
264,,Bolivia,13.0,13.0,25.0
265,,Moldova,12.0,20.0,12.0
266,,Macau,12.0,,12.0


In [61]:
# Drop only the rows where all three tax columns are NaN (how='all'); rows
# with at least one tax figure are kept. Then renumber the index from 0.
df_clean = (
    df_merged
    .dropna(subset=['Income Tax', 'Sales Tax', 'Corporation Tax'], how='all')
    .reset_index(drop=True)
)
df_clean.head()


Unnamed: 0,Country code,Country Name,Income Tax,Sales Tax,Corporation Tax
0,afg,Afghanistan,20.0,10.0,20.0
1,alb,Albania,23.0,20.0,15.0
2,dza,Algeria,35.0,19.0,26.0
3,ago,Angola,25.0,14.0,25.0
4,arg,Argentina,35.0,21.0,25.0


In [60]:
# Adding the codes for countries which were not matched by name in the merge.
# The codes are written to consecutive row labels of df_clean, starting at 132.
#
# NOTE(review): these row labels are hard-coded and depend on the row order
# produced by the earlier merge / dropna / reset_index cells. The displayed
# outputs suggest the last name-matched row has label 130, so please confirm
# that 132-150 (rather than 131-149) is the intended range. Assigning through
# .loc to a label that does not exist appends a new row instead of raising.
manual_codes = [
    'civ', 'gbr', 'kor', 'tur', 'twn', 'cod', 'usa', 'vnm', 'ven', 'tza',
    'lao', 'tto', 'cze', 'syr', 'rus', 'bol', 'mda', 'mac', 'brn',
]
for row_label, code in enumerate(manual_codes, start=132):
    df_clean.loc[row_label, 'Country code'] = code

df_clean.tail(30)

Unnamed: 0,Country code,Country Name,Income Tax,Sales Tax,Corporation Tax
121,tjk,Tajikistan,13.0,18.0,
122,tha,Thailand,35.0,7.0,20.0
123,tun,Tunisia,35.0,19.0,15.0
124,uga,Uganda,40.0,18.0,30.0
125,ukr,Ukraine,18.0,20.0,18.0
126,are,United Arab Emirates,0.0,5.0,0.0
127,ury,Uruguay,36.0,22.0,25.0
128,uzb,Uzbekistan,12.0,15.0,7.0
129,yem,Yemen,15.0,5.0,20.0
130,zmb,Zambia,37.0,16.0,35.0


In [69]:
from functools import reduce

# Outer-join the full code list with the cleaned tax rows on the country
# code, so every code from df_Codes is present even without tax data.
dfs = [df_Codes, df_clean]
df_remainingcountries = pd.merge(*dfs, on=['Country code'], how='outer')

# Both inputs carry a "Country Name" column, so the merge suffixes them with
# _x / _y; neither is kept in the final table.
df_final = df_remainingcountries.drop(['Country Name_x', 'Country Name_y'], axis=1)
df_final.head(10)


Unnamed: 0,Country code,Income Tax,Sales Tax,Corporation Tax
0,afg,20.0,10.0,20.0
1,ala,,,
2,alb,23.0,20.0,15.0
3,dza,35.0,19.0,26.0
4,asm,,,
5,and,,,
6,ago,25.0,14.0,25.0
7,aia,,,
8,ata,,,
9,atg,,,


In [78]:
# Removing Antarctica: find the row labels whose code is "ata" and drop
# them from df_final in place.
indexNames = df_final.index[df_final['Country code'] == "ata"]
df_final.drop(index=indexNames, inplace=True)

# Write the result to a CSV file, without the index column.
df_final.to_csv('Tax and country code data.csv', index=False, encoding='utf-8')


Unnamed: 0,Country code,Income Tax,Sales Tax,Corporation Tax
0,afg,20.0,10.0,20.0
1,ala,,,
2,alb,23.0,20.0,15.0
3,dza,35.0,19.0,26.0
4,asm,,,
...,...,...,...,...
263,,13.0,20.0,20.0
264,,13.0,13.0,25.0
265,,12.0,20.0,12.0
266,,12.0,,12.0
