In [6]:
import pandas as pd 
import psycopg2
from sqlalchemy import create_engine

In [7]:
# Load the raw detailed-trade-matrix CSV. The file is read as ISO-8859-1
# (Latin-1) rather than pandas' default UTF-8.
raw_csv_path = "Trade_DetailedTradeMatrix_E_All_Data_(Normalized).csv"
df = pd.read_csv(raw_csv_path, encoding='ISO-8859-1')

# Preview the first five rows to confirm the load worked.
df.head()

Unnamed: 0,Reporter Country Code,Reporter Countries,Partner Country Code,Partner Countries,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,Afghanistan,4,Algeria,230,"Cashew nuts, shelled",5910,Export Quantity,2016,2016,tonnes,3.0,*
1,2,Afghanistan,4,Algeria,230,"Cashew nuts, shelled",5922,Export Value,2016,2016,1000 US$,23.0,*
2,2,Afghanistan,4,Algeria,1293,Crude materials,5922,Export Value,2015,2015,1000 US$,1.0,*
3,2,Afghanistan,4,Algeria,1293,Crude materials,5922,Export Value,2016,2016,1000 US$,1.0,*
4,2,Afghanistan,4,Algeria,1293,Crude materials,5922,Export Value,2017,2017,1000 US$,5.0,R


In [8]:
# Inspect the dtype pandas inferred for every column of the raw frame.
df.dtypes

Reporter Country Code      int64
Reporter Countries        object
Partner Country Code       int64
Partner Countries         object
Item Code                  int64
Item                      object
Element Code               int64
Element                   object
Year Code                  int64
Year                       int64
Unit                      object
Value                    float64
Flag                      object
dtype: object

In [9]:
# Columns removed in cleaning step 1: the data-quality flag and the numeric
# code columns for year, element and item.
unwanted_columns = ["Flag", "Year Code", "Element Code", "Item Code"]

# drop() returns a new frame; the raw `df` is left untouched.
df_clean_step_1 = df.drop(unwanted_columns, axis="columns")
df_clean_step_1.head()

Unnamed: 0,Reporter Country Code,Reporter Countries,Partner Country Code,Partner Countries,Item,Element,Year,Unit,Value
0,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Quantity,2016,tonnes,3.0
1,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Value,2016,1000 US$,23.0
2,2,Afghanistan,4,Algeria,Crude materials,Export Value,2015,1000 US$,1.0
3,2,Afghanistan,4,Algeria,Crude materials,Export Value,2016,1000 US$,1.0
4,2,Afghanistan,4,Algeria,Crude materials,Export Value,2017,1000 US$,5.0


In [10]:
# Row count per distinct Item, most frequent first; used to review which
# items appear in the data.
check_list = df_clean_step_1.Item.value_counts()
check_list

Food prep nes               762746
Crude materials             642780
Pastry                      530694
Sugar confectionery         509540
Fruit, prepared nes         494784
                             ...  
Sugar flavoured                  4
Ghee, buffalo milk               4
Cake, hempseed                   2
Offals, other camelids           2
Hides, camel, wet salted         2
Name: Item, Length: 426, dtype: int64

In [11]:
# Items excluded from the cleaned data set.
# NOTE(review): the name suggests these are items for which no supporting
# data is available downstream -- confirm the selection criterion.
no_support_data = [
    'Crude materials',
    'Cigarettes',
    'Pastry',
    'Food prep nes',
    'Fruit, prepared nes',
    'Beverages, non alcoholic',
    'Beverages, distilled alcoholic',
    'Beverages, fermented rice',
    'Beer of barley',
    'Beer of sorghum',
    'Bread',
    'Infant food',
    'Wine',
    'Waters,ice etc',
    'Vitamins',
    'Wafers',
    'Oil, boiled etc',
    'Mixes and doughs',
    'Food Wastes',
    'Food preparations, flour, malt extract',
    'Cigars, cheroots',
    'Chocolate products nes',
]

# Keep only the rows whose Item is NOT in the exclusion list. `~` negates the
# boolean mask directly instead of comparing it to the literal False.
is_excluded = df_clean_step_1['Item'].isin(no_support_data)
df_clean_step_2 = df_clean_step_1[~is_excluded]
df_clean_step_2.head()

Unnamed: 0,Reporter Country Code,Reporter Countries,Partner Country Code,Partner Countries,Item,Element,Year,Unit,Value
0,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Quantity,2016,tonnes,3.0
1,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Value,2016,1000 US$,23.0
5,2,Afghanistan,4,Algeria,Raisins,Export Quantity,2014,tonnes,12.0
6,2,Afghanistan,4,Algeria,Raisins,Export Value,2014,1000 US$,27.0
7,2,Afghanistan,4,Algeria,Spices nes,Export Quantity,2014,tonnes,0.0


In [12]:
# Cleaning step 3: keep only rows with Year strictly greater than 2010.
df_clean_step_3 = df_clean_step_2.loc[df_clean_step_2['Year'].gt(2010)]

In [13]:
# Sanity check: row count per remaining year after the year filter.
df_clean_step_3.Year.value_counts()

2018    1656307
2019    1640084
2017    1639541
2016    1576096
2015    1545361
2014    1523543
2013    1370458
2012    1321414
2011    1242081
Name: Year, dtype: int64

In [14]:
# Map the original column headers to short snake_case names.
column_renames = {
    'Reporter Country Code': 'rep_country_code',
    'Reporter Countries': 'rep_countries',
    'Partner Country Code': 'par_country_code',
    'Partner Countries': 'par_countries',
    'Item': 'item',
    'Element': 'element',
    'Year': 'year',
    'Unit': 'unit',
    'Value': 'value',
}

# rename() returns a new frame; df_clean_step_3 keeps its original headers.
df_clean_step_4 = df_clean_step_3.rename(columns=column_renames)
df_clean_step_4

Unnamed: 0,rep_country_code,rep_countries,par_country_code,par_countries,item,element,year,unit,value
0,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Quantity,2016,tonnes,3.0
1,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Value,2016,1000 US$,23.0
5,2,Afghanistan,4,Algeria,Raisins,Export Quantity,2014,tonnes,12.0
6,2,Afghanistan,4,Algeria,Raisins,Export Value,2014,1000 US$,27.0
7,2,Afghanistan,4,Algeria,Spices nes,Export Quantity,2014,tonnes,0.0
...,...,...,...,...,...,...,...,...,...
39473863,181,Zimbabwe,251,Zambia,"Yoghurt, concentrated or not",Import Value,2016,1000 US$,123.0
39473868,181,Zimbabwe,251,Zambia,"Yoghurt, concentrated or not",Export Quantity,2015,tonnes,2.0
39473869,181,Zimbabwe,251,Zambia,"Yoghurt, concentrated or not",Export Quantity,2019,tonnes,76.0
39473874,181,Zimbabwe,251,Zambia,"Yoghurt, concentrated or not",Export Value,2015,1000 US$,5.0


In [16]:
# Replace the gappy row labels left over from filtering with a fresh 0..n-1
# index. The old labels are kept as a regular column named 'index'.
#
# The cell rebinds df_clean_step_4 to a transformed copy of itself, so the
# guard makes it safe to re-run: without it, a second execution would insert
# yet another copy of the index as an extra column.
if 'index' not in df_clean_step_4.columns:
    df_clean_step_4 = df_clean_step_4.reset_index()


In [17]:
# Preview the first five rows to check the result of the index reset.
df_clean_step_4.head(5)

Unnamed: 0,index,rep_country_code,rep_countries,par_country_code,par_countries,item,element,year,unit,value
0,0,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Quantity,2016,tonnes,3.0
1,1,2,Afghanistan,4,Algeria,"Cashew nuts, shelled",Export Value,2016,1000 US$,23.0
2,5,2,Afghanistan,4,Algeria,Raisins,Export Quantity,2014,tonnes,12.0
3,6,2,Afghanistan,4,Algeria,Raisins,Export Value,2014,1000 US$,27.0
4,7,2,Afghanistan,4,Algeria,Spices nes,Export Quantity,2014,tonnes,0.0


In [18]:
# Write the cleaned frame to disk with a header row and without the pandas
# row index.
# NOTE(review): the target name has no ".csv" extension, and the data kept by
# the earlier `Year > 2010` filter starts at 2011 -- confirm the file name is
# what downstream consumers expect.
df_clean_step_4.to_csv('trade_2010', header=True, index=False)