# Project GDP
## GitHub Repo: https://github.com/JackBeerman/GDP

In [1]:
import pandas as pd
import numpy as np
import requests
import os
import zipfile
import io
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
import psycopg2
from sqlalchemy import create_engine

import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

import plotly.figure_factory as ff
import plotly.express as px

In [2]:
### Goal: Have a dashboard that allows us to select a country, country it trades with, view imports and exports, tarrifs
### Goal: predictive analysis of tarrifs and GDP..? too much

## Data

* The World Bank https://data.worldbank.org/
* World Integrated Trade Solution https://wits.worldbank.org/
* World Trade Organization https://www.wto.org/english/res_e/statis_e/statis_e.htm
* Our World in Data https://ourworldindata.org/trade-and-globalization
* International Monetary Fund https://data.imf.org/?sk=9d6028d4-f14a-464c-a2f2-59b2cd424b85
* Human Development Reports https://hdr.undp.org/

## Preprocess World Bank

In [3]:
noncountries = ["Arab World", "Central Europe and the Baltics",
"Caribbean small states",
"East Asia & Pacific (excluding high income)",
"Early-demographic dividend","East Asia & Pacific",
"Europe & Central Asia (excluding high income)",
"Europe & Central Asia", "Euro area",
"European Union","Fragile and conflict affected situations",
"High income",
"Heavily indebted poor countries (HIPC)","IBRD only",
"IDA & IBRD total",
"IDA total","IDA blend","IDA only",
"Latin America & Caribbean (excluding high income)",
"Latin America & Caribbean",
"Least developed countries: UN classification",
"Low income","Lower middle income","Low & middle income",
"Late-demographic dividend","Middle East & North Africa",
"Middle income",
"Middle East & North Africa (excluding high income)",
"North America","OECD members",
"Other small states","Pre-demographic dividend",
"Pacific island small states",
"Post-demographic dividend",
"Sub-Saharan Africa (excluding high income)",
"Sub-Saharan Africa",
"Small states","East Asia & Pacific (IDA & IBRD)",
"Europe & Central Asia (IDA & IBRD)",
"Latin America & Caribbean (IDA & IBRD)",
"Middle East & North Africa (IDA & IBRD)","South Asia",
"South Asia (IDA & IBRD)",
"Sub-Saharan Africa (IDA & IBRD)",
"Upper middle income", "World"]

In [4]:
exports = pd.read_csv("exports/goods_exported.csv", skiprows=4)
exports.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,Unnamed: 67
0,Aruba,ABW,"Goods exports (BoP, current US$)",BX.GSR.MRCH.CD,,,,,,,...,254540500.0,329660000.0,284021000.0,134600400.0,184801600.0,130708100.0,87331580.0,123003000.0,205274600.0,
1,Africa Eastern and Southern,AFE,"Goods exports (BoP, current US$)",BX.GSR.MRCH.CD,,,,,,,...,232833700000.0,180778700000.0,171393500000.0,195415200000.0,216162000000.0,202690900000.0,180505300000.0,258564500000.0,289241900000.0,
2,Afghanistan,AFG,"Goods exports (BoP, current US$)",BX.GSR.MRCH.CD,,,,,,,...,641767400.0,577859800.0,614217600.0,783962900.0,875242700.0,863834400.0,776734500.0,,,
3,Africa Western and Central,AFW,"Goods exports (BoP, current US$)",BX.GSR.MRCH.CD,,,,,,,...,165721300000.0,108419400000.0,96448570000.0,119290400000.0,142463400000.0,149679000000.0,118280300000.0,143466200000.0,,
4,Angola,AGO,"Goods exports (BoP, current US$)",BX.GSR.MRCH.CD,,,,,,,...,59169880000.0,33181130000.0,27588880000.0,34613450000.0,40757770000.0,34725560000.0,20937440000.0,33581490000.0,50037990000.0,


In [5]:
#print(exports.columns)

In [6]:
exports = exports.drop(columns=['Indicator Code', 'Unnamed: 67'])
#print(exports.columns)

In [7]:
exports = exports.rename(columns={"Country Code": "country_code", "Country Name": "country_name_exp", "Indicator Name": 'feature'})
#exports

In [8]:
exports = exports.query('country_name_exp != @noncountries')
#exports

In [9]:
imports = pd.read_csv("imports/import_goods_services.csv", skiprows=4)
#imports.head()

In [10]:
#print(imports.columns)

In [11]:
imports = imports.drop(columns=['Indicator Code', 'Unnamed: 67'])
#print(imports.columns)

In [12]:
imports = imports.rename(columns={"Country Code": "country_code", "Country Name": "country_name_imp", "Indicator Name": 'feature'})
#imports

In [13]:
imports = imports.query('country_name_imp != @noncountries')
#imports

#### GDP

In [14]:
gdp = pd.read_csv("gdp/GDP.csv", skiprows=4)
#gdp.head()

In [15]:
#print(gdp.columns)

In [16]:
gdp = gdp.drop(columns=['Indicator Code', 'Unnamed: 67'])
#print(gdp.columns)

In [17]:
gdp = gdp.rename(columns={"Country Code": "country_code", "Country Name": "country_name_gdp", "Indicator Name": 'feature'})
#gdp

In [18]:
gdp = gdp.query('country_name_gdp != @noncountries')
#gdp

##### Tariffs

In [19]:
tariffs = pd.read_csv("tariffs/tarrifs.csv", skiprows=4)
#tariffs.head()

In [20]:
#print(tariffs.columns)

In [21]:
tariffs = tariffs.drop(columns=['Indicator Code', 'Unnamed: 67'])
#print(tariffs.columns)

In [22]:
tariffs = tariffs.rename(columns={"Country Code": "country_code", "Country Name": "country_name_tar", "Indicator Name": 'feature'})
#tariffs

In [23]:
tariffs = tariffs.query('country_name_tar != @noncountries')
#tariffs

#### NEED TO CHANGE Features

In [24]:
#wb = wb.replace(replace_map)
#wb

# Melt and conform to one data frame

In [25]:
exports = pd.melt(exports, id_vars=['country_name_exp', 'country_code'
                         ,'feature'], var_name='year', value_name='value')
#exports

In [26]:
exports = exports.pivot_table(index=['country_name_exp','country_code', 'year'], columns='feature', values='value').reset_index()
 
 
exports.columns.name = None
 
 
exports = exports.reset_index(drop=True)

In [27]:
exports['year'] = pd.to_numeric(exports['year'])

In [28]:
imports = pd.melt(imports, id_vars=['country_name_imp', 'country_code'
                         ,'feature'], var_name='year', value_name='value')
#imports

In [29]:
imports = imports.pivot_table(index=['country_name_imp','country_code', 'year'], columns='feature', values='value').reset_index()
 
 
imports.columns.name = None
 
 
imports = imports.reset_index(drop=True)

In [30]:
imports['year'] = pd.to_numeric(imports['year'])

# Merge 1

In [31]:
merge1 = pd.merge(exports, imports, on = ['country_code', 'year'],
                  how = 'inner')

In [32]:
gdp = pd.melt(gdp, id_vars=['country_name_gdp', 'country_code'
                         ,'feature'], var_name='year', value_name='value')
#gdp

In [33]:
gdp = gdp.pivot_table(index=['country_name_gdp','country_code', 'year'], columns='feature', values='value').reset_index()
 
 
gdp.columns.name = None
 
 
gdp = gdp.reset_index(drop=True)

In [34]:
gdp['year'] = pd.to_numeric(gdp['year'])

In [35]:
tariffs = pd.melt(tariffs, id_vars=['country_name_tar', 'country_code'
                         ,'feature'], var_name='year', value_name='value')
#tariffs

In [36]:
tariffs = tariffs.pivot_table(index=['country_name_tar','country_code', 'year'], columns='feature', values='value').reset_index()
 
 
tariffs.columns.name = None
 
 
tariffs = tariffs.reset_index(drop=True)

In [37]:
tariffs['year'] = pd.to_numeric(tariffs['year'])

# Merge 2

In [38]:
merge2 = pd.merge(gdp, tariffs, on = ['country_code', 'year'],
                  how = 'inner')

# Merge 3

In [39]:
merge = pd.merge(merge1, merge2, on = ['country_code', 'year'],
                  how = 'inner')

In [40]:
merge.columns

Index(['country_name_exp', 'country_code', 'year',
       'Goods exports (BoP, current US$)', 'country_name_imp',
       'Imports of goods, services and primary income (BoP, current US$)',
       'country_name_gdp', 'GDP (current US$)', 'country_name_tar',
       'Tariff rate, most favored nation, weighted mean, all products (%)'],
      dtype='object')

In [58]:
merge = merge.drop(columns=['country_name_imp', 'country_name_gdp', 'country_name_tar'])
merge

Unnamed: 0,country_name_exp,country_code,year,"Goods exports (BoP, current US$)","Imports of goods, services and primary income (BoP, current US$)",GDP (current US$),"Tariff rate, most favored nation, weighted mean, all products (%)"
0,Afghanistan,AFG,2008,5.632992e+08,3.873397e+09,1.024977e+10,5.00
1,Afghanistan,AFG,2012,4.755310e+08,9.891159e+09,2.020357e+10,5.50
2,Afghanistan,AFG,2013,5.057926e+08,1.018433e+10,2.056449e+10,5.50
3,Afghanistan,AFG,2018,8.752427e+08,8.086339e+09,1.841886e+10,5.72
4,Albania,ALB,1997,9.090000e+07,6.976125e+08,2.258514e+09,14.41
...,...,...,...,...,...,...,...
3461,Zimbabwe,ZWE,2010,3.245066e+09,6.593559e+09,1.204166e+10,16.60
3462,Zimbabwe,ZWE,2011,4.527563e+09,9.528102e+09,1.410192e+10,13.49
3463,Zimbabwe,ZWE,2012,3.963773e+09,8.683377e+09,1.711485e+10,14.76
3464,Zimbabwe,ZWE,2015,3.577478e+09,7.584347e+09,1.996312e+10,12.86


In [59]:
merge.columns

Index(['country_name_exp', 'country_code', 'year',
       'Goods exports (BoP, current US$)',
       'Imports of goods, services and primary income (BoP, current US$)',
       'GDP (current US$)',
       'Tariff rate, most favored nation, weighted mean, all products (%)'],
      dtype='object')

## Preprocess Human Development Reports

need to add openpyxl to requirements

In [42]:
hdi = pd.read_excel("HDR21-22_Statistical_Annex_HDI_Table.xlsx")

In [43]:
hdi.head()

Unnamed: 0,HDI rank,Country,Human Development Index (HDI),Life expectancy at birth,Expected years of schooling,Mean years of schooling,Gross national income (GNI) per capita,GNI per capita rank minus HDI rank,Previous HDI rank
0,1,Switzerland,0.962,83.9872,16.500299,13.85966,66933.00454,5,3
1,2,Norway,0.961,83.2339,18.1852,13.00363,64660.10622,6,1
2,3,Iceland,0.959,82.6782,19.163059,13.76717,55782.04981,11,2
3,4,"Hong Kong, China (SAR)",0.952,85.4734,17.27817,12.22621,62606.8454,6,4
4,5,Australia,0.951,84.5265,21.05459,12.72682,49238.43335,18,5


In [44]:
hdi.dtypes

HDI rank                                    int64
Country                                    object
Human Development Index (HDI)             float64
Life expectancy at birth                  float64
Expected years of schooling               float64
Mean years of schooling                   float64
Gross national income (GNI) per capita    float64
GNI per capita rank minus HDI rank          int64
Previous HDI rank                           int64
dtype: object

In [45]:
hdi = hdi.rename(columns={"HDI rank": "rank","Country": "country_name_hdi", "Human Development Index (HDI)": "hdi", "Life expectancy at birth": 'life_exp', "Expected years of schooling": 'exp_years_of_school', "Mean years of schooling": 'mean_years_of_school', "Gross national income (GNI) per capita": 'gni', "GNI per capita rank minus HDI rank": 'gni_minus_hdi', "Previous HDI rank": 'old_hdi' })
hdi

Unnamed: 0,rank,country_name_hdi,Human Development Index (HDI),life_exp,exp_years_of_school,mean_years_of_school,gni,gni_minus_hdi,old_hdi
0,1,Switzerland,0.962,83.9872,16.500299,13.859660,66933.004540,5,3
1,2,Norway,0.961,83.2339,18.185200,13.003630,64660.106220,6,1
2,3,Iceland,0.959,82.6782,19.163059,13.767170,55782.049810,11,2
3,4,"Hong Kong, China (SAR)",0.952,85.4734,17.278170,12.226210,62606.845400,6,4
4,5,Australia,0.951,84.5265,21.054590,12.726820,49238.433350,18,5
...,...,...,...,...,...,...,...,...,...
186,187,Burundi,0.426,61.6627,10.722722,3.129267,731.786709,4,187
187,188,Central African Republic,0.404,53.8947,8.040172,4.334000,966.058611,1,188
188,189,Niger,0.400,61.5763,6.957112,2.116717,1239.866936,-3,189
189,190,Chad,0.394,52.5254,8.035914,2.573774,1364.169417,-7,190


In [46]:
merge_full = pd.merge(merge, hdi, left_on='country_name_exp', right_on='country_name_hdi', 
                      how='inner')

In [57]:
merge_full.columns

Index(['country_name_exp', 'country_code', 'year',
       'Goods exports (BoP, current US$)', 'country_name_imp',
       'Imports of goods, services and primary income (BoP, current US$)',
       'country_name_gdp', 'GDP (current US$)', 'country_name_tar',
       'Tariff rate, most favored nation, weighted mean, all products (%)',
       'rank', 'country_name_hdi', 'Human Development Index (HDI) ',
       'life_exp', 'exp_years_of_school', 'mean_years_of_school', 'gni',
       'gni_minus_hdi', 'old_hdi'],
      dtype='object')

# Database Time

In [48]:
dbserver = psycopg2.connect(
    host = 'postgres',
    user = 'postgres',
    password = POSTGRES_PASSWORD,
    port = 5432
)
dbserver.autocommit = True

In [49]:
cursor = dbserver.cursor()

In [50]:
try:
    cursor.execute('CREATE DATABASE world')
except:
    cursor.execute('DROP DATABASE world')
    cursor.execute('CREATE DATABASE world')

In [51]:
engine = create_engine('postgresql+psycopg2://{user}:{password}@{host}:{port}/{db}'.format(
    user = 'postgres',
    password = POSTGRES_PASSWORD,
    host = 'postgres',
    port = 5432,
    db = 'world'
))

In [52]:
### review if  I even need to merge every 
#worlddf.to_sql('world', con=engine, index=False, chunksize=1000, if_exists = 'replace')

# DBDOCS

In [53]:
#for col in worlddf.columns:
#    print(col, worlddf[col].dtype)

## gives info for DBdocs

# Dashboard

In [54]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

In [55]:
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div(
    [
        ### Stuff on top
        html.H1("Understand the Global Economy"),
        html.H2("Data collected from the World Bank and Human Development"),
        html.H3("DS 6600: Data Engineering 1, UVA Data Science, Semester Project"),

        #### Side Bar
        html.Div([
            dcc.Markdown("Please select a Country"),
            
        ], style = {'width': '24%', 'float':'left'}),
        
        ### main bar
        html.Div([
            dcc.Tabs([
                dcc.Tab(label = 'Imports', children = [
                ]),
                dcc.Tab(label = 'Exports', children = [
                    ## pass
                ]),
                dcc.Tab(label = 'HDI', children = [
                    ## pass
                ]),
                dcc.Tab(label = 'Location', children = [
                    ## pass
                ]),
            ])
        ], style = {'width':'74%', 'float':'right'})
    ]
)

### operate on the next that appears
#@app.callback([Output(component_id = 'biotable', component_property = 'figure')], 
#             [Input(component_id = 'member', component_property = 'value')])
#
#
#
#def biotable(b):
#    myquery = f'''
#    SELECT directordername AS Name,
#        party AS Party,
#        state AS State,
#        CAST(district AS int) AS District,
#        birthyear AS Birthyear,
#        addressinformation_officeaddress AS Address,
#        CONCAT(addressinformation_city, ', ', addressinformation_district) AS City,
#        addressinformation_phonenumber AS Phone,
#        addressinformation_zipcode AS Zipcode
#    FROM members
#    WHERE bioguideid='{b}'
#    '''
#    mydf = pd.read_sql_query(myquery, con=engine)
#    mydf.columns = [x.capitalize() for x in mydf.columns]
#    mydf = mydf.T.reset_index()
#    mydf = mydf.rename({'index':'', 0:''}, axis=1)
#    #print("here")
#    return [ff.create_table(mydf)]



if __name__=="__main__":
    app.run_server(mode='external', host = "0.0.0.0", port = 8050, debug=False)