# Atlas data updates and growth projection notes

Methodological notes, data sources and interactive visualization from http://atlas.cid.harvard.edu/rankings/growth-predictions/

Important links
* [U.N. Comtrade](http://comtrade.un.org) website
* [Atlas Data](https://github.com/cid-harvard/atlas-data) processing

Visualizations
* Table with ranking values
* Slopegraph of growth projections
* Geo-map with growth projection ranking
* Line chart with rankings

In [51]:
# Required modules to run this notebook
from IPython.display import IFrame
import pandas as pd
import numexpr

# Updated growth predictions data files by CID

In [52]:
# Input CSV files, relative to the notebook directory.
# An alternative projections file is left disabled here for reference:
#   'sourceData/atlas_growth_projections_2023.csv'
file_projections, file_rankings = (
    'sourceData/Growth_proj_rankings_2014.csv',
    'sourceData/ECI_rankings_2014.csv',
)

In [53]:
# Load the growth projections and discard the 'rank2014' column that ships
# with the file; only the per-year projection columns are kept.
df = pd.read_csv(file_projections)
del df['rank2014']
df.head()

Unnamed: 0,countryname,tg2004,tg2005,tg2006,tg2007,tg2008,tg2009,tg2010,tg2011,tg2012,tg2013,tg2014,iso
0,India,7.15,6.99,7.26,7.26,7.24,6.92,7.01,7.05,7.02,6.93,6.98,IND
1,Uganda,6.11,6.01,6.35,6.56,6.53,6.7,6.76,6.83,7.15,7.06,6.04,UGA
2,Kenya,6.32,6.51,6.67,6.36,6.5,6.71,6.52,6.2,6.25,6.5,6.0,KEN
3,Tanzania,6.11,5.98,6.34,6.65,6.52,6.47,6.19,6.29,6.74,6.3,5.96,TZA
4,"Egypt, Arab Rep.",5.0,5.18,4.89,5.03,5.16,5.36,5.15,5.29,5.39,5.85,5.83,EGY


In [54]:
# Reshape from wide to long: every non-identifier column becomes a
# (year, value) row for its country.
id_columns = ["iso", "countryname"]
df = pd.melt(df, id_vars=id_columns, var_name="year", value_name="value")
df.head()

Unnamed: 0,iso,countryname,year,value
0,IND,India,tg2004,7.15
1,UGA,Uganda,tg2004,6.11
2,KEN,Kenya,tg2004,6.32
3,TZA,Tanzania,tg2004,6.11
4,EGY,"Egypt, Arab Rep.",tg2004,5.0


In [55]:
# Formatting year (time) column: drop the first two characters of each label
# (e.g. "tg2004" -> "2004"). The vectorised string slice replaces a row-by-row
# iterrows()/.loc loop, which produced the same values far more slowly.
# The year stays a string; later cells compare it against '2014'.
df["year"] = df["year"].str[2:]
df.head()

Unnamed: 0,iso,countryname,year,value
0,IND,India,2004,7.15
1,UGA,Uganda,2004,6.11
2,KEN,Kenya,2004,6.32
3,TZA,Tanzania,2004,6.11
4,EGY,"Egypt, Arab Rep.",2004,5.0


In [56]:
# Rank countries within each year, highest value first; ties are broken by
# order of appearance (method='first'). Missing values get no rank (NaN).
values_by_year = df.groupby('year')['value']
df['rank'] = values_by_year.rank(ascending=False, method='first')

In [57]:
# Preview the best-ranked rows across all years.
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
df.sort_values(['rank'], ascending=True).head(15)

Unnamed: 0,iso,countryname,year,value,rank
0,IND,India,2004,7.15,1
369,IND,India,2007,7.26,1
1230,IND,India,2014,6.98,1
246,IND,India,2006,7.26,1
738,IND,India,2010,7.01,1
985,UGA,Uganda,2012,7.15,1
861,IND,India,2011,7.05,1
492,IND,India,2008,7.24,1
123,IND,India,2005,6.99,1
615,IND,India,2009,6.92,1


In [58]:
# Rows for which no rank could be computed
rank_missing = df['rank'].isnull()
df[rank_missing]

Unnamed: 0,iso,countryname,year,value,rank
38,SRB,Serbia,2004,,
614,SYR,Syrian Arab Republic,2008,,
737,SYR,Syrian Arab Republic,2009,,
860,SYR,Syrian Arab Republic,2010,,
983,SYR,Syrian Arab Republic,2011,,
1106,SYR,Syrian Arab Republic,2012,,
1229,SYR,Syrian Arab Republic,2013,,
1352,SYR,Syrian Arab Republic,2014,,


In [59]:
# Collect the ISO codes of countries that lack a rank for at least one year
# between 2004 and 2014, so they can be discarded.
# sorted() makes the list order reproducible; a bare list(set(...)) depends on
# string hashing and can change between kernel sessions.
countries_null = sorted(set(df.loc[df['rank'].isnull(), 'iso']))
countries_null

['SYR', 'SRB']

In [64]:
# Remove every country listed in countries_null instead of repeating the ISO
# codes by hand, so the filter follows the data if the input file changes.
# .copy() yields an independent frame, so assigning columns to it later does
# not raise SettingWithCopyWarning.
df = df[~df['iso'].isin(countries_null)].copy()

In [65]:
# Convert rank to integer with a vectorised cast rather than a per-element
# lambda. Like int(x), this raises ValueError if a NaN rank is still present.
df['rank'] = df['rank'].astype('int64')

In [66]:
# Preview the first five rows of the frame
df.head(5)

Unnamed: 0,iso,countryname,year,value,rank
0,IND,India,2004,7.15,1
1,UGA,Uganda,2004,6.11,3
2,KEN,Kenya,2004,6.32,2
3,TZA,Tanzania,2004,6.11,4
4,EGY,"Egypt, Arab Rep.",2004,5.0,14


# TODO: generate data for download using this format
<pre>
rank,abbrv,country,eci_value,delta,year,growth_proj_annual_2023
1,JPN,Japan,2.348182,0,2013,2.13
2,CHE,Switzerland,2.331362,0,2013,3.62
3,DEU,Germany,2.03559,0,2013,-1.33
4,KOR,"Korea, Rep.",1.92968,1,2013,3.85
5,SWE,Sweden,1.817256,-1,2013,2.6
</pre>

# Projections of GDP Growth to 2024 Rankings: Selected Top Countries

In [67]:
# Export the long-format rankings as CSV.
# (For an inline HTML preview instead, IPython.display.HTML(df.head().to_html())
# can be used.)
# NOTE(review): this is an absolute path on one developer's machine; the cell
# will fail elsewhere unless the directory exists. Consider a configurable,
# relative output directory.
output_csv = '/Users/rvuillemot/Dev/vis-toolkit-datasets/data/atlas_growth_projections_2024.csv'
df.to_csv(output_csv)

In [68]:
# Explicit names instead of `from ipy_table import *`, so it is clear where
# make_table / apply_theme / set_row_style come from.
from ipy_table import make_table, apply_theme, set_row_style
import numpy as np

# Only show the top-10 rows for 2014 ('year' holds strings, hence '2014').
# The second reset_index() turns the fresh 0..9 index into a leading column.
df_table = df[(df['year'] == '2014')].head(10).reset_index(drop=True).reset_index()

# DataFrame.as_matrix() was removed from pandas; .values returns the same array.
table = df_table.values

# Build the header from a plain list copy of the column labels: editing
# np.asarray(df_table.columns) in place can write through to the frame's own
# column index.
header = df_table.columns.tolist()
header[0] = 'Index'
header[1] = 'Country'
table_with_header = np.concatenate(([header], table))

# Basic themes
# Details: http://nbviewer.ipython.org/github/epmoyer/ipy_table/blob/master/ipy_table-Introduction.ipynb
make_table(table_with_header)
apply_theme('basic')
# Highlight row 1, i.e. the first data row below the header
set_row_style(1, color='yellow')

0,1,2,3,4,5
Index,Country,countryname,year,value,rank
0,IND,India,2014,6.98,1
1,UGA,Uganda,2014,6.04,2
2,KEN,Kenya,2014,6.0,3
3,TZA,Tanzania,2014,5.96,4
4,EGY,"Egypt, Arab Rep.",2014,5.83,5
5,MDG,Madagascar,2014,5.78,6
6,SEN,Senegal,2014,5.77,7
7,PHL,Philippines,2014,5.68,8
8,MWI,Malawi,2014,5.66,9


# Economic Complexity Index: Rank of Expected GDP Growth to 2023


In [71]:
# Embed the hosted geo-map + tick visualization
geomap_url = 'https://cid-harvard.github.io/vis-toolkit/examples/geomap_and_tick.html'
IFrame(geomap_url, width=900, height=450)

# Biggest Winners and Losers in Economic Complexity: 2004-2014


In [30]:
# Embed the hosted slopegraph of projections
slopegraph_url = 'https://cid-harvard.github.io/vis-toolkit/examples/slopegraph_projections.html'
IFrame(slopegraph_url, width=900, height=350)

# Economic Complexity Index: 2004-2014 Country Rankings – Top 25 Countries

The graph below shows the changes in ECI between 2004 and 2014.

In [72]:
# Embed the hosted line chart of projections
linechart_url = 'https://cid-harvard.github.io/vis-toolkit/examples/linechart_projections.html'
IFrame(linechart_url, width=900, height=350)

In [28]:
# Geomap of ECI by country and grid of countries
# NOTE(review): the embedded page is named "barchart_vertical_projections",
# which does not obviously match the description above; confirm which is intended.
barchart_url = 'https://cid-harvard.github.io/vis-toolkit/examples/barchart_vertical_projections.html'
IFrame(barchart_url, width=900, height=550)

# How many countries report their data every year?

In [73]:
# Embed the hosted bar-chart histogram
histogram_url = 'https://cid-harvard.github.io/vis-toolkit/examples/barchart_histogram.html'
IFrame(histogram_url, width=900, height=550)