# JSON examples and exercise
****
+ get familiar with packages for dealing with JSON
+ study examples with JSON strings and files 
+ work on exercise to be completed and submitted 
****
+ reference: http://pandas.pydata.org/pandas-docs/stable/io.html#io-json-reader
****

In [1]:
import pandas as pd

## imports for Python, Pandas

In [2]:
import json

# json_normalize has lived in the top-level pandas namespace since pandas 1.0;
# the pandas.io.json path is deprecated. Try the current location first and
# fall back to the legacy one so the notebook runs on old and new pandas alike.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

## JSON example, with string

+ demonstrates creation of normalized dataframes (tables) from nested json string
+ source: http://pandas.pydata.org/pandas-docs/stable/io.html#normalization

In [3]:
# Sample nested records: a list with one dict per state. Each state carries
# scalar fields, a nested 'info' dict and a list of 'counties' records.
data = [
    {
        'state': 'Florida',
        'shortname': 'FL',
        'info': {'governor': 'Rick Scott'},
        'counties': [
            {'name': 'Dade', 'population': 12345},
            {'name': 'Broward', 'population': 40000},
            {'name': 'Palm Beach', 'population': 60000},
        ],
    },
    {
        'state': 'Ohio',
        'shortname': 'OH',
        'info': {'governor': 'John Kasich'},
        'counties': [
            {'name': 'Summit', 'population': 1234},
            {'name': 'Cuyahoga', 'population': 1337},
        ],
    },
]

In [4]:
# Flatten the nested 'counties' records into a table with one row per county
json_normalize(data, record_path='counties')

Unnamed: 0,name,population
0,Dade,12345
1,Broward,40000
2,Palm Beach,60000
3,Summit,1234
4,Cuyahoga,1337


In [5]:
# Flatten 'counties' again, carrying parent-level fields along as extra columns;
# a nested list such as ['info', 'governor'] is a path into the parent record
json_normalize(
    data,
    record_path='counties',
    meta=['state', 'shortname', ['info', 'governor']],
)

Unnamed: 0,name,population,state,shortname,info.governor
0,Dade,12345,Florida,FL,Rick Scott
1,Broward,40000,Florida,FL,Rick Scott
2,Palm Beach,60000,Florida,FL,Rick Scott
3,Summit,1234,Ohio,OH,John Kasich
4,Cuyahoga,1337,Ohio,OH,John Kasich


****
## JSON example, with file

+ demonstrates reading in a json file as a string and as a table
+ uses small sample file containing data about projects funded by the World Bank 

In [6]:
# Parse the JSON file into plain Python objects (parsed data, not a raw string).
# The context manager closes the file handle as soon as parsing is done.
with open('data/world_bank_projects_less.json') as sample_file:
    sample_json = json.load(sample_file)
sample_json

[{'_id': {'$oid': '52b213b38594d8a2be17c780'},
  'approvalfy': 1999,
  'board_approval_month': 'November',
  'boardapprovaldate': '2013-11-12T00:00:00Z',
  'borrower': 'FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA',
  'closingdate': '2018-07-07T00:00:00Z',
  'country_namecode': 'Federal Democratic Republic of Ethiopia!$!ET',
  'countrycode': 'ET',
  'countryname': 'Federal Democratic Republic of Ethiopia',
  'countryshortname': 'Ethiopia',
  'docty': 'Project Information Document,Indigenous Peoples Plan,Project Information Document',
  'envassesmentcategorycode': 'C',
  'grantamt': 0,
  'ibrdcommamt': 0,
  'id': 'P129828',
  'idacommamt': 130000000,
  'impagency': 'MINISTRY OF EDUCATION',
  'lendinginstr': 'Investment Project Financing',
  'lendinginstrtype': 'IN',
  'lendprojectcost': 550000000,
  'majorsector_percent': [{'Name': 'Education', 'Percent': 46},
   {'Name': 'Education', 'Percent': 26},
   {'Name': 'Public Administration, Law, and Justice', 'Percent': 16},
   {'Name': 'Educatio

In [7]:
# Load the sample file as a pandas DataFrame
sample_json_df = pd.read_json('data/world_bank_projects_less.json')

# Keep the column names as a list; later cells slice this list to view
# the full dataset a few columns at a time
col_names = list(sample_json_df.columns)
print(col_names)

['_id', 'approvalfy', 'board_approval_month', 'boardapprovaldate', 'borrower', 'closingdate', 'country_namecode', 'countrycode', 'countryname', 'countryshortname', 'docty', 'envassesmentcategorycode', 'grantamt', 'ibrdcommamt', 'id', 'idacommamt', 'impagency', 'lendinginstr', 'lendinginstrtype', 'lendprojectcost', 'majorsector_percent', 'mjsector_namecode', 'mjtheme', 'mjtheme_namecode', 'mjthemecode', 'prodline', 'prodlinetext', 'productlinetype', 'project_abstract', 'project_name', 'projectdocs', 'projectfinancialtype', 'projectstatusdisplay', 'regionname', 'sector', 'sector1', 'sector2', 'sector3', 'sector4', 'sector_namecode', 'sectorcode', 'source', 'status', 'supplementprojectflg', 'theme1', 'theme_namecode', 'themecode', 'totalamt', 'totalcommamt', 'url']


In [8]:
# Preview the first five rows of the sample dataframe
sample_json_df.head(n=5)

Unnamed: 0,_id,approvalfy,board_approval_month,boardapprovaldate,borrower,closingdate,country_namecode,countrycode,countryname,countryshortname,...,sectorcode,source,status,supplementprojectflg,theme1,theme_namecode,themecode,totalamt,totalcommamt,url
0,{'$oid': '52b213b38594d8a2be17c780'},1999,November,2013-11-12T00:00:00Z,FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA,2018-07-07T00:00:00Z,Federal Democratic Republic of Ethiopia!$!ET,ET,Federal Democratic Republic of Ethiopia,Ethiopia,...,"ET,BS,ES,EP",IBRD,Active,N,"{'Name': 'Education for all', 'Percent': 100}","[{'name': 'Education for all', 'code': '65'}]",65,130000000,130000000,http://www.worldbank.org/projects/P129828/ethi...
1,{'$oid': '52b213b38594d8a2be17c781'},2015,November,2013-11-04T00:00:00Z,GOVERNMENT OF TUNISIA,,Republic of Tunisia!$!TN,TN,Republic of Tunisia,Tunisia,...,"BZ,BS",IBRD,Active,N,"{'Name': 'Other economic management', 'Percent...","[{'name': 'Other economic management', 'code':...",5424,0,4700000,http://www.worldbank.org/projects/P144674?lang=en


****
## JSON exercise

Using data in file 'data/world_bank_projects.json' and the techniques demonstrated above,
1. Find the 10 countries with most projects
2. Find the top 10 major project themes (using column 'mjtheme_namecode')
3. In 2. above you will notice that some entries have only the code and the name is missing. Create a dataframe with the missing names filled in.

In [9]:
# Read the complete projects file into a DataFrame, report its type and
# number of rows, then preview the first five rows
json_df = pd.read_json('data/world_bank_projects.json')
print(type(json_df), json_df.shape[0])
json_df.head()

<class 'pandas.core.frame.DataFrame'> 500


Unnamed: 0,sector,supplementprojectflg,projectfinancialtype,prodline,mjtheme,idacommamt,impagency,project_name,mjthemecode,closingdate,...,majorsector_percent,board_approval_month,theme_namecode,countryname,url,source,projectstatusdisplay,ibrdcommamt,sector_namecode,_id
0,"[{'Name': 'Primary education'}, {'Name': 'Seco...",N,IDA,PE,[Human development],130000000,MINISTRY OF EDUCATION,Ethiopia General Education Quality Improvement...,811,2018-07-07T00:00:00Z,...,"[{'Percent': 46, 'Name': 'Education'}, {'Perce...",November,"[{'code': '65', 'name': 'Education for all'}]",Federal Democratic Republic of Ethiopia,http://www.worldbank.org/projects/P129828/ethi...,IBRD,Active,0,"[{'code': 'EP', 'name': 'Primary education'}, ...",{'$oid': '52b213b38594d8a2be17c780'}
1,[{'Name': 'Public administration- Other social...,N,OTHER,RE,"[Economic management, Social protection and ri...",0,MINISTRY OF FINANCE,TN: DTF Social Protection Reforms Support,16,,...,"[{'Percent': 70, 'Name': 'Public Administratio...",November,"[{'code': '24', 'name': 'Other economic manage...",Republic of Tunisia,http://www.worldbank.org/projects/P144674?lang=en,IBRD,Active,0,"[{'code': 'BS', 'name': 'Public administration...",{'$oid': '52b213b38594d8a2be17c781'}
2,[{'Name': 'Rural and Inter-Urban Roads and Hig...,Y,IDA,PE,"[Trade and integration, Public sector governan...",6060000,MINISTRY OF TRANSPORT AND COMMUNICATIONS,Tuvalu Aviation Investment Project - Additiona...,52116,,...,"[{'Percent': 100, 'Name': 'Transportation'}]",November,"[{'code': '47', 'name': 'Regional integration'...",Tuvalu,http://www.worldbank.org/projects/P145310?lang=en,IBRD,Active,0,"[{'code': 'TI', 'name': 'Rural and Inter-Urban...",{'$oid': '52b213b38594d8a2be17c782'}
3,[{'Name': 'Other social services'}],N,OTHER,RE,"[Social dev/gender/inclusion, Social dev/gende...",0,LABOR INTENSIVE PUBLIC WORKS PROJECT PMU,Gov't and Civil Society Organization Partnership,77,,...,"[{'Percent': 100, 'Name': 'Health and other so...",October,"[{'code': '57', 'name': 'Participation and civ...",Republic of Yemen,http://www.worldbank.org/projects/P144665?lang=en,IBRD,Active,0,"[{'code': 'JB', 'name': 'Other social services'}]",{'$oid': '52b213b38594d8a2be17c783'}
4,[{'Name': 'General industry and trade sector'}...,N,IDA,PE,"[Trade and integration, Financial and private ...",13100000,MINISTRY OF TRADE AND INDUSTRY,Second Private Sector Competitiveness and Econ...,54,2019-04-30T00:00:00Z,...,"[{'Percent': 50, 'Name': 'Industry and trade'}...",October,"[{'code': '45', 'name': 'Export development an...",Kingdom of Lesotho,http://www.worldbank.org/projects/P144933/seco...,IBRD,Active,0,"[{'code': 'YZ', 'name': 'General industry and ...",{'$oid': '52b213b38594d8a2be17c784'}


In [10]:
# The following cells view all the columns in chunks of five.
# col_names was built from the sample file's columns; this shows positions 0-4.
json_df[col_names[:5]].head()

Unnamed: 0,_id,approvalfy,board_approval_month,boardapprovaldate,borrower
0,{'$oid': '52b213b38594d8a2be17c780'},1999,November,2013-11-12T00:00:00Z,FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA
1,{'$oid': '52b213b38594d8a2be17c781'},2015,November,2013-11-04T00:00:00Z,GOVERNMENT OF TUNISIA
2,{'$oid': '52b213b38594d8a2be17c782'},2014,November,2013-11-01T00:00:00Z,MINISTRY OF FINANCE AND ECONOMIC DEVEL
3,{'$oid': '52b213b38594d8a2be17c783'},2014,October,2013-10-31T00:00:00Z,MIN. OF PLANNING AND INT'L COOPERATION
4,{'$oid': '52b213b38594d8a2be17c784'},2014,October,2013-10-31T00:00:00Z,MINISTRY OF FINANCE


In [11]:
# Columns at positions 5-9 of col_names
json_df[col_names[5:10]].head()

Unnamed: 0,closingdate,country_namecode,countrycode,countryname,countryshortname
0,2018-07-07T00:00:00Z,Federal Democratic Republic of Ethiopia!$!ET,ET,Federal Democratic Republic of Ethiopia,Ethiopia
1,,Republic of Tunisia!$!TN,TN,Republic of Tunisia,Tunisia
2,,Tuvalu!$!TV,TV,Tuvalu,Tuvalu
3,,Republic of Yemen!$!RY,RY,Republic of Yemen,"Yemen, Republic of"
4,2019-04-30T00:00:00Z,Kingdom of Lesotho!$!LS,LS,Kingdom of Lesotho,Lesotho


In [12]:
# Columns at positions 10-14 of col_names
json_df[col_names[10:15]].head()

Unnamed: 0,docty,envassesmentcategorycode,grantamt,ibrdcommamt,id
0,"Project Information Document,Indigenous People...",C,0,0,P129828
1,"Project Information Document,Integrated Safegu...",C,4700000,0,P144674
2,"Resettlement Plan,Environmental Assessment,Int...",B,0,0,P145310
3,"Procurement Plan,Project Information Document,...",C,1500000,0,P144665
4,"Project Information Document,Integrated Safegu...",B,0,0,P144933


In [13]:
# Columns at positions 15-19 of col_names
json_df[col_names[15:20]].head()

Unnamed: 0,idacommamt,impagency,lendinginstr,lendinginstrtype,lendprojectcost
0,130000000,MINISTRY OF EDUCATION,Investment Project Financing,IN,550000000
1,0,MINISTRY OF FINANCE,Specific Investment Loan,IN,5700000
2,6060000,MINISTRY OF TRANSPORT AND COMMUNICATIONS,Investment Project Financing,IN,6060000
3,0,LABOR INTENSIVE PUBLIC WORKS PROJECT PMU,Technical Assistance Loan,IN,1500000
4,13100000,MINISTRY OF TRADE AND INDUSTRY,Investment Project Financing,IN,15000000


In [14]:
# Columns at positions 20-24 of col_names
json_df[col_names[20:25]].head()

Unnamed: 0,majorsector_percent,mjsector_namecode,mjtheme,mjtheme_namecode,mjthemecode
0,"[{'Percent': 46, 'Name': 'Education'}, {'Perce...","[{'code': 'EX', 'name': 'Education'}, {'code':...",[Human development],"[{'code': '8', 'name': 'Human development'}, {...",811
1,"[{'Percent': 70, 'Name': 'Public Administratio...","[{'code': 'BX', 'name': 'Public Administration...","[Economic management, Social protection and ri...","[{'code': '1', 'name': 'Economic management'},...",16
2,"[{'Percent': 100, 'Name': 'Transportation'}]","[{'code': 'TX', 'name': 'Transportation'}]","[Trade and integration, Public sector governan...","[{'code': '5', 'name': 'Trade and integration'...",52116
3,"[{'Percent': 100, 'Name': 'Health and other so...","[{'code': 'JX', 'name': 'Health and other soci...","[Social dev/gender/inclusion, Social dev/gende...","[{'code': '7', 'name': 'Social dev/gender/incl...",77
4,"[{'Percent': 50, 'Name': 'Industry and trade'}...","[{'code': 'YX', 'name': 'Industry and trade'},...","[Trade and integration, Financial and private ...","[{'code': '5', 'name': 'Trade and integration'...",54


In [15]:
# Columns at positions 25-29 of col_names
json_df[col_names[25:30]].head()

Unnamed: 0,prodline,prodlinetext,productlinetype,project_abstract,project_name
0,PE,IBRD/IDA,L,{'cdata': 'The development objective of the Se...,Ethiopia General Education Quality Improvement...
1,RE,Recipient Executed Activities,L,,TN: DTF Social Protection Reforms Support
2,PE,IBRD/IDA,L,,Tuvalu Aviation Investment Project - Additiona...
3,RE,Recipient Executed Activities,L,,Gov't and Civil Society Organization Partnership
4,PE,IBRD/IDA,L,{'cdata': 'The development objective of the Se...,Second Private Sector Competitiveness and Econ...


In [16]:
# Columns at positions 30-34 of col_names
json_df[col_names[30:35]].head()

Unnamed: 0,projectdocs,projectfinancialtype,projectstatusdisplay,regionname,sector
0,"[{'DocDate': '28-AUG-2013', 'EntityID': '09022...",IDA,Active,Africa,"[{'Name': 'Primary education'}, {'Name': 'Seco..."
1,"[{'DocDate': '29-MAR-2013', 'EntityID': '00033...",OTHER,Active,Middle East and North Africa,[{'Name': 'Public administration- Other social...
2,"[{'DocDate': '21-OCT-2013', 'EntityID': '00033...",IDA,Active,East Asia and Pacific,[{'Name': 'Rural and Inter-Urban Roads and Hig...
3,"[{'DocDate': '15-MAY-2013', 'EntityID': '00035...",OTHER,Active,Middle East and North Africa,[{'Name': 'Other social services'}]
4,"[{'DocDate': '06-SEP-2013', 'EntityID': '09022...",IDA,Active,Africa,[{'Name': 'General industry and trade sector'}...


In [17]:
# Columns at positions 35-39 of col_names
json_df[col_names[35:40]].head()

Unnamed: 0,sector1,sector2,sector3,sector4,sector_namecode
0,"{'Percent': 46, 'Name': 'Primary education'}","{'Percent': 26, 'Name': 'Secondary education'}","{'Percent': 16, 'Name': 'Public administration...","{'Percent': 12, 'Name': 'Tertiary education'}","[{'code': 'EP', 'name': 'Primary education'}, ..."
1,"{'Percent': 70, 'Name': 'Public administration...","{'Percent': 30, 'Name': 'General public admini...",,,"[{'code': 'BS', 'name': 'Public administration..."
2,"{'Percent': 100, 'Name': 'Rural and Inter-Urba...",,,,"[{'code': 'TI', 'name': 'Rural and Inter-Urban..."
3,"{'Percent': 100, 'Name': 'Other social services'}",,,,"[{'code': 'JB', 'name': 'Other social services'}]"
4,"{'Percent': 50, 'Name': 'General industry and ...","{'Percent': 40, 'Name': 'Other industry'}","{'Percent': 10, 'Name': 'SME Finance'}",,"[{'code': 'YZ', 'name': 'General industry and ..."


In [18]:
# Columns at positions 40-44 of col_names
json_df[col_names[40:45]].head()

Unnamed: 0,sectorcode,source,status,supplementprojectflg,theme1
0,"ET,BS,ES,EP",IBRD,Active,N,"{'Percent': 100, 'Name': 'Education for all'}"
1,"BZ,BS",IBRD,Active,N,"{'Percent': 30, 'Name': 'Other economic manage..."
2,TI,IBRD,Active,Y,"{'Percent': 46, 'Name': 'Regional integration'}"
3,JB,IBRD,Active,N,"{'Percent': 50, 'Name': 'Participation and civ..."
4,"FH,YW,YZ",IBRD,Active,N,"{'Percent': 30, 'Name': 'Export development an..."


In [19]:
# Remaining columns, from position 45 of col_names to the end
json_df[col_names[45:]].head()

Unnamed: 0,theme_namecode,themecode,totalamt,totalcommamt,url
0,"[{'code': '65', 'name': 'Education for all'}]",65,130000000,130000000,http://www.worldbank.org/projects/P129828/ethi...
1,"[{'code': '24', 'name': 'Other economic manage...",5424,0,4700000,http://www.worldbank.org/projects/P144674?lang=en
2,"[{'code': '47', 'name': 'Regional integration'...",52812547,6060000,6060000,http://www.worldbank.org/projects/P145310?lang=en
3,"[{'code': '57', 'name': 'Participation and civ...",5957,0,1500000,http://www.worldbank.org/projects/P144665?lang=en
4,"[{'code': '45', 'name': 'Export development an...",4145,13100000,13100000,http://www.worldbank.org/projects/P144933/seco...


In [56]:
# Q1: Find the top 10 countries with the most projects

# Count projects per country. The pivot table is built here so the cell runs on a
# fresh kernel; no other cell in the notebook defines it.
# NOTE(review): reconstructed from the displayed result (index 'countryname',
# counted column 'project_name') -- confirm it matches the original definition.
Cntry_Prj_Pvt = json_df.pivot_table(index='countryname', values='project_name', aggfunc='count')

# Order countries from most to fewest projects and give the count column a clear label
sorted_pvt = (
    Cntry_Prj_Pvt
    .sort_values('project_name', ascending=False)
    .rename(columns={'project_name': 'Project_Cnt'})
)
sorted_pvt.head(10)

Unnamed: 0_level_0,Project_Cnt
countryname,Unnamed: 1_level_1
People's Republic of China,19
Republic of Indonesia,19
Socialist Republic of Vietnam,17
Republic of India,16
Republic of Yemen,13
People's Republic of Bangladesh,12
Nepal,12
Kingdom of Morocco,12
Republic of Mozambique,11
Africa,11


In [22]:
# Parse the full JSON file into Python objects; the context manager closes
# the file handle as soon as parsing is done
with open('data/world_bank_projects.json') as json_file:
    json_data = json.load(json_file)

# Look into the data structure of the nested 'mjtheme_namecode' column
print(json_df.loc[1,'mjtheme_namecode'])

# Flatten every project's 'mjtheme_namecode' records into one table
mj_theme_df = json_normalize(json_data, 'mjtheme_namecode')

# NOTE: this binds a second name to the SAME dataframe (no copy is made), so the
# 'Ones' column added below also appears on mj_theme_df. Later cells rely on that
# (they drop 'Ones' after merging), so the aliasing is intentionally kept.
mj_theme_df1 = mj_theme_df

# Add a column of 1s so that summing it per theme yields an occurrence count
mj_theme_df1['Ones'] = 1
mj_theme_df1.head()


[{'code': '1', 'name': 'Economic management'}, {'code': '6', 'name': 'Social protection and risk management'}]


Unnamed: 0,code,name,Ones
0,8,Human development,1
1,11,,1
2,1,Economic management,1
3,6,Social protection and risk management,1
4,5,Trade and integration,1


In [65]:
# Q2: rank the major project themes by how often they occur

# Count occurrences per theme name by summing the helper column of 1s,
# ordered from most to least frequent
theme_counts = mj_theme_df1.pivot_table(index='name', values='Ones', aggfunc='sum')
theme_counts = theme_counts.sort_values('Ones', ascending=False)

# Discard the row that aggregates the entries whose name is blank
theme_counts = theme_counts.drop('')

# Number the remaining themes 1..N in their sorted order
theme_counts['rank'] = list(range(1, len(theme_counts) + 1))

# Give the count column and the index descriptive labels
mj_theme_pvt = theme_counts.rename(columns={'Ones': 'Theme_Cnt'}).rename_axis('Major_Theme')

# View the table
mj_theme_pvt

Unnamed: 0_level_0,Theme_Cnt,rank
Major_Theme,Unnamed: 1_level_1,Unnamed: 2_level_1
Environment and natural resources management,223,1
Rural development,202,2
Human development,197,3
Public sector governance,184,4
Social protection and risk management,158,5
Financial and private sector development,130,6
Social dev/gender/inclusion,119,7
Trade and integration,72,8
Urban development,47,9
Economic management,33,10


In [24]:
# Q3: fill in the blank major-theme names

# Build the code -> name lookup: unique rows, minus those whose name is blank,
# ordered by code (codes are strings, so the order is lexicographic)
code_theme_df = mj_theme_df.drop_duplicates()
not_blank = code_theme_df['name'].ne('')
code_theme_df_sorted = code_theme_df[not_blank].sort_values('code')

code_theme_df_sorted

Unnamed: 0,code,name,Ones
2,1,Economic management,1
18,10,Rural development,1
6,11,Environment and natural resources management,1
5,2,Public sector governance,1
252,3,Rule of law,1
11,4,Financial and private sector development,1
4,5,Trade and integration,1
3,6,Social protection and risk management,1
8,7,Social dev/gender/inclusion,1
0,8,Human development,1


In [25]:
# Left-join the lookup table onto every theme row so each code gets its name,
# including the rows whose original name was blank
merged_df = mj_theme_df.merge(
    code_theme_df_sorted,
    how='left',
    on='code',
    suffixes=('_Original', '_Filled'),
)

# Keep only the code and the filled-in name, under descriptive column labels
merged_df_final = merged_df[['code', 'name_Filled']].rename(
    columns={'code': 'Major_Proj_Code', 'name_Filled': 'Major_Proj_Name'}
)


In [26]:
# Display the theme table with the missing names filled in
merged_df_final

Unnamed: 0,Major_Proj_Code,Major_Proj_Name
0,8,Human development
1,11,Environment and natural resources management
2,1,Economic management
3,6,Social protection and risk management
4,5,Trade and integration
...,...,...
1494,10,Rural development
1495,9,Urban development
1496,8,Human development
1497,5,Trade and integration


In [39]:
# Flatten the major-theme records again, this time tagging each row with its
# project 'id' so the themes can be joined back onto the main table
proj_id_df = (
    json_normalize(json_data, record_path='mjtheme_namecode', meta=['id'])
    .sort_values('id')
)

In [50]:
# Attach the looked-up theme name to every (project id, theme code) row,
# drop the original name and the 'Ones' helper column, and relabel the rest
proj_id_df_filled = (
    proj_id_df
    .merge(code_theme_df_sorted, on='code', how='left', suffixes=('_Original', '_Filled'))
    .drop(columns=['Ones', 'name_Original'])
    .rename(columns={'code': 'Major_Proj_Code', 'name_Filled': 'Major_Proj_Name'})
)

proj_id_df_filled.head()

Unnamed: 0,Major_Proj_Code,id,Major_Proj_Name
0,10,P075941,Rural development
1,5,P075941,Trade and integration
2,4,P075941,Financial and private sector development
3,7,P075941,Social dev/gender/inclusion
4,11,P085621,Environment and natural resources management


In [53]:
# Outer-join the filled-in themes onto the main project table by project 'id'
final_df = json_df.merge(proj_id_df_filled, on='id', how='outer')
final_df.head()

Unnamed: 0,sector,supplementprojectflg,projectfinancialtype,prodline,mjtheme,idacommamt,impagency,project_name,mjthemecode,closingdate,...,theme_namecode,countryname,url,source,projectstatusdisplay,ibrdcommamt,sector_namecode,_id,Major_Proj_Code,Major_Proj_Name
0,"[{'Name': 'Primary education'}, {'Name': 'Seco...",N,IDA,PE,[Human development],130000000,MINISTRY OF EDUCATION,Ethiopia General Education Quality Improvement...,811,2018-07-07T00:00:00Z,...,"[{'code': '65', 'name': 'Education for all'}]",Federal Democratic Republic of Ethiopia,http://www.worldbank.org/projects/P129828/ethi...,IBRD,Active,0,"[{'code': 'EP', 'name': 'Primary education'}, ...",{'$oid': '52b213b38594d8a2be17c780'},8,Human development
1,"[{'Name': 'Primary education'}, {'Name': 'Seco...",N,IDA,PE,[Human development],130000000,MINISTRY OF EDUCATION,Ethiopia General Education Quality Improvement...,811,2018-07-07T00:00:00Z,...,"[{'code': '65', 'name': 'Education for all'}]",Federal Democratic Republic of Ethiopia,http://www.worldbank.org/projects/P129828/ethi...,IBRD,Active,0,"[{'code': 'EP', 'name': 'Primary education'}, ...",{'$oid': '52b213b38594d8a2be17c780'},11,Environment and natural resources management
2,[{'Name': 'Public administration- Other social...,N,OTHER,RE,"[Economic management, Social protection and ri...",0,MINISTRY OF FINANCE,TN: DTF Social Protection Reforms Support,16,,...,"[{'code': '24', 'name': 'Other economic manage...",Republic of Tunisia,http://www.worldbank.org/projects/P144674?lang=en,IBRD,Active,0,"[{'code': 'BS', 'name': 'Public administration...",{'$oid': '52b213b38594d8a2be17c781'},6,Social protection and risk management
3,[{'Name': 'Public administration- Other social...,N,OTHER,RE,"[Economic management, Social protection and ri...",0,MINISTRY OF FINANCE,TN: DTF Social Protection Reforms Support,16,,...,"[{'code': '24', 'name': 'Other economic manage...",Republic of Tunisia,http://www.worldbank.org/projects/P144674?lang=en,IBRD,Active,0,"[{'code': 'BS', 'name': 'Public administration...",{'$oid': '52b213b38594d8a2be17c781'},1,Economic management
4,[{'Name': 'Rural and Inter-Urban Roads and Hig...,Y,IDA,PE,"[Trade and integration, Public sector governan...",6060000,MINISTRY OF TRANSPORT AND COMMUNICATIONS,Tuvalu Aviation Investment Project - Additiona...,52116,,...,"[{'code': '47', 'name': 'Regional integration'...",Tuvalu,http://www.worldbank.org/projects/P145310?lang=en,IBRD,Active,0,"[{'code': 'TI', 'name': 'Rural and Inter-Urban...",{'$oid': '52b213b38594d8a2be17c782'},5,Trade and integration


In [66]:
# Answers to the exercise

# 1. Ten countries with the most projects (label fixed: 'Countries', not 'Counties')
print('Top 10 Countries')
print(sorted_pvt.head(10))

# 2. Ten most frequent major project themes
print('Top 10 Major Project Theme')
print(mj_theme_pvt.head(10))

# 3. Main table expanded with the filled-in major theme code and name
print('Expanded DataFrame')
final_df.head()


Top 10 Counties
                                 Project_Cnt
countryname                                 
People's Republic of China                19
Republic of Indonesia                     19
Socialist Republic of Vietnam             17
Republic of India                         16
Republic of Yemen                         13
People's Republic of Bangladesh           12
Nepal                                     12
Kingdom of Morocco                        12
Republic of Mozambique                    11
Africa                                    11
Top 10 Major Project Theme
                                              Theme_Cnt  rank
Major_Theme                                                  
Environment and natural resources management        223     1
Rural development                                   202     2
Human development                                   197     3
Public sector governance                            184     4
Social protection and risk management        

Unnamed: 0,sector,supplementprojectflg,projectfinancialtype,prodline,mjtheme,idacommamt,impagency,project_name,mjthemecode,closingdate,...,theme_namecode,countryname,url,source,projectstatusdisplay,ibrdcommamt,sector_namecode,_id,Major_Proj_Code,Major_Proj_Name
0,"[{'Name': 'Primary education'}, {'Name': 'Seco...",N,IDA,PE,[Human development],130000000,MINISTRY OF EDUCATION,Ethiopia General Education Quality Improvement...,811,2018-07-07T00:00:00Z,...,"[{'code': '65', 'name': 'Education for all'}]",Federal Democratic Republic of Ethiopia,http://www.worldbank.org/projects/P129828/ethi...,IBRD,Active,0,"[{'code': 'EP', 'name': 'Primary education'}, ...",{'$oid': '52b213b38594d8a2be17c780'},8,Human development
1,"[{'Name': 'Primary education'}, {'Name': 'Seco...",N,IDA,PE,[Human development],130000000,MINISTRY OF EDUCATION,Ethiopia General Education Quality Improvement...,811,2018-07-07T00:00:00Z,...,"[{'code': '65', 'name': 'Education for all'}]",Federal Democratic Republic of Ethiopia,http://www.worldbank.org/projects/P129828/ethi...,IBRD,Active,0,"[{'code': 'EP', 'name': 'Primary education'}, ...",{'$oid': '52b213b38594d8a2be17c780'},11,Environment and natural resources management
2,[{'Name': 'Public administration- Other social...,N,OTHER,RE,"[Economic management, Social protection and ri...",0,MINISTRY OF FINANCE,TN: DTF Social Protection Reforms Support,16,,...,"[{'code': '24', 'name': 'Other economic manage...",Republic of Tunisia,http://www.worldbank.org/projects/P144674?lang=en,IBRD,Active,0,"[{'code': 'BS', 'name': 'Public administration...",{'$oid': '52b213b38594d8a2be17c781'},6,Social protection and risk management
3,[{'Name': 'Public administration- Other social...,N,OTHER,RE,"[Economic management, Social protection and ri...",0,MINISTRY OF FINANCE,TN: DTF Social Protection Reforms Support,16,,...,"[{'code': '24', 'name': 'Other economic manage...",Republic of Tunisia,http://www.worldbank.org/projects/P144674?lang=en,IBRD,Active,0,"[{'code': 'BS', 'name': 'Public administration...",{'$oid': '52b213b38594d8a2be17c781'},1,Economic management
4,[{'Name': 'Rural and Inter-Urban Roads and Hig...,Y,IDA,PE,"[Trade and integration, Public sector governan...",6060000,MINISTRY OF TRANSPORT AND COMMUNICATIONS,Tuvalu Aviation Investment Project - Additiona...,52116,,...,"[{'code': '47', 'name': 'Regional integration'...",Tuvalu,http://www.worldbank.org/projects/P145310?lang=en,IBRD,Active,0,"[{'code': 'TI', 'name': 'Rural and Inter-Urban...",{'$oid': '52b213b38594d8a2be17c782'},5,Trade and integration
