In [1]:
import pandas as pd

In [2]:
from collections import Counter

In [3]:
# Load the World Bank projects dataset from its JSON file into a dataframe
wb_df = pd.read_json('data/world_bank_projects.json')

# Count projects per 'countryname' value and keep the ten largest counts
# (value_counts() already returns the counts in descending order)
wb_df.loc[:, 'countryname'].value_counts().iloc[:10]

People's Republic of China         19
Republic of Indonesia              19
Socialist Republic of Vietnam      17
Republic of India                  16
Republic of Yemen                  13
Nepal                              12
People's Republic of Bangladesh    12
Kingdom of Morocco                 12
Republic of Mozambique             11
Africa                             11
Name: countryname, dtype: int64

In [4]:
# Preview the country name next to its list of major-theme code/name records
wb_df.loc[:, ['countryname', 'mjtheme_namecode']].head(10)

Unnamed: 0,countryname,mjtheme_namecode
0,Federal Democratic Republic of Ethiopia,"[{'code': '8', 'name': 'Human development'}, {..."
1,Republic of Tunisia,"[{'code': '1', 'name': 'Economic management'},..."
2,Tuvalu,"[{'code': '5', 'name': 'Trade and integration'..."
3,Republic of Yemen,"[{'code': '7', 'name': 'Social dev/gender/incl..."
4,Kingdom of Lesotho,"[{'code': '5', 'name': 'Trade and integration'..."
5,Republic of Kenya,"[{'code': '6', 'name': 'Social protection and ..."
6,Republic of India,"[{'code': '2', 'name': 'Public sector governan..."
7,People's Republic of China,"[{'code': '11', 'name': 'Environment and natur..."
8,Republic of India,"[{'code': '10', 'name': 'Rural development'}, ..."
9,Kingdom of Morocco,"[{'code': '2', 'name': 'Public sector governan..."


In [5]:
# Tally how often each major project theme occurs across all projects
mjtheme = wb_df['mjtheme_namecode']

# Every row holds a list of {'code': ..., 'name': ...} records; the two fields
# are counted separately so the tallies can be compared against each other.
ct_code = Counter(entry['code'] for entries in mjtheme for entry in entries)
ct_name = Counter(entry['name'] for entries in mjtheme for entry in entries)

# Ten most frequent themes, keyed by code number
ct_code.most_common(10)

[('11', 250),
 ('10', 216),
 ('8', 210),
 ('2', 199),
 ('6', 168),
 ('4', 146),
 ('7', 130),
 ('5', 77),
 ('9', 50),
 ('1', 38)]

In [6]:
# Ten most frequent themes, keyed by theme name rather than code
ct_name.most_common(n=10)

[('Environment and natural resources management', 223),
 ('Rural development', 202),
 ('Human development', 197),
 ('Public sector governance', 184),
 ('Social protection and risk management', 158),
 ('Financial and private sector development', 130),
 ('', 122),
 ('Social dev/gender/inclusion', 119),
 ('Trade and integration', 72),
 ('Urban development', 47)]

In [7]:
# Some theme records carry an empty '' name even though their code is set.
# Build a code -> name lookup from the records that do have a name, so the
# blank ones can be filled in from their code afterwards.
theme_name = {}
# Scan every project with no early exit: stopping after a fixed number of
# codes could leave a code unmapped if the data ever contains more themes.
for theme_list in mjtheme:
    for th in theme_list:
        if th['name'] != "":
            # setdefault keeps the first non-empty name seen for each code
            theme_name.setdefault(th['code'], th['name'])
theme_name

{'8': 'Human development',
 '1': 'Economic management',
 '6': 'Social protection and risk management',
 '5': 'Trade and integration',
 '2': 'Public sector governance',
 '11': 'Environment and natural resources management',
 '7': 'Social dev/gender/inclusion',
 '4': 'Financial and private sector development',
 '10': 'Rural development',
 '9': 'Urban development',
 '3': 'Rule of law'}

In [8]:
# Replace empty-string theme names with the name registered for their code.
# New lists/dicts are built instead of editing the records stored in wb_df,
# so re-running this cell is harmless and wb_df itself only changes when the
# column is explicitly reassigned from mjtheme.
# A code missing from theme_name raises KeyError rather than silently
# leaving the name blank.
mjtheme = mjtheme.apply(
    lambda theme_list: [
        {
            **theme,
            'name': theme['name'] if theme['name'] != '' else theme_name[theme['code']],
        }
        for theme in theme_list
    ]
)

mjtheme.head()

0    [{'code': '8', 'name': 'Human development'}, {...
1    [{'code': '1', 'name': 'Economic management'},...
2    [{'code': '5', 'name': 'Trade and integration'...
3    [{'code': '7', 'name': 'Social dev/gender/incl...
4    [{'code': '5', 'name': 'Trade and integration'...
Name: mjtheme_namecode, dtype: object

In [9]:
# Write the series with filled-in theme names back onto the dataframe column
wb_df['mjtheme_namecode'] = mjtheme

# Show the first five rows to confirm the column now holds the updated records
wb_df.loc[:, ['countryname', 'mjtheme_namecode']].head(5)

Unnamed: 0,countryname,mjtheme_namecode
0,Federal Democratic Republic of Ethiopia,"[{'code': '8', 'name': 'Human development'}, {..."
1,Republic of Tunisia,"[{'code': '1', 'name': 'Economic management'},..."
2,Tuvalu,"[{'code': '5', 'name': 'Trade and integration'..."
3,Republic of Yemen,"[{'code': '7', 'name': 'Social dev/gender/incl..."
4,Kingdom of Lesotho,"[{'code': '5', 'name': 'Trade and integration'..."


In [None]:
#Done