## Importing required packages for the project

In [1]:
import pandas as pd
import json
from pandas.io.json import json_normalize

## Reading the JSON data

In [2]:
#Reading the JSON data as string
data=json.load((open('world_bank_projects.json'))) 

In [3]:
#Normalizing to read in the important columns
df=json_normalize(data,'mjtheme_namecode',['countryname'])

In [4]:
# Displaying the head
df.head()


Unnamed: 0,code,name,countryname
0,8,Human development,Federal Democratic Republic of Ethiopia
1,11,,Federal Democratic Republic of Ethiopia
2,1,Economic management,Republic of Tunisia
3,6,Social protection and risk management,Republic of Tunisia
4,5,Trade and integration,Tuvalu


In [5]:
# Checking for null values 
df.info()
# No Null Values Found

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1499 entries, 0 to 1498
Data columns (total 3 columns):
code           1499 non-null object
name           1499 non-null object
countryname    1499 non-null object
dtypes: object(3)
memory usage: 35.2+ KB


### Ans 1:Top 10 countries with most projects, alongside no. of projects running in each country.

In [10]:
# Displys Top 10 countries with most projects, alongside no. of projects running in each country.
df['countryname'].value_counts().head(10)


Republic of Indonesia              56
Republic of India                  51
Socialist Republic of Vietnam      43
People's Republic of Bangladesh    41
Federative Republic of Brazil      41
People's Republic of China         40
Africa                             39
Republic of Yemen                  34
Kingdom of Morocco                 32
Republic of Mozambique             31
Name: countryname, dtype: int64

### Ans 2: Top 10 major project themes (using column 'mjtheme_namecode') with missing project theme data

In [14]:
# Displays Top 10 major project themes with missing project theme data
df['name'].value_counts().head(10)


Environment and natural resources management    223
Rural development                               202
Human development                               197
Public sector governance                        184
Social protection and risk management           158
Financial and private sector development        130
                                                122
Social dev/gender/inclusion                     119
Trade and integration                            72
Urban development                                47
Name: name, dtype: int64

### Finding the Missing data

In [15]:
# Finding the Missing data. Finding code for every given project theme name
code_name={}
def det_name(row):
    if row['name']!= '':
        code_name[row['code']]=row['name']

        

In [16]:
# Applying the det_name function to populate the code_name dictionary, which associates the project_code 
#with the project theme name.
p=df.apply(det_name,axis=1)

In [17]:
# Displays the Project code along with corresponding Project theme name
print(sorted(code_name.items()))


[('1', 'Economic management'), ('10', 'Rural development'), ('11', 'Environment and natural resources management'), ('2', 'Public sector governance'), ('3', 'Rule of law'), ('4', 'Financial and private sector development'), ('5', 'Trade and integration'), ('6', 'Social protection and risk management'), ('7', 'Social dev/gender/inclusion'), ('8', 'Human development'), ('9', 'Urban development')]


In [18]:
# Defining function to fill in the Missing Project theme name data.
def put_name(x):
    if x['name']=='':
        x['name']=code_name[x['code']]
    return x
# Applying the function to fill in the missing data        
p=df.apply(put_name,axis=1)

In [19]:
#Renaming the Dataframe Columns.
df=df.rename(columns={'code':'Project_Code','name':'Project_Theme_Name','countryname':'Country_Name'})

### Ans 3: The dataframe with desired columns with missing values filled in.

In [20]:
# Outputing the dataframe with no missing data.
df.head(20)

Unnamed: 0,Project_Code,Project_Theme_Name,Country_Name
0,8,Human development,Federal Democratic Republic of Ethiopia
1,11,Environment and natural resources management,Federal Democratic Republic of Ethiopia
2,1,Economic management,Republic of Tunisia
3,6,Social protection and risk management,Republic of Tunisia
4,5,Trade and integration,Tuvalu
5,2,Public sector governance,Tuvalu
6,11,Environment and natural resources management,Tuvalu
7,6,Social protection and risk management,Tuvalu
8,7,Social dev/gender/inclusion,Republic of Yemen
9,7,Social dev/gender/inclusion,Republic of Yemen


###  Top 10 major project themes with FULL project theme data (No missing Data)

In [21]:
# Displaying Top 10 major project themes with FULL project theme data (No missing Data)
df['Project_Theme_Name'].value_counts().head(10)

Environment and natural resources management    250
Rural development                               216
Human development                               210
Public sector governance                        199
Social protection and risk management           168
Financial and private sector development        146
Social dev/gender/inclusion                     130
Trade and integration                            77
Urban development                                50
Economic management                              38
Name: Project_Theme_Name, dtype: int64