# Importing Necessary Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Loading the Datasets

In [None]:
# Load the four World Bank CSV extracts. The directory is named once so the
# data location can be changed in a single place.
# NOTE(review): the path is relative, so it resolves against the current
# working directory of the kernel - confirm that is the user's home folder.
from pathlib import Path

DATA_DIR = Path("OneDrive/Desktop/World_Bank")

countries = pd.read_csv(DATA_DIR / "Metadata_Country.csv")
population = pd.read_csv(DATA_DIR / "country_population.csv")
fertility_rate = pd.read_csv(DATA_DIR / "fertility_rate.csv")
life_expectancy = pd.read_csv(DATA_DIR / "life_expectancy.csv")

# Data Cleaning / Data Transformation

In [None]:
# Preview the first rows of the country metadata table.
countries.head()

In [None]:
# Preview the first rows of the raw population table.
population.head()

In [None]:
# Preview the first rows of the raw fertility-rate table.
fertility_rate.head()

In [None]:
# Preview the first rows of the raw life-expectancy table.
life_expectancy.head()

In [None]:
# Summarise the country metadata columns (dtypes and non-null counts).
countries.info()

In [None]:
# Count missing values per column in the country metadata.
countries.isna().sum()

In [None]:
# Keep only the join key and the region label. `.copy()` makes the result an
# independent frame, so the later in-place edits (fill, rename) act on real
# data rather than on a slice of the original table.
countries = countries[['Country Code', 'Region']].copy()

In [None]:
# Row/column count after reducing the metadata to two columns.
countries.shape

In [None]:
# Preview the reduced country table.
countries.head()

In [None]:
# Count missing values per column in the reduced country table.
countries.isna().sum()

In [None]:
# Handle missing values: forward-fill empty Region cells.
# The filled column is assigned back explicitly; an in-place fill through
# attribute access (`countries.Region.fillna(..., inplace=True)`) is not
# guaranteed to write through to the frame, and `fillna(method=...)` is
# deprecated in favour of `ffill()`.
# NOTE(review): forward-fill copies the Region of the preceding row, so the
# result depends purely on row order - confirm this is the intended imputation.
countries = countries.assign(Region=countries['Region'].ffill())
countries

In [None]:
# Align the join-key name with the snake_case key used by the other tables.
countries = countries.rename(columns={'Country Code': 'country_code'})

In [None]:
# Count missing values per column in the raw fertility-rate table.
fertility_rate.isna().sum()

In [None]:
# Count missing values per column in the raw population table.
population.isna().sum()

In [None]:
# Count missing values per column in the raw life-expectancy table.
life_expectancy.isna().sum()

In [None]:
# List the life-expectancy column labels before normalising them.
life_expectancy.columns

In [None]:
# Normalise column labels: lower-case, spaces replaced by underscores.
life_expectancy.columns = [
    label.lower().replace(' ', '_') for label in life_expectancy.columns
]
life_expectancy.columns

In [None]:
# Remove the two indicator descriptor columns; they are not used downstream.
life_expectancy = life_expectancy.drop(columns=["indicator_name", "indicator_code"])
life_expectancy

In [None]:
# Row/column count of the wide life-expectancy table (before melting).
life_expectancy.shape

In [None]:
# Reshape life expectancy from wide (one column per year) to long
# (one row per country_code/year pair) with pandas melt.

# Year column labels 1960..2016, as strings to match the CSV headers.
years = list(map(str, range(1960, 2017)))

life_expectancy = pd.melt(
    life_expectancy,
    id_vars='country_code',
    value_vars=years,
    var_name='year',
    value_name='life_Expectancy',
)

In [None]:
# Row/column count of the long-format life-expectancy table.
life_expectancy.shape

In [None]:
# Preview the long-format life-expectancy table.
life_expectancy.head()

In [None]:
# Discard every row that contains a missing value.
life_expectancy = life_expectancy.dropna(axis=0)

In [None]:
# Count remaining missing values per column in the life-expectancy table.
life_expectancy.isnull().sum()

In [None]:
# Normalise column labels: lower-case, spaces replaced by underscores.
population.columns = [
    label.lower().replace(' ', '_') for label in population.columns
]
population.columns

In [None]:
# Remove the two indicator descriptor columns; they are not used downstream.
population = population.drop(columns=["indicator_name", "indicator_code"])
population

In [None]:
# Reshape population from wide (one column per year) to long
# (one row per country_code/year pair) with pandas melt.

# Year column labels 1960..2016, as strings to match the CSV headers.
years = list(map(str, range(1960, 2017)))

df1_population = pd.melt(
    population,
    id_vars=['country_code'],
    value_vars=years,
    var_name='year',
    value_name='population',
)

In [None]:
# Row/column count of the long-format population table.
df1_population.shape

In [None]:
# Preview the long-format population table.
df1_population.head()

In [None]:
# Check whether the long-format population table contains any NaN values.
df1_population.isnull().sum()

In [None]:
# Discard every row that contains a missing value.
df1_population = df1_population.dropna(axis=0)

In [None]:
# Count remaining missing values per column in the population table.
df1_population.isnull().sum()

In [None]:
# Summarise the cleaned population table (dtypes and non-null counts).
df1_population.info()

In [None]:
# Summarise the raw fertility-rate table (dtypes and non-null counts).
fertility_rate.info()

In [None]:
# List the fertility-rate column labels before normalising them.
fertility_rate.columns

In [None]:
# Convert the column titles to lower case and replace spaces with underscores.
fertility_rate.columns = [
    label.lower().replace(' ', '_') for label in fertility_rate.columns
]
fertility_rate.columns

In [None]:
# Drop the two descriptor columns, "indicator_name" and "indicator_code".
fertility_rate = fertility_rate.drop(columns=["indicator_name", "indicator_code"])
fertility_rate

In [None]:
# Count missing values per column in the wide fertility-rate table.
fertility_rate.isnull().sum()

In [None]:
# Row/column count of the wide fertility-rate table (before melting).
fertility_rate.shape

In [None]:
# Reshape fertility rate from wide (one column per year) to long
# (one row per country_code/year pair) with pandas melt.

# Year column labels 1960..2016, as strings to match the CSV headers.
years = list(map(str, range(1960, 2017)))

fertility_rate = pd.melt(
    fertility_rate,
    id_vars='country_code',
    value_vars=years,
    var_name='year',
    value_name='fertility_rate',
)

In [None]:
# Preview the long-format fertility-rate table.
fertility_rate.head()

In [None]:
# Check whether the long-format fertility-rate table contains any NaN values.
fertility_rate.isnull().sum()

In [None]:
# Discard every row that contains a missing value.
fertility_rate = fertility_rate.dropna(axis=0)

In [None]:
# Summarise the cleaned fertility-rate table (dtypes and non-null counts).
# `info` must be called; the bare attribute only shows the bound method.
fertility_rate.info()

In [None]:
# Attach the Region label to every population row; the left join keeps all
# population rows even when no matching country entry exists.
merged = pd.merge(
    left=df1_population,
    right=countries,
    how='left',
    on=['country_code'],
)

In [None]:
# Preview the population table joined with regions.
merged.head()

In [None]:
# Row/column count after joining population with regions.
merged.shape

In [None]:
# Add life expectancy by country and year; the left join keeps rows that
# have no life-expectancy figure (the value is NaN there).
merged_2 = pd.merge(
    left=merged,
    right=life_expectancy,
    how='left',
    on=['country_code', 'year'],
)

In [None]:
# Preview the table after adding life expectancy.
merged_2.head()

In [None]:
# Row/column count after adding life expectancy.
merged_2.shape

In [None]:
# Add fertility rate by country and year; the left join keeps rows that
# have no fertility figure (the value is NaN there).
final_Merge = pd.merge(
    left=merged_2,
    right=fertility_rate,
    how='left',
    on=['country_code', 'year'],
)

In [None]:
# Preview the fully merged analysis table.
final_Merge.head()

In [None]:
# Row/column count of the fully merged analysis table.
final_Merge.shape

# Data Visualization

In [None]:
# Number of rows per region, drawn as a pie chart.
region_counts = final_Merge['Region'].value_counts()
region_counts.plot(kind='pie')

In [None]:
# Number of rows per region, drawn as a bar chart.
region_counts = final_Merge['Region'].value_counts()
region_counts.plot(kind='bar')

In [None]:
# Pull out the series used for the population trend plot.
populationTrends_max = final_Merge['population'].max()
year = final_Merge['year']
countryPopulation = final_Merge['population']

# Sanity check: both series should have the same number of entries.
# Measure the Series themselves, not string literals of their names.
print(len(year))
print(len(countryPopulation))

In [None]:
# Plot population against time. final_Merge holds one row per
# (country_code, year), so drawing the raw rows joins unrelated countries into
# one zig-zag line; aggregate to a single value per year first.
# NOTE(review): the sum covers every row of final_Merge - if the source file
# also carries aggregate entries (regional or world totals) they are counted
# too; filter them out upstream if a true world total is wanted.
population_by_year = final_Merge.groupby('year')['population'].sum()

# 'year' comes from melted column labels (strings); cast for a numeric axis.
plt.plot(population_by_year.index.astype(int), population_by_year.values,
         label='Population Line', linewidth = 0.8, linestyle  = "--", color = 'y')
plt.title('Population Trends Over Time', fontsize = 15)
plt.ylabel('Population', fontsize = 13)
plt.xlabel('Year', fontsize = 13)
plt.legend()
plt.show()

In [None]:
fertility_Rate = final_Merge['fertility_rate']

In [None]:
plt.hist(fertility_Rate, label='Decrement in Fertility Rate', rwidth= 0.8, histtype = 'bar', orientation = 'horizontal', color = 'g')
plt.title("Fertility Rate Distribution", fontsize = 15)
plt.xlabel("Fertility Rate", fontsize = 13)
plt.legend()
plt.show()

In [None]:
# Step histogram of life expectancy across all country/year rows.
Life_Expectancy = final_Merge['life_Expectancy']
# The left merge leaves NaN where no life-expectancy figure exists; drop those
# explicitly instead of relying on matplotlib to ignore them.
plt.hist(Life_Expectancy.dropna(), label='Variation in life expectancy', histtype = 'step', color = 'b')
plt.title("Life Expectancy Variation", fontsize = 15)
plt.xlabel("Life Expectancy", fontsize = 13)
plt.legend()
plt.show()

In [None]:
# Animated bubble chart: fertility rate (log x-axis) against life expectancy,
# one frame per year, one bubble per country sized by population and
# coloured by region.
scatter_options = dict(
    x="fertility_rate",
    y="life_Expectancy",
    size="population",
    color="Region",
    hover_name="country_code",
    animation_frame="year",
    animation_group="country_code",
    log_x=True,
    size_max=55,
    range_x=[1, 10],
    range_y=[10, 100],
)
px.scatter(final_Merge, **scatter_options)