In [5]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [15]:
def read_world_bank_data(filename):
    """Load a World Bank indicator CSV and return it in two wide layouts.

    The file is expected to have 'Country Name' and 'Indicator Name'
    columns plus one column per year.  Identifier/code columns are dropped,
    sparse columns are removed via ``remove_missing_features`` and remaining
    gaps are filled with the mean of the same row.

    Parameters
    ----------
    filename : str or path-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    df_years : pandas.DataFrame
        One row per (country_name, indicator_name), one column per year.
    df_countries : pandas.DataFrame
        One row per (indicator_name, year), one column per country.

    Raises
    ------
    ValueError
        Raised by ``DataFrame.pivot`` if a (country, indicator, year)
        combination occurs more than once.
    """
    # Read CSV file into a Pandas dataframe
    df = pd.read_csv(filename)

    # Remove unnecessary columns.  read_csv labels header-less columns
    # 'Unnamed: <position>'; the position depends on how many year columns
    # the export contains, so match by prefix instead of hard-coding
    # 'Unnamed: 66', and tolerate files that lack one of the code columns.
    unnamed_cols = [col for col in df.columns
                    if str(col).startswith('Unnamed:')]
    df = df.drop(columns=['Indicator Code', 'Country Code', *unnamed_cols],
                 errors='ignore')

    # Rename the columns for convenience
    df = df.rename(columns={'Country Name': 'country_name',
                            'Indicator Name': 'indicator_name'})

    # Drop sparse columns.  remove_missing_features is defined elsewhere in
    # the notebook; presumably it returns the columns with more than 75%
    # missing values -- verify against its definition.
    cols_to_trim = remove_missing_features(df)
    df = df.drop(columns=cols_to_trim)

    # Fill each NaN with the mean of its own row.  The row means are computed
    # once and applied column by column (index-aligned), which avoids calling
    # a Python lambda for every row.  Rows that are entirely NaN stay NaN.
    numeric_cols = df.select_dtypes(include='number').columns
    row_means = df[numeric_cols].mean(axis=1)
    df[numeric_cols] = df[numeric_cols].apply(
        lambda col: col.fillna(row_means))

    # Melt the dataframe to get years as rows
    df_melted = df.melt(id_vars=['country_name', 'indicator_name'],
                        var_name='year', value_name='value')

    # Pivot the dataframe to have years as columns
    df_years = df_melted.pivot(index=['country_name', 'indicator_name'],
                               columns='year', values='value')
    df_years = df_years.reset_index()

    # Pivot the dataframe to have countries as columns
    df_countries = df_melted.pivot(index=['indicator_name', 'year'],
                                   columns='country_name', values='value')
    df_countries = df_countries.reset_index()

    return df_years, df_countries


In [16]:
# Plot: 2x3 overview of urbanisation over time and its relation to GDP,
# life expectancy and fertility.  `all_data` is not created in this cell and
# must already exist in the kernel.
urban_col = 'Urban population (% of total)'
snapshot_year = 2015

fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, figsize=(15, 10))

# Subplot 1: urban share over time, overall mean vs. selected countries.
# NOTE(review): the 'World' line is the unweighted mean over every row with a
# positive value, not the 'World' row used in subplots 2 and 3 -- confirm
# this is intended.
ax1 = (all_data[all_data[urban_col] > 0][[urban_col, 'Year']]
       .groupby('Year').mean()
       .plot(y=urban_col, style='.-', color='b', ax=ax1, label='World'))

country_colors = [('France', 'purple'),
                  ('Niger', 'g'),
                  ('Thailand', 'orange'),
                  ('Russian Federation', 'r'),
                  ('Colombia', 'yellow')]
for country, color in country_colors:
    (all_data[all_data['Country Name'] == country][[urban_col, 'Year']]
     .sort_values(by=['Year'], ascending=True)
     .plot(x='Year', y=urban_col, style='.-', color=color, ax=ax1,
           label=country))

ax1.set_title("Percentage of urban population", fontsize=14)
ax1.set_xlabel("Year", fontsize=12)
# The column holds percentages, not fractions, so label it accordingly.
ax1.set_ylabel("Urban population (% of total)", fontsize=12)

# Subplots 2 and 3: annual urban / rural population growth for 'World'.
# Sort by Year and pass x='Year' explicitly; without x the series would be
# drawn against the DataFrame row index while the axis is labelled "Year".
world = all_data[all_data['Country Name'] == 'World']
growth_panels = [(ax2, 'Urban population growth (annual %)'),
                 (ax3, 'Rural population growth (annual %)')]
for ax, growth_col in growth_panels:
    (world[[growth_col, 'Year']]
     .sort_values(by=['Year'], ascending=True)
     .plot(x='Year', y=growth_col, style='.-', ax=ax, legend=False))
    ax.set_title(growth_col, fontsize=14)
    ax.set_xlabel("Year", fontsize=12)
    ax.set_ylabel("Population growth (annual %)", fontsize=12)

# Rows for the snapshot year that have a positive urban share; shared by
# subplots 4-6.
snapshot = all_data[(all_data[urban_col] > 0)
                    & (all_data['Year'] == snapshot_year)]

# Subplot 4: urban share against GDP per capita (linear x-axis).
gdp_col = 'gdp_per_capita ($)'
ax4 = (snapshot[snapshot[gdp_col] > 0][[urban_col, gdp_col]]
       .plot(x=gdp_col, y=urban_col, style='.', ax=ax4,
             label=str(snapshot_year), legend=True))
ax4.set_title("Urban population by gdp", fontsize=14)
ax4.set_xlabel(gdp_col, fontsize=12)
ax4.set_ylabel(urban_col, fontsize=12)

# Subplots 5 and 6: scatter with a fitted regression line against urban share.
regression_panels = [
    (ax5, 'Life_expectancy_at_birth',
     "Life expectancy by urban/rural population"),
    (ax6, 'Fertility_rate',
     "Fertility rate by urban/rural population"),
]
for ax, outcome_col, title in regression_panels:
    data = snapshot[snapshot[outcome_col] > 0][[urban_col, outcome_col]]
    sns.regplot(x=urban_col, y=outcome_col, data=data,
                label=str(snapshot_year), marker='.', fit_reg=True, ax=ax)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel(urban_col, fontsize=12)
    ax.set_ylabel(outcome_col, fontsize=12)

plt.tight_layout()
plt.savefig("urban_rural.png")
plt.show()