In [None]:
#Import the libraries that are needed.
import numpy as np #library to precess arrays.
import pandas as pd #library to data analyst
import matplotlib.pyplot as plt #library to visualization
import seaborn as sns #library to visualization that more beautiful than pyplot
import plotly.express as px #library to visualization so have variant of kind visualization
from sklearn.feature_extraction.text import TfidfVectorizer #to vectorize strings and collect it if have same value.
from sklearn.metrics.pairwise import linear_kernel #To see correlation between vectors based on linear.
import geopandas #Ploting map data
import pycountry #Getting countries code

In [None]:
# Load the World Happiness Report 2021 CSV and preview its first five rows.
# The cell ends with the bare expression (rather than print) so that Jupyter
# renders the preview as a rich table instead of plain, wrapped text.
data = pd.read_csv('../input/world-happiness-report-2021/world-happiness-report-2021.csv')
data.head()

In [None]:
# Show the dimensions of the dataset as a (rows, columns) tuple.
data.shape

In [None]:
# Print a summary of the dataset via DataFrame.info().
# Original note: there are 20 columns, counted starting from 0 (not verifiable from the code alone).
data.info()

# Plot Dystopia Country Distribution

In [None]:
data.isnull().sum() # Count the missing (null) values in every column.

In [None]:
# Translate country names into 3-letter country codes; the codes are stored in
# the 'CODE' column, which is later used as the key for merging with the map data.
def alpha3code(column):
    """Return the 3-letter country code for every name in ``column``.

    Parameters
    ----------
    column : iterable
        Country names, looked up one by one with
        ``pycountry.countries.get(name=...)``.

    Returns
    -------
    list of str
        One entry per input element, in input order: the ``alpha_3`` attribute
        of the matching pycountry record (``alpha_2`` would be the 2-letter
        code), or the literal string ``'None'`` when no record is found.
    """
    codes = []
    for country in column:
        try:
            codes.append(pycountry.countries.get(name=country).alpha_3)
        except (AttributeError, LookupError):
            # Only a failed lookup is treated as "no code". A miss shows up as
            # AttributeError when get() hands back None; LookupError is caught
            # as well because some pycountry versions presumably raise it for
            # unknown or non-string names -- TODO confirm.
            # Anything else (KeyboardInterrupt, a NameError from a missing
            # import, ...) now propagates instead of silently turning every
            # country into 'None'.
            codes.append('None')
    return codes
# Add a 'CODE' column holding the alpha3code() result for each 'Country name' entry,
# then preview the first rows to check the new column.
data['CODE']=alpha3code(data['Country name'])
data.head()

In [None]:
# Build the frame used for mapping: country shapes + happiness data + country coordinates.
# 'naturalearth_lowres' is a dataset bundled with geopandas, so it can be read directly.
# NOTE(review): geopandas.datasets is reportedly deprecated/removed in newer geopandas
# releases -- confirm against the installed version.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
# Rename only the ISO code column, by name, so it matches the 'CODE' key in `data`.
# Overwriting world.columns with a positional list would silently mislabel columns
# if the dataset's column order or count ever changed; a by-name rename fails loudly
# at the merge below instead.
world = world.rename(columns={'iso_a3': 'CODE'})
# Merge the shapes with the happiness data (default inner join, so only codes present
# in both frames are kept -- rows whose code is 'None' drop out here).
merge = pd.merge(world, data, on='CODE')
# Merge once more with a location table which, per the original note, contains each
# country's latitude and longitude. This is fetched over the network.
location = pd.read_csv('https://raw.githubusercontent.com/melanieshi0120/COVID-19_global_time_series_panel_data/master/data/countries_latitude_longitude.csv')
merge = (
    merge.merge(location, on='name')
    .sort_values(by='Country name', ascending=False)
    .reset_index()
)

In [None]:
# Draw a world map coloured by the 'Dystopia + residual' column, binned into
# quantiles with the 'OrRd' colormap. Per the original note, lighter areas mean
# a lower dystopia value.
ax = merge.plot(
    column='Dystopia + residual',
    scheme='quantiles',
    cmap='OrRd',
    legend=True,
    figsize=(25, 20),
)
ax.set_title('Dystopia by Country ', fontsize=25)
plt.show()

# Plot Ladder Continent Rank

In [None]:
# Even though Sub-Saharan Africa comes out first, that is because the bars are stacked
# per country and this region contains more countries than the others.
# Basically, a darker segment means a higher ladder score.
# Conservative score: the ladder score minus its standard error.
data['Ladder_score'] = data['Ladder score'] - data['Standard error of ladder score']
data_ladder_sorted = data.sort_values('Ladder_score', ascending=False)

# Plot the bar figure: one stacked bar per region, one segment per country.
fig = px.bar(
    data_ladder_sorted,
    x='Regional indicator',
    y='Ladder_score',
    color='Ladder_score',
    hover_name='Regional indicator',
    hover_data=['Ladder_score'],
    color_continuous_scale='Peach',
    # Axis label spelling fixed ('Countinent' -> 'Continent').
    labels={'Regional indicator': 'Continent', 'Ladder_score': 'Ladder rank'},
    height=500,
)

# All layout tweaks in a single call; none of the keys overlap.
fig.update_layout(
    uniformtext_minsize=15,
    title='Ladder Continent Rank',
    title_x=0.5,
    # Make background transparent
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    # Stack the country segments and order regions by their stacked total.
    barmode='stack',
    xaxis={'tickangle': -45, 'categoryorder': 'total descending'},
    # Hide color scale axis
    coloraxis_showscale=False,
)

# Show the figure
fig.show()

# Plot Happiness Indicators for Each Continent

In [None]:
# Only a handful of columns, the ones considered important to analyse, are kept.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Build a dataframe limited to the selected region, so the indicators are based on it.
data_Africa = data.loc[data['Regional indicator'] == 'Sub-Saharan Africa', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_Africa.sum().plot.barh(title='Indicator in Sub-Saharan Africa')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'Western Europe' rows and the indicator columns.
data_West_Europe = data.loc[data['Regional indicator'] == 'Western Europe', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_West_Europe.sum().plot.barh(title='Indicator in Western Europe')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'Latin America and Caribbean' rows and the indicator columns.
data_America_Caribean = data.loc[data['Regional indicator'] == 'Latin America and Caribbean', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_America_Caribean.sum().plot.barh(title='Indicator in Latin America and Caribbean')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'Central and Eastern Europe' rows and the indicator columns.
data_Central_Eastern_Europe = data.loc[data['Regional indicator'] == 'Central and Eastern Europe', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_Central_Eastern_Europe.sum().plot.barh(title='Indicator in Central and Eastern Europe')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'Middle East and North Africa' rows and the indicator columns.
data_East_North_Africa = data.loc[data['Regional indicator'] == 'Middle East and North Africa', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_East_North_Africa.sum().plot.barh(title='Indicator in Middle East and North Africa')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'Commonwealth of Independent States' rows and the indicator columns.
data_Commonwealth_States = data.loc[data['Regional indicator'] == 'Commonwealth of Independent States', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_Commonwealth_States.sum().plot.barh(title='Indicator in Commonwealth of Independent States')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'Southeast Asia' rows and the indicator columns.
data_Southeast_Asia = data.loc[data['Regional indicator'] == 'Southeast Asia', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_Southeast_Asia.sum().plot.barh(title='Indicator in Southeast Asia')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'East Asia' rows and the indicator columns.
data_East_Asia = data.loc[data['Regional indicator'] == 'East Asia', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_East_Asia.sum().plot.barh(title='Indicator in East Asia')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'South Asia' rows and the indicator columns.
data_South_Asia = data.loc[data['Regional indicator'] == 'South Asia', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_South_Asia.sum().plot.barh(title='Indicator in South Asia')
plt.show()

In [None]:

# Indicator columns to total for this region.
cols = [
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
# Keep only the 'North America and ANZ' rows and the indicator columns.
data_North_America_ANZ = data.loc[data['Regional indicator'] == 'North America and ANZ', cols]
# Sum each indicator over the region's rows and draw the totals as horizontal bars.
data_North_America_ANZ.sum().plot.barh(title='Indicator in North America and ANZ')
plt.show()