# Worldwide Happiness Analysis with Python and Power BI

## Problem Identification and Overview

## Methodology

## Define Requirements

## Importing the libraries

In [None]:
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns


%matplotlib inline
plt.style.use('ggplot')
plt.style.use('seaborn')


## Configuring pandas

In [29]:
## configure pandas to display all columns (None removes the column limit)
## use the canonical option name: "display.max.columns" only worked because
## set_option treats an unknown key as a regex, where "." also matches the "_"

pd.set_option("display.max_columns", None)

In [30]:
## show floating-point values with 2 digits after the decimal point

pd.options.display.precision = 2

## Importing the dataset

In [12]:
## read the happiness report from its CSV file into a DataFrame
## the location can be overridden with the HAPPINESS_CSV environment variable so
## the notebook also runs on machines other than the original author's; when the
## variable is not set, the original absolute path is used
## NOTE: the frame is called `co2_emission` although it holds the happiness
## report; the name is kept because the cells below refer to it

HAPPINESS_CSV = os.environ.get(
    "HAPPINESS_CSV",
    'C:/Users/Lenovo/Desktop/A4-Main/WorldWide happiness report/worldwide_happiness_report.csv',
)
co2_emission = pd.read_csv(HAPPINESS_CSV)

## Understanding the Dataset

In [23]:
# number of rows in the data set

co2_emission.shape[0]

156

In [14]:
# dimensions of the data set as a (rows, columns) tuple

co2_emission.shape

(156, 9)

In [26]:
# data type of each column, listed as a Series indexed by column name

co2_emission.dtypes

Overall rank                      int64
Country or region                object
Score                           float64
GDP per capita                  float64
Social support                  float64
Healthy life expectancy         float64
Freedom to make life choices    float64
Generosity                      float64
Perceptions of corruption       float64
dtype: object

In [16]:
# preview the first five rows

co2_emission.head(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [17]:
# preview the last five rows

co2_emission.tail(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
151,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411
152,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147
153,154,Afghanistan,3.203,0.35,0.517,0.361,0.0,0.158,0.025
154,155,Central African Republic,3.083,0.026,0.0,0.105,0.225,0.235,0.035
155,156,South Sudan,2.853,0.306,0.575,0.295,0.01,0.202,0.091


In [18]:
# column names as a NumPy array

co2_emission.columns.to_numpy()

array(['Overall rank', 'Country or region', 'Score', 'GDP per capita',
       'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption'], dtype=object)

In [21]:
# column names as a plain Python list

co2_emission.columns.tolist()

['Overall rank',
 'Country or region',
 'Score',
 'GDP per capita',
 'Social support',
 'Healthy life expectancy',
 'Freedom to make life choices',
 'Generosity',
 'Perceptions of corruption']

In [33]:
# print a summary of the frame: index range, per-column non-null counts and
# dtypes, and memory usage

co2_emission.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Overall rank                  156 non-null    int64  
 1   Country or region             156 non-null    object 
 2   Score                         156 non-null    float64
 3   GDP per capita                156 non-null    float64
 4   Social support                156 non-null    float64
 5   Healthy life expectancy       156 non-null    float64
 6   Freedom to make life choices  156 non-null    float64
 7   Generosity                    156 non-null    float64
 8   Perceptions of corruption     156 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 11.1+ KB


In [32]:
# summary statistics (count, mean, std, min, quartiles, max) for the numeric columns

co2_emission.describe()

Unnamed: 0,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
count,156.0,156.0,156.0,156.0,156.0,156.0,156.0,156.0
mean,78.5,5.41,0.91,1.21,0.73,0.39,0.18,0.11
std,45.18,1.11,0.4,0.3,0.24,0.14,0.1,0.09
min,1.0,2.85,0.0,0.0,0.0,0.0,0.0,0.0
25%,39.75,4.54,0.6,1.06,0.55,0.31,0.11,0.05
50%,78.5,5.38,0.96,1.27,0.79,0.42,0.18,0.09
75%,117.25,6.18,1.23,1.45,0.88,0.51,0.25,0.14
max,156.0,7.77,1.68,1.62,1.14,0.63,0.57,0.45


In [31]:
## describe only the object (text) columns rather than the numeric ones:
## count, number of unique values, most frequent value and its frequency

co2_emission.describe(include=[object])

Unnamed: 0,Country or region
count,156
unique,156
top,Nicaragua
freq,1


## Sanity Checks and Resolution

### Checking for Missing Values

In [52]:
# element-wise mask of the whole data set: True where a value is missing

co2_emission.isna()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...
151,False,False,False,False,False,False,False,False,False
152,False,False,False,False,False,False,False,False,False
153,False,False,False,False,False,False,False,False,False
154,False,False,False,False,False,False,False,False,False


In [51]:
# count the missing values in the Score column

co2_emission["Score"].isna().sum()

0

In [45]:
# per column: does it contain at least one missing value?

co2_emission.isna().any()

Overall rank                    False
Country or region               False
Score                           False
GDP per capita                  False
Social support                  False
Healthy life expectancy         False
Freedom to make life choices    False
Generosity                      False
Perceptions of corruption       False
dtype: bool

In [47]:
# number of missing values in each column
co2_emission.isna().sum()

Overall rank                    0
Country or region               0
Score                           0
GDP per capita                  0
Social support                  0
Healthy life expectancy         0
Freedom to make life choices    0
Generosity                      0
Perceptions of corruption       0
dtype: int64

## Checking for Duplicates

## Exploratory Data Analysis

## Communicate Results

## Interpreting the Results / Recommendations

## Generating Reports / Stories / Insights

## Conclusion