# World Happiness Report Analysis Pipeline
This notebook implements a comprehensive analysis of the World Happiness Report dataset (from Kaggle), including data processing, visualization, and statistical analysis.


# Imports


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import plotly.express as px
import plotly.graph_objects as go
from typing import List, Tuple, Dict
from google.colab import drive
import os
# Mount Google Drive (Colab) at /content/drive so files stored there can be read by path.
drive.mount('/content/drive')

Mounted at /content/drive


The `HappinessAnalyser` class below handles loading, cleaning, visualising, and statistical analysis of the World Happiness Report data.

In [5]:
class HappinessAnalyser:
  """Load, clean, visualise and summarise a World Happiness Report CSV.

  Column headers are lower-cased and spaces are replaced by underscores when
  the file is loaded. The happiness-score and country columns are looked up
  under either of two names (``happiness_score`` / ``score`` and ``country`` /
  ``country_or_region``), so that the EDA, dashboard and statistics methods
  all work on the same file instead of each expecting a different schema.
  """

  # Year shown in the map title when none can be read from the file name.
  _DEFAULT_YEAR = '2019'

  def __init__(self, data_path: str):
    """Store the CSV location; nothing is read until ``load_and_clean_data``.

    Args:
      data_path: Path of the CSV file handed to ``pandas.read_csv``.
    """
    self.data_path = data_path
    self.df = None            # frame read from disk, headers cleaned, NaNs imputed
    self.processed_df = None  # independent copy of ``df`` used by the analysis methods

  def _require_data(self) -> pd.DataFrame:
    """Return the cleaned frame, failing with a clear message if none is loaded."""
    if self.processed_df is None:
      raise RuntimeError('No data loaded: call load_and_clean_data() first.')
    return self.processed_df

  def _resolve_column(self, *candidates: str) -> str:
    """Return the first name in ``candidates`` that is a column of the cleaned frame.

    Raises:
      KeyError: If none of the candidate names is present.
    """
    columns = self._require_data().columns
    for name in candidates:
      if name in columns:
        return name
    raise KeyError(
        f'None of the columns {candidates} found; available: {list(columns)}'
    )

  def _report_year(self) -> str:
    """Return the report year used in the map title.

    The year is taken from the file name when its stem is a four-digit number
    (e.g. ``2018.csv``); otherwise the previous hard-coded value is kept.
    """
    try:
      stem = os.path.splitext(os.path.basename(self.data_path))[0]
    except TypeError:
      # data_path is not path-like (e.g. an open buffer): no name to inspect.
      return self._DEFAULT_YEAR
    if len(stem) == 4 and stem.isdigit():
      return stem
    return self._DEFAULT_YEAR

  def load_and_clean_data(self) -> pd.DataFrame:
    """Read the CSV, normalise headers and impute missing numeric values.

    Steps:
      1. Read ``data_path`` as UTF-8.
      2. Lower-case the headers and replace spaces with underscores.
      3. Cast the country column (under either supported name) to ``str``.
      4. Replace missing values in numeric columns by the column mean.

    Returns:
      The cleaned frame, also stored as ``processed_df``.
    """
    self.df = pd.read_csv(self.data_path, encoding='utf-8')
    self.df.columns = [col.lower().replace(' ', '_') for col in self.df.columns]

    # The country column is called differently depending on the file.
    for country_col in ('country', 'country_or_region'):
      if country_col in self.df.columns:
        self.df[country_col] = self.df[country_col].astype(str)

    # Mean imputation only touches numeric columns; text columns are left as read.
    numeric_columns = self.df.select_dtypes(include=[np.number]).columns
    self.df[numeric_columns] = self.df[numeric_columns].fillna(
        self.df[numeric_columns].mean()
    )

    self.processed_df = self.df.copy()
    return self.processed_df

  def perform_eda(self) -> 'Dict[str, plt.Figure]':
    """Build the static exploratory figures.

    Returns:
      A dict with the figures ``'Happiness_distribution'`` (score histogram),
      ``'Correlation_matrix'`` (heatmap over all numeric columns) and
      ``'top_countries'`` (bar chart of the ten highest scores).

    Raises:
      RuntimeError: If no data has been loaded yet.
      KeyError: If the score or country column cannot be found.
    """
    frame = self._require_data()
    score_col = self._resolve_column('happiness_score', 'score')
    country_col = self._resolve_column('country', 'country_or_region')
    figures = {}

    # Distribution of the happiness score.
    fig_dist, ax_dist = plt.subplots(figsize=(10, 6))
    sns.histplot(data=frame, x=score_col, bins=30, ax=ax_dist)
    ax_dist.set_title('Distribution of Happiness Scores')
    figures['Happiness_distribution'] = fig_dist

    # Pairwise correlations between all numeric columns.
    fig_corr, ax_corr = plt.subplots(figsize=(10, 6))
    numeric_cols = frame.select_dtypes(include=[np.number]).columns
    sns.heatmap(frame[numeric_cols].corr(), annot=True, cmap='coolwarm', ax=ax_corr)
    ax_corr.set_title('Correlation Heatmap')
    figures['Correlation_matrix'] = fig_corr

    # Ten countries with the highest score.
    fig_top, ax_top = plt.subplots(figsize=(12, 6))
    top_10 = frame.nlargest(10, score_col)
    sns.barplot(data=top_10, x=country_col, y=score_col, ax=ax_top)
    ax_top.tick_params(axis='x', labelrotation=45)
    ax_top.set_title('Top 10 happiest countries')
    figures['top_countries'] = fig_top

    return figures

  def create_interactive_dashboard(self) -> None:
    """Write the interactive Plotly views to HTML files.

    Produces ``world_happiness_map2.html`` (choropleth of the score by
    country) and ``scatter_matrix2.html`` (scatter matrix of the score against
    GDP per capita, social support and healthy life expectancy) in the current
    working directory.

    Raises:
      RuntimeError: If no data has been loaded yet.
      KeyError: If the score or country column cannot be found.
    """
    viz_df = self._require_data().copy()
    score_col = self._resolve_column('score', 'happiness_score')
    country_col = self._resolve_column('country_or_region', 'country')

    fig_map = px.choropleth(
        viz_df,
        locations=country_col,
        locationmode='country names',
        color=score_col,
        hover_data=[
            'overall_rank',
            'gdp_per_capita',
            'social_support',
            'healthy_life_expectancy'
        ],
        color_continuous_scale='Viridis',
        color_continuous_midpoint=viz_df[score_col].mean(),
        title=f'World Happiness Map ({self._report_year()})'
    )

    fig_map.update_layout(
        title_x=0.5,
        geo=dict(showframe=False),
        width=1000,
        height=600,
        coloraxis_colorbar=dict(
            title='Happiness Score'
        )
    )

    # Saving the interactive map
    fig_map.write_html('world_happiness_map2.html')

    dimensions = [
        score_col,
        'gdp_per_capita',
        'social_support',
        'healthy_life_expectancy'
    ]

    fig_scatter = px.scatter_matrix(
        viz_df,
        dimensions=dimensions,
        title='Relationships between Key Happiness Factors',
        labels={
            score_col: 'Happiness Score',
            'gdp_per_capita': 'GDP per Capita',
            'social_support': 'Social Support',
            'healthy_life_expectancy': 'Healthy Life Expectancy'
        }
    )

    fig_scatter.update_layout(
        title_x=0.5,
        width=1000,
        height=1000,
        showlegend=True
    )

    fig_scatter.write_html('scatter_matrix2.html')

  def perform_statistical_analysis(self) -> Dict[str, float]:
    """Compute summary statistics of the score and its correlation with GDP.

    Returns:
      A dict with ``mean_happiness``, ``median_happiness``, ``std_happiness``
      and the Pearson correlation between the score and ``gdp_per_capita``
      (``gdp_correlation``) together with its p-value (``gdp_pvalue``).

    Raises:
      RuntimeError: If no data has been loaded yet.
      KeyError: If the score or ``gdp_per_capita`` column cannot be found.
    """
    frame = self._require_data()
    score = frame[self._resolve_column('happiness_score', 'score')]
    gdp = frame[self._resolve_column('gdp_per_capita')]

    stats_results = {
        'mean_happiness': score.mean(),
        'median_happiness': score.median(),
        'std_happiness': score.std(),
    }

    # pearsonr returns (correlation coefficient, two-sided p-value).
    gdp_correlation = stats.pearsonr(score, gdp)
    stats_results['gdp_correlation'] = gdp_correlation[0]
    stats_results['gdp_pvalue'] = gdp_correlation[1]

    return stats_results



In [6]:
if __name__ == '__main__':
  # Location of the report CSV on the mounted Google Drive.
  DATA_PATH = '/content/drive/MyDrive/Model_datasets/Happy_dataset/2018.csv'

  # Load and clean the data, then write the interactive HTML views.
  analyser = HappinessAnalyser(DATA_PATH)
  analyser.load_and_clean_data()
  analyser.create_interactive_dashboard()
