In [None]:
World Happiness Report Analysis

I'll create a series of data visualizations from the World Happiness Report dataset, focusing on the 2019 data, to uncover insights about global happiness and the factors that influence it.
Step 1: Load and Inspect Data
python

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the full World Happiness Report table from the CSV file in the
# current working directory.
happiness_data = pd.read_csv('world_happiness_report.csv')

# Keep only the rows whose 'Year' equals 2019. The explicit .copy() makes
# happiness_2019 an independent DataFrame, so later modifications do not
# touch happiness_data.
happiness_2019 = happiness_data[happiness_data['Year'] == 2019].copy()

# Display basic info.
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it is called directly; wrapping it in print() would emit an extra "None".
happiness_2019.info()
print(happiness_2019.head())

Step 2: Data Preprocessing
python

# Check for missing values: per-column count of nulls before imputation.
print(happiness_2019.isnull().sum())

# Columns whose missing entries are replaced with that column's median.
numerical_cols = ['Happiness_Score', 'GDP_per_Capita', 'Social_Support',
                  'Healthy_Life_Expectancy', 'Freedom_to_Make_Life_Choices',
                  'Generosity', 'Perceptions_of_Corruption']

# Compute all medians in one pass, then fill and assign the result back.
# Calling fillna(..., inplace=True) on a column selection
# (happiness_2019[col]) is chained assignment: it warns on recent pandas and
# leaves the DataFrame unchanged once Copy-on-Write is enabled, so the
# explicit assignment is the reliable form.
column_medians = happiness_2019[numerical_cols].median()
happiness_2019[numerical_cols] = happiness_2019[numerical_cols].fillna(column_medians)

# Re-print the null counts to verify the imputation. Columns outside
# numerical_cols are not filled and may still report missing values.
print(happiness_2019.isnull().sum())

Step 3: Social Support and Happiness Relationship
python

# Scatter plot of happiness score against social support for the 2019 rows,
# with points coloured by their 'Region' value.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=happiness_2019,
    x='Social_Support',
    y='Happiness_Score',
    hue='Region',
    palette='viridis',
    s=100,
    ax=scatter_ax,
)
scatter_ax.set_title('Relationship Between Social Support and Happiness Score (2019)')
scatter_ax.set_xlabel('Social Support')
scatter_ax.set_ylabel('Happiness Score')
scatter_ax.grid(True)

# Anchor the legend's upper-left corner just past the right edge of the axes
# so it sits beside the plot rather than on top of the points.
scatter_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
scatter_fig.tight_layout()
plt.show()

Step 4: Regional Comparisons of GDP and Health
python

# Per-region means of the two columns being compared; reset_index() turns
# the 'Region' group key back into an ordinary column.
comparison_cols = ['GDP_per_Capita', 'Healthy_Life_Expectancy']
regional_avg = happiness_2019.groupby('Region')[comparison_cols].mean().reset_index()
region_names = regional_avg['Region']

# Single figure; GDP is drawn as bars on the primary (left) y-axis.
fig, ax = plt.subplots(figsize=(12, 7))
ax.bar(
    region_names,
    regional_avg['GDP_per_Capita'],
    color='skyblue',
    alpha=0.7,
    label='GDP per Capita',
)

# Healthy life expectancy is drawn as a line on a second y-axis that shares
# the same x-axis, so each series keeps its own scale.
ax2 = ax.twinx()
ax2.plot(
    region_names,
    regional_avg['Healthy_Life_Expectancy'],
    color='red',
    marker='o',
    linewidth=2,
    label='Healthy Life Expectancy',
)

# Titles and axis labels.
ax.set(
    title='Regional Comparison of GDP per Capita and Healthy Life Expectancy (2019)',
    xlabel='Region',
    ylabel='GDP per Capita',
)
ax2.set_ylabel('Healthy Life Expectancy')
ax.tick_params(axis='x', rotation=45)

# Each axes tracks only its own artists, so gather the handles and labels
# from both and draw one combined legend.
bar_handles, bar_labels = ax.get_legend_handles_labels()
line_handles, line_labels = ax2.get_legend_handles_labels()
ax2.legend(bar_handles + line_handles, bar_labels + line_labels, loc='upper right')

fig.tight_layout()
plt.show()

Step 5: Additional Insights (Bonus Visualization)
python

# Pairwise correlations between the columns listed in numerical_cols.
corr_matrix = happiness_2019.loc[:, numerical_cols].corr()

# Annotated heatmap of the correlation matrix; center=0 places the midpoint
# of the diverging 'coolwarm' colormap at zero correlation.
plt.figure(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
)
plt.title('Correlation Matrix of Happiness Factors (2019)')
plt.show()