# Advanced Assignment: Data Visualization Dashboard

In this assignment, we'll create a comprehensive data visualization dashboard using Matplotlib to analyze and present insights from the World Happiness Report dataset.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LinearSegmentedColormap

# Render Matplotlib figures inline in the notebook's cell output.
%matplotlib inline
# Apply Matplotlib's 'default' style sheet to the figures created below.
plt.style.use('default')

## 1. Data Preparation

In [2]:
# Load the World Happiness Report CSV (expected next to this notebook).
df = pd.read_csv('World-Happiness-Report.csv')

# Display column names
print("Columns in the dataset:")
print(df.columns)

# Display the first few rows
print("\nFirst few rows of the dataset:")
print(df.head())

# Data cleaning and preprocessing
# (No cleaning is applied yet; add steps here once the inspection below
# shows missing values or wrong dtypes.)

# Display per-column dtypes and non-null counts.
# DataFrame.info() writes its report directly and returns None, so it must
# not be wrapped in print() -- that would emit a stray "None" line.
print("\nDataset info:")
df.info()


Columns in the dataset:
Index(['Country', 'Happiness Rank', 'Happiness Score', 'Economy', 'Family',
       'Health', 'Freedom', 'Generosity', 'Corruption', 'Dystopia',
       'Job Satisfaction', 'Region'],
      dtype='object')

First few rows of the dataset:
       Country  Happiness Rank  Happiness Score   Economy    Family    Health  \
0       Norway               1            7.537  1.616463  1.533524  0.796667   
1      Denmark               2            7.522  1.482383  1.551122  0.792566   
2      Iceland               3            7.504  1.480633  1.610574  0.833552   
3  Switzerland               4            7.494  1.564980  1.516912  0.858131   
4      Finland               5            7.469  1.443572  1.540247  0.809158   

    Freedom  Generosity  Corruption  Dystopia  Job Satisfaction  \
0  0.635423    0.362012    0.315964  2.277027              94.6   
1  0.626007    0.355280    0.400770  2.313707              93.5   
2  0.627163    0.475540    0.153527  2.322715       

In [3]:
# Load the data (re-reads the same CSV so this cell can run on its own)
df = pd.read_csv('World-Happiness-Report.csv')

# Display the first few rows and data info.
print(df.head())
# DataFrame.info() prints its own report and returns None; calling it
# directly avoids the trailing "None" that print(df.info()) produces.
df.info()

       Country  Happiness Rank  Happiness Score   Economy    Family    Health  \
0       Norway               1            7.537  1.616463  1.533524  0.796667   
1      Denmark               2            7.522  1.482383  1.551122  0.792566   
2      Iceland               3            7.504  1.480633  1.610574  0.833552   
3  Switzerland               4            7.494  1.564980  1.516912  0.858131   
4      Finland               5            7.469  1.443572  1.540247  0.809158   

    Freedom  Generosity  Corruption  Dystopia  Job Satisfaction  \
0  0.635423    0.362012    0.315964  2.277027              94.6   
1  0.626007    0.355280    0.400770  2.313707              93.5   
2  0.627163    0.475540    0.153527  2.322715              94.5   
3  0.620071    0.290549    0.367007  2.276716              93.7   
4  0.617951    0.245483    0.382612  2.430182              91.2   

           Region  
0  Western Europe  
1  Western Europe  
2  Western Europe  
3  Western Europe  
4  Western

## 2. Create the Dashboard

In [None]:
# Build a 3x2 dashboard of six panels on a single figure, save it to PNG and
# display it. `fig`, `ax1`, `bars` and `top_10` are reused by the
# interactivity cell further down, so their names must stay stable.

# Set up the dashboard layout
fig = plt.figure(figsize=(20, 30))
gs = GridSpec(3, 2, figure=fig)

# Color scheme: 10 evenly spaced viridis colours (one per bar in panel a;
# the pie chart in panel e uses the first six).
colors = plt.cm.viridis(np.linspace(0, 1, 10))

# a. Bar Chart: Top 10 Happiest Countries
ax1 = fig.add_subplot(gs[0, 0])
top_10 = df.nlargest(10, 'Happiness Score')
bars = ax1.bar(top_10['Country'], top_10['Happiness Score'], color=colors)
ax1.set_title('Top 10 Happiest Countries', fontsize=16)
ax1.set_xlabel('Country', fontsize=12)
ax1.set_ylabel('Happiness Score', fontsize=12)
ax1.tick_params(axis='x', rotation=45)
# Annotate each bar with its score, centred just above the bar top.
for bar in bars:
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width() / 2., height, f'{height:.2f}',
             ha='center', va='bottom')

# b. Scatter Plot: Economy vs. Happiness Score
ax2 = fig.add_subplot(gs[0, 1])
scatter = ax2.scatter(df['Economy'], df['Happiness Score'], c=df['Happiness Score'], cmap='viridis', alpha=0.6)
ax2.set_title('Economy vs. Happiness Score', fontsize=16)
ax2.set_xlabel('Economy', fontsize=12)
ax2.set_ylabel('Happiness Score', fontsize=12)
plt.colorbar(scatter, ax=ax2, label='Happiness Score')

# c. Box Plot: Distribution of Happiness Scores Across Regions
ax3 = fig.add_subplot(gs[1, 0])
df.boxplot(column=['Happiness Score'], by='Region', ax=ax3)
# pandas adds a figure-level "Boxplot grouped by Region" suptitle when `by`
# is used; on a multi-panel figure it would sit above the whole dashboard,
# so clear it and rely on the per-axes title instead.
fig.suptitle('')
ax3.set_title('Distribution of Happiness Scores Across Regions', fontsize=16)
ax3.set_xlabel('Region', fontsize=12)
ax3.set_ylabel('Happiness Score', fontsize=12)
ax3.tick_params(axis='x', rotation=90)

# d. Heatmap: Correlation Between Factors
ax4 = fig.add_subplot(gs[1, 1])
corr_factors = ['Happiness Score', 'Economy', 'Family', 'Health', 'Freedom', 'Generosity', 'Corruption']
corr = df[corr_factors].corr()
# Pin the colour limits to the full correlation range so the neutral colour
# of the diverging 'coolwarm' map corresponds to zero correlation.
im = ax4.imshow(corr, cmap='coolwarm', vmin=-1, vmax=1)
ax4.set_xticks(np.arange(len(corr_factors)))
ax4.set_yticks(np.arange(len(corr_factors)))
ax4.set_xticklabels(corr_factors, rotation=45, ha='right')
ax4.set_yticklabels(corr_factors)
# Write each coefficient into its cell (imshow puts column j on x, row i on y).
for i in range(len(corr_factors)):
    for j in range(len(corr_factors)):
        ax4.text(j, i, f"{corr.iloc[i, j]:.2f}",
                 ha="center", va="center", color="black")
ax4.set_title('Correlation Between Factors', fontsize=16)
plt.colorbar(im, ax=ax4)

# e. Pie Chart: Factor Importance for Happiest Country
ax5 = fig.add_subplot(gs[2, 0])
happiest_country = df.loc[df['Happiness Score'].idxmax()]
factors = ['Economy', 'Family', 'Health', 'Freedom', 'Generosity', 'Corruption']
# A row taken from a mixed-type frame is object-dtype; cast the numeric
# slice to float before handing it to Matplotlib.
values = happiest_country[factors].astype(float)
ax5.pie(values, labels=factors, autopct='%1.1f%%', startangle=90, colors=colors)
ax5.set_title(f'Factor Importance for {happiest_country["Country"]}', fontsize=16)

# f. Scatter Plot: Freedom vs. Happiness Score
ax6 = fig.add_subplot(gs[2, 1])
scatter = ax6.scatter(df['Freedom'], df['Happiness Score'], c=df['Happiness Score'], cmap='viridis', alpha=0.6)
ax6.set_title('Freedom vs. Happiness Score', fontsize=16)
ax6.set_xlabel('Freedom', fontsize=12)
ax6.set_ylabel('Happiness Score', fontsize=12)
plt.colorbar(scatter, ax=ax6, label='Happiness Score')

# Lay out the panels, write the PNG, then render the figure in the notebook.
plt.tight_layout()
plt.savefig('world_happiness_dashboard.png', dpi=300, bbox_inches='tight')
plt.show()

## 3. Interactivity

In [None]:
def on_click(event):
    """Print the country and happiness score of the clicked bar in `ax1`.

    `event` is the Matplotlib mouse event passed to the callback. Clicks that
    land outside `ax1`, or inside it but not on a bar, print nothing.
    """
    # Guard: only clicks inside the top-10 bar chart are of interest.
    if event.inaxes != ax1:
        return

    for position, rect in enumerate(bars):
        # contains() returns (hit, details); only the hit flag matters here.
        hit, _details = rect.contains(event)
        if not hit:
            continue
        # Bars were drawn in top_10 row order, so positions line up.
        clicked_row = top_10.iloc[position]
        country = clicked_row['Country']
        score = clicked_row['Happiness Score']
        print(f"{country}: Happiness Score = {score:.2f}")

# Register the handler for mouse-button presses on the dashboard figure.
fig.canvas.mpl_connect('button_press_event', on_click)

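# Note: the click handler only fires when the figure is shown with an interactive
# Matplotlib backend (e.g. `%matplotlib widget` or `%matplotlib notebook`). With the
# `%matplotlib inline` backend selected at the top of this notebook the figure is a
# static image, so no click events are delivered.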

## 4. Bonus Challenge

In [None]:
# Bonus: a one-column "heat strip" of every country's Happiness Score,
# coloured with a custom red -> yellow -> green colormap.
happiness_cmap = LinearSegmentedColormap.from_list("happiness", ['#FF9999', '#FFFF99', '#99FF99'], N=100)

# Sort happiest-first so the row position (top to bottom) follows the ranking.
ranked_scores = (
    df.sort_values('Happiness Score', ascending=False)
      .set_index('Country')['Happiness Score']
)

# Scale the height with the number of countries so the tick labels stay legible.
# Separate names (strip_fig / strip_ax) keep the dashboard's `fig` intact.
strip_fig, strip_ax = plt.subplots(figsize=(6, max(10, 0.18 * len(ranked_scores))))

# imshow needs a 2-D array: one row per country, a single column of scores.
strip_im = strip_ax.imshow(ranked_scores.to_numpy(dtype=float).reshape(-1, 1),
                           cmap=happiness_cmap, aspect='auto')

# Label each row with its country; the lone column carries no information,
# so drop the x ticks instead of labelling it as a rank axis.
strip_ax.set_yticks(np.arange(len(ranked_scores)))
strip_ax.set_yticklabels(ranked_scores.index, fontsize=6)
strip_ax.set_xticks([])

strip_fig.colorbar(strip_im, ax=strip_ax, label='Happiness Score')
strip_ax.set_title('Happiness Scores by Country')
strip_ax.set_ylabel('Country (happiest at top)')
plt.show()

## 5. Conclusion

This dashboard provides a comprehensive visualization of the World Happiness Report data. It includes various types of plots to showcase different aspects of the data, including comparisons between countries, correlations between factors, and the distribution of happiness scores across regions. The interactive elements and bonus visualization offer additional insights into the complex relationships within the dataset.