<a href="https://colab.research.google.com/github/Dineshkarthi006/data-science/blob/main/DSplots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv('happiness_2017.csv')

# Clean the data: remove rows where Region or HappinessScore is null
df_clean = df.dropna(subset=['Region', 'HappinessScore'])

# 1. Descriptive Statistics
# Generates a summary of mean, std, min, max, and quartiles for all numeric columns
descriptive_stats = df_clean.describe()
descriptive_stats.to_csv('descriptive_statistics.csv')

# 2. Histogram: Distribution of Happiness Score
plt.figure(figsize=(10, 6))
sns.histplot(df_clean['HappinessScore'], kde=True, color='skyblue')
plt.title('Distribution of Happiness Score (2017)')
plt.xlabel('Happiness Score')
plt.ylabel('Frequency')
plt.savefig('happiness_distribution.png')
plt.close()

# 3. Bar Chart: Average Happiness Score by Region
plt.figure(figsize=(12, 6))
avg_happiness_region = df_clean.groupby('Region')['HappinessScore'].mean().sort_values(ascending=False)
sns.barplot(x=avg_happiness_region.values, y=avg_happiness_region.index, palette='viridis')
plt.title('Average Happiness Score by Region')
plt.xlabel('Average Happiness Score')
plt.ylabel('Region')
plt.savefig('avg_happiness_by_region.png')
plt.close()

# 4. Scatter Plot: Happiness Score vs. Log GDP per capita
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df_clean, x='Log GDP per capita', y='HappinessScore', hue='Region', alpha=0.7)
plt.title('Happiness Score vs. Log GDP per capita')
plt.xlabel('Log GDP per capita')
plt.ylabel('Happiness Score')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig('happiness_vs_gdp.png')
plt.close()

# 5. Box Plot: Happiness Score by Region
plt.figure(figsize=(12, 6))
sns.boxplot(x='HappinessScore', y='Region', data=df_clean, palette='Set3')
plt.title('Distribution of Happiness Score by Region')
plt.xlabel('Happiness Score')
plt.ylabel('Region')
plt.savefig('happiness_boxplot_region.png')
plt.close()

# 6. Heatmap: Correlation Matrix
plt.figure(figsize=(12, 10))
# Select only numeric columns for correlation analysis
numeric_df = df_clean.select_dtypes(include=['float64', 'int64'])
correlation_matrix = numeric_df.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix of Happiness Factors')
plt.tight_layout()
plt.savefig('correlation_heatmap.png')
plt.close()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=avg_happiness_region.values, y=avg_happiness_region.index, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(x='HappinessScore', y='Region', data=df_clean, palette='Set3')
