# 📊 World Population Data Visualization Tutorial

**Complete analysis using Matplotlib & Pandas**

This notebook walks through five chart types — a line graph, a histogram, a pie chart, a box plot, and a multi-panel dashboard — built with Matplotlib, Seaborn, and Pandas on a world population dataset.

## 1. Import Libraries and Setup

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set style for better looking plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Configure matplotlib for notebook
%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 11

print("🌍 World Population Data Visualization Tutorial")
print("Libraries loaded successfully!")

## 2. Data Generation and Exploration

In [None]:
# FIXME: paste the create_world_population_data() definition from the main
# script into this cell (above the call). Until it is defined, this cell
# raises NameError on a fresh kernel.

# Generate the dataset
df = create_world_population_data()

# Fail fast if the frame lacks a column that the later sections index by name
# (the line graph, histogram, pie chart and summary all read these four), or
# if it came back empty.
required_columns = {'Country', 'Continent', 'Year', 'Population'}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, f"Dataset is missing expected columns: {sorted(missing_columns)}"
assert len(df) > 0, "Dataset is empty"

print(f"Dataset created: {len(df)} records across {df['Country'].nunique()} countries")
print(f"Time period: {df['Year'].min()} - {df['Year'].max()}")

# Display first few rows (last expression, so the notebook renders it as a table)
df.head()

## 3. Line Graph - Population Growth Over Time

In [None]:
# [Line graph code from main script]
print("📈 Creating line graph for population growth trends...")

# Rank countries by their 2022 population and keep the five largest
snapshot_2022 = df.loc[df['Year'] == 2022]
top_countries = snapshot_2022.nlargest(5, 'Population')['Country'].tolist()

# One line per country, drawn in chronological order on a shared axes
line_style = dict(marker='o', linewidth=2.5, markersize=4)
fig, ax = plt.subplots(figsize=(14, 8))
for country in top_countries:
    country_data = df.loc[df['Country'] == country].sort_values('Year')
    ax.plot(country_data['Year'], country_data['Population'],
            label=country, **line_style)

ax.set_title('Population Growth Over Time - Top 5 Countries', fontsize=16, fontweight='bold')
ax.set_xlabel('Year', fontsize=12, fontweight='bold')
ax.set_ylabel('Population (Millions)', fontsize=12, fontweight='bold')
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
plt.show()

## 4. Histogram - Population Distribution

In [None]:
# [Histogram code from main script]
print("📊 Creating histogram for population distribution...")

# Population of every country in the 2022 rows, as a plain array
pop_2022 = df.loc[df['Year'] == 2022, 'Population'].values

fig, ax = plt.subplots(figsize=(12, 6))
ax.hist(pop_2022, bins=15, color='skyblue', alpha=0.7, edgecolor='black')
ax.set_title('Distribution of Country Populations (2022)', fontsize=16, fontweight='bold')
ax.set_xlabel('Population (Millions)', fontsize=12, fontweight='bold')
ax.set_ylabel('Number of Countries', fontsize=12, fontweight='bold')
# Horizontal guide lines only, so the bars stay easy to read
ax.grid(True, alpha=0.3, axis='y')
plt.show()

# Central tendency of the same 2022 values, printed below the chart
print(f"Mean population: {np.mean(pop_2022):.1f} million")
print(f"Median population: {np.median(pop_2022):.1f} million")

## 5. Pie Charts - Categorical Analysis

In [None]:
# [Pie chart code from main script]
print("🥧 Creating pie charts for categorical analysis...")

# Pie Chart 1: total 2022 population per continent, largest share first
continent_pop = (
    df[df['Year'] == 2022]
    .groupby('Continent')['Population']
    .sum()
    .sort_values(ascending=False)
)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# First pie chart
colors1 = ['#FF9999', '#66B2FF', '#99FF99', '#FFCC99', '#FF99CC']
# pie() cycles through `colors`, so with more wedges than entries the last
# wedge would repeat the first colour right beside it. In that case pass None
# and let matplotlib use its current colour cycle instead.
wedge_colors = colors1 if len(continent_pop) <= len(colors1) else None
ax1.pie(continent_pop.values, labels=continent_pop.index, autopct='%1.1f%%',
        startangle=90, colors=wedge_colors)
ax1.set_title('Population by Continent (2022)', fontweight='bold')

# Second pie chart (growth rate categories would go here)
# TODO: add the growth rate categorization code from the main script.
# Until it exists, hide the unused axes so the figure does not show an empty
# frame with 0-1 ticks next to the pie.
ax2.set_axis_off()

plt.tight_layout()
plt.show()

## 6. Box Plot - Statistical Comparison

In [None]:
# [Box plot code from main script]
# FIXME: this section is a placeholder. The cell only prints the status line
# below; no growth rates are computed and no box plot is drawn yet.
print("📦 Creating box plot for statistical comparison...")

# Calculate growth rates and create box plot
# [Growth rate calculation and box plot visualization code]

## 7. Advanced Dashboard

In [None]:
# [Advanced visualization code from main script]
print("🎯 Creating advanced multi-panel dashboard...")

# Lay out a 2x2 grid of axes: ax1/ax2 on the top row, ax3/ax4 on the bottom row
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(16, 12))

# [Each subplot code would go here]
# NOTE: no panel is populated yet, so the figure renders four empty axes.

fig.tight_layout()
plt.show()

## 8. Summary and Insights

In [None]:
# Final summary statistics and insights
print("📋 FINAL INSIGHTS")
print("=" * 30)

# Summed population over all rows at each end of the period
is_2022 = df['Year'] == 2022
total_pop_2022 = df.loc[is_2022, 'Population'].sum()
total_pop_1960 = df.loc[df['Year'] == 1960, 'Population'].sum()

# Compound annual growth rate, in percent, over the 62 years from 1960 to 2022
growth_factor = total_pop_2022 / total_pop_1960
overall_growth = (growth_factor ** (1/62) - 1) * 100

print(f"Total population growth (1960-2022): {overall_growth:.2f}% annually")
print(f"Current total population: {total_pop_2022:.0f} million")

# Top countries summary: the five largest 2022 populations, ranked from 1
top_5 = df.loc[is_2022].nlargest(5, 'Population')
print("\nTop 5 Most Populous Countries:")
for i, row in enumerate(top_5.to_dict('records'), 1):
    print(f"{i}. {row['Country']}: {row['Population']:.0f}M ({row['Continent']})")