# Exploratory Data Analysis (EDA): South Asia Population & Migration Dataset (1960-2023)
This notebook performs basic exploratory data analysis (EDA) on the South Asia Population & Migration Dataset, which includes data from Afghanistan, Bangladesh, India, Pakistan, and Sri Lanka.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Path of the source CSV inside the Kaggle input directory
file_path = '/kaggle/input/population-and-net-migration-dataset-world-bank/pop_and_net_migration.csv'

# Read the CSV into the DataFrame that the following cells work on
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the top five rows (final expression, so the notebook renders it)
data.head(n=5)

## Dataset Overview

In [None]:
# Report the dimensions of the frame (row count, then column count)
print(f"Dataset contains {data.shape[0]} rows and {data.shape[1]} columns.")

# Per-column dtypes and non-null counts
data.info()

# Number of missing entries in each column
print("Missing values:")
missing_per_column = data.isna().sum()
print(missing_per_column)

# Descriptive statistics; as the final expression this is what the cell displays
data.describe()

## Population Growth Over Time for Afghanistan

In [None]:
# Keep only the rows whose Country is Afghanistan
afghanistan_data = data[data['Country'].eq('Afghanistan')]

# Draw total population against year on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=afghanistan_data, x='Year', y='total_population', marker='o', ax=ax)
ax.set_title('Population Growth in Afghanistan (1960-2023)')
ax.set_xlabel('Year')
ax.set_ylabel('Total Population')
ax.grid()
plt.show()

## Population Comparison Across Countries in 2023

In [None]:
# Keep only the rows recorded for the year 2023
data_2023 = data[data['Year'].eq(2023)]

# One bar per country showing its total population in that year
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=data_2023, x='Country', y='total_population', ax=ax)
ax.set_title('Population Comparison Across Countries (2023)')
ax.set_ylabel('Total Population')
ax.set_xlabel('Country')
# Tilt the country names on the x axis by 45 degrees
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

## Net Migration Trends Over Time

In [None]:
# Net migration by year, one coloured line per country, on a single axes
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=data,
    x='Year',
    y='net_migration',
    hue='Country',
    marker='o',
    ax=ax,
)
ax.set_title('Net Migration Trends (1960-2023)')
ax.set_xlabel('Year')
ax.set_ylabel('Net Migration')
ax.legend(title='Country')
ax.grid()
plt.show()

## Population Growth Rates

In [None]:
# Calculate year-on-year population growth percentage.
# The rows are ordered by Country and Year before differencing so that each
# change is measured against the previous recorded year even if the CSV is not
# stored chronologically; the result is assigned back by index, so the row
# order of `data` itself is left untouched.
# fill_method=None stops pandas from forward-filling a missing population
# before differencing (which would show up as a misleading 0% growth) and
# avoids relying on the implicit fill default that recent pandas deprecates.
data['population_growth'] = (
    data.sort_values(['Country', 'Year'])
    .groupby('Country')['total_population']
    .pct_change(fill_method=None)
    * 100
)

# Display growth rates (the earliest year of each country has no prior value,
# so its growth is NaN)
data[['Country', 'Year', 'population_growth']].head()

## Correlation Between Total Population and Net Migration

In [None]:
# Pairwise correlation between the two columns, computed over all rows of `data`
corr_matrix = data[['total_population', 'net_migration']].corr()

# Render the matrix as an annotated heatmap
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Between Total Population and Net Migration')
plt.show()

## Save Processed Data

In [None]:
# Destination of the exported CSV in the Kaggle working directory
processed_file_path = '/kaggle/working/processed_data.csv'

# Write the current state of `data` to disk, leaving out the index column
data.to_csv(path_or_buf=processed_file_path, index=False)

print(f"Processed data saved at {processed_file_path}")