<a href="https://colab.research.google.com/github/Engr-Usman-Ali/CodeAlpha_Task_Unemployment_Analysis/blob/main/CodeAlpha_Task_Unemployment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Clone the project repository into the current working directory via a shell escape.
# NOTE(review): the load cell reads the CSV from /content/CodeAlpha_Task_Unemployment_Analysis/,
# so this presumably expects /content to be the working directory (Colab default) — confirm.
!git clone https://github.com/Engr-Usman-Ali/CodeAlpha_Task_Unemployment_Analysis.git

In [None]:
# Load the dataset (absolute path inside the cloned repository)
file_path = "/content/CodeAlpha_Task_Unemployment_Analysis/Unemployment in India.csv"
raw_df = pd.read_csv(file_path)


def _strip_if_str(value):
    """Return ``value`` without surrounding whitespace if it is a string; otherwise unchanged."""
    return value.strip() if isinstance(value, str) else value


# Cleaning the dataset.
# The steps are chained from `raw_df` instead of mutating in place, so re-running
# this cell always starts from the untouched file contents.
df = (
    raw_df
    .rename(columns=str.strip)  # Remove spaces in column names
    # Trim spaces in values. DataFrame.applymap is deprecated since pandas 2.1;
    # a per-column Series.map does the same element-wise work on every pandas version.
    .apply(lambda column: column.map(_strip_if_str))
    .dropna()  # Remove rows that contain any missing value
)

In [None]:

# Parse the day-month-year strings of the 'Date' column into datetime values
date_format = "%d-%m-%Y"
parsed_dates = pd.to_datetime(df["Date"], format=date_format)
df["Date"] = parsed_dates

# Compute and print the descriptive statistics of the cleaned frame
summary_stats = df.describe()
print("Summary Statistics:\n", summary_stats)

In [None]:
# Count the distinct regions and collect the distinct area categories
region_column = df["Region"]
area_column = df["Area"]

unique_regions = region_column.nunique()
unique_areas = area_column.unique()

# Report both results
print(f"\nTotal Regions: {unique_regions}")
print(f"Areas: {unique_areas}")

In [None]:
# Set plot style. `set_theme` is the preferred name; `sns.set` is only an alias for it.
sns.set_theme(style="whitegrid")

# 1. **Unemployment Rate Trends Over Time**
# One line per Area. When several rows share a date within an Area, seaborn's
# lineplot draws their mean with an error band by default.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=df,
    x="Date",
    y="Estimated Unemployment Rate (%)",
    hue="Area",
    ax=ax,
)
ax.set_title("Unemployment Rate Trends in India (Rural vs Urban)")
# The x-axis carries the full 'Date' values, not years, so label it accordingly.
ax.set_xlabel("Date")
ax.set_ylabel("Unemployment Rate (%)")
ax.legend(title="Area")
ax.tick_params(axis="x", labelrotation=45)  # slant the date labels for readability
plt.show()

In [None]:
# 2. **Region-wise Unemployment Rate (Top 10)**
# Mean unemployment rate per region, sorted descending, keeping the ten highest.
top_regions = (
    df.groupby("Region")["Estimated Unemployment Rate (%)"]
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

fig, ax = plt.subplots(figsize=(14, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13. Mapping `hue` to the
# same categories as x keeps one colour per bar; `dodge=False` keeps the bars
# full-width on seaborn versions that would otherwise offset hue levels.
sns.barplot(
    x=top_regions.index,
    y=top_regions.values,
    hue=top_regions.index,
    palette="Reds_r",
    dodge=False,
    ax=ax,
)
# The hue legend would only repeat the x tick labels; drop it if seaborn drew one.
if ax.get_legend() is not None:
    ax.get_legend().remove()
ax.set_title("Top 10 Regions with Highest Unemployment Rate")
ax.set_xlabel("Region")
ax.set_ylabel("Average Unemployment Rate (%)")
ax.tick_params(axis="x", labelrotation=45)  # region names are long; slant them
plt.show()

In [None]:
# 3. **Labour Participation Rate Distribution**
# Histogram (20 bins) of the participation rate with a KDE curve overlaid.
participation_rate = df["Estimated Labour Participation Rate (%)"]

fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(participation_rate, bins=20, kde=True, color="blue", ax=ax)
ax.set_title("Distribution of Labour Participation Rate in India")
ax.set_xlabel("Labour Participation Rate (%)")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# 4. **Urban vs Rural Employment Comparison**
# Box plot of the 'Estimated Employed' values, one box per Area category.
fig, ax = plt.subplots(figsize=(12, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13. Mapping `hue` to the
# x variable keeps one colour per box; `dodge=False` keeps each box centred on its
# tick on seaborn versions that would otherwise offset hue levels.
sns.boxplot(
    data=df,
    x="Area",
    y="Estimated Employed",
    hue="Area",
    palette="coolwarm",
    dodge=False,
    ax=ax,
)
# The hue legend would only repeat the x tick labels; drop it if seaborn drew one.
if ax.get_legend() is not None:
    ax.get_legend().remove()
ax.set_title("Comparison of Estimated Employed Population (Rural vs Urban)")
ax.set_xlabel("Area")
ax.set_ylabel("Estimated Employed Population")
plt.show()