**Unemployment Analysis in India**

**Uploading the Dataset File**

In [1]:
# Upload the dataset archive through Google Colab's file-upload helper.
# NOTE(review): this cell only runs inside Google Colab; `u` (the value returned
# by the upload call) is not referenced again in the visible cells.
from google.colab import files
u=files.upload()

Saving archive (6).zip to archive (6).zip


In [2]:
# Extract the uploaded archive into /content.
# The context manager closes the archive even if extraction raises, and the
# handle is deliberately not named `zip`, which would shadow Python's built-in
# zip() for every cell that runs afterwards.
import zipfile

with zipfile.ZipFile("/content/archive (6).zip", "r") as archive:
    archive.extractall('/content')

**Importing the Required Libraries**

In [83]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [84]:
# Load the unemployment CSV from /content into a DataFrame and display it.
# Several column headers in this file start with a space (for example ' Date'
# and ' Estimated Employed'), so later cells index them with that space included.
data = pd.read_csv('/content/Unemployment_Rate_upto_11_2020.csv')
data

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Region.1,longitude,latitude
0,Andhra Pradesh,31-01-2020,M,5.48,16635535,41.02,South,15.9129,79.740
1,Andhra Pradesh,29-02-2020,M,5.83,16545652,40.90,South,15.9129,79.740
2,Andhra Pradesh,31-03-2020,M,5.79,15881197,39.18,South,15.9129,79.740
3,Andhra Pradesh,30-04-2020,M,20.51,11336911,33.10,South,15.9129,79.740
4,Andhra Pradesh,31-05-2020,M,17.43,12988845,36.46,South,15.9129,79.740
...,...,...,...,...,...,...,...,...,...
262,West Bengal,30-06-2020,M,7.29,30726310,40.39,East,22.9868,87.855
263,West Bengal,31-07-2020,M,6.83,35372506,46.17,East,22.9868,87.855
264,West Bengal,31-08-2020,M,14.87,33298644,47.48,East,22.9868,87.855
265,West Bengal,30-09-2020,M,9.35,35707239,47.73,East,22.9868,87.855


In [85]:
# Display the column labels of `data`; note that several begin with a space.
data.columns.unique()

Index(['Region', ' Date', ' Frequency', ' Estimated Unemployment Rate (%)',
       ' Estimated Employed', ' Estimated Labour Participation Rate (%)',
       'Region.1', 'longitude', 'latitude'],
      dtype='object')

In [86]:
# Data cleaning and transformation.
# The ' Date' values are written day-first (e.g. 31-01-2020). Passing
# dayfirst=True makes that explicit, so a date whose day is 12 or lower is
# never read as month-first.
data[' Date'] = pd.to_datetime(data[' Date'], dayfirst=True)

# Calendar year of each observation.
data['Year'] = data[' Date'].dt.year

**Average Estimated Employed in each Region**

In [87]:
# Mean of ' Estimated Employed' for every Region, kept as a flat two-column frame
avg_employed_by_region = data.groupby('Region', as_index=False)[' Estimated Employed'].mean()

# One bar per Region; bar height and bar colour are both driven by the mean
fig = px.bar(
    avg_employed_by_region,
    x='Region',
    y=' Estimated Employed',
    color=' Estimated Employed',
    color_continuous_scale='Viridis',
    labels={' Estimated Employed': ' Estimated Employed'},
    title='Average Estimated Employed by Region(States)',
)

# Fixed canvas size, legend switched off
fig.update_layout(width=1100, height=600, showlegend=False)

# Render the figure
fig.show()

**Average Estimated Unemployment Rate by Region (States)**

In [88]:
# Calculate average unemployment rate by Region
avg_unemployment_by_region = data.groupby('Region')[' Estimated Unemployment Rate (%)'].mean().reset_index()

# Create a bar chart using plotly express.
# Bars are ordered from the lowest to the highest mean rate. category_orders is
# documented as a dict of column name -> list of values, so the sorted Region
# column is converted to a plain list instead of being passed as a pandas Series.
fig = px.bar(avg_unemployment_by_region, x='Region', y=' Estimated Unemployment Rate (%)',
             title='Average Estimated Unemployment Rate by Region (States)',
             labels={' Estimated Unemployment Rate (%)': ' Estimated Unemployment Rate (%)'},
             color=' Estimated Unemployment Rate (%)', color_continuous_scale='Viridis',
             category_orders={'Region': avg_unemployment_by_region.sort_values(
                 by=' Estimated Unemployment Rate (%)')['Region'].tolist()})

# Improve the layout
fig.update_layout(showlegend=False, width=1100, height=600)

# Show the plot
fig.show()

**From the bar chart above, we can see that Haryana and Tripura have high average estimated unemployment rates.**

**Pie Chart for Average Unemployment Rates by Zone**

In [89]:
# Calculate average unemployment rate by Region.1 (Directions)
avg_unemployment_by_direction = data.groupby('Region.1')[' Estimated Unemployment Rate (%)'].mean().reset_index()

# Create a pie chart using plotly express
fig = px.pie(avg_unemployment_by_direction, values=' Estimated Unemployment Rate (%)', names='Region.1',
             title='Average Unemployment Rates by Region.1 (Zones)',
             hover_data=[' Estimated Unemployment Rate (%)', 'Region.1'],
             labels={' Estimated Unemployment Rate (%)': ' Unemployment Rate (%)'})

# Label each slice with its zone name and its percentage share of the pie
# (the mean rate itself is available on hover). A scalar `pull` separates every
# slice equally however many zones the data contains; the previous fixed
# four-element list would leave any fifth or later slice unpulled.
fig.update_traces(textinfo='percent+label', pull=0.1,
                  marker=dict(line=dict(color='#000000', width=1)))

# Improve the layout
fig.update_layout(showlegend=False, width=600, height=600)

# Show the plot
fig.show()

**From the pie chart above, we can conclude that the North zone has the highest average unemployment rate.**

**Sunburst Chart of Unemployment Rate by Zone and Region (State)**

In [90]:
# Mean unemployment rate for every (Region.1, Region) pair, as a flat table
unemployment = data.groupby(['Region.1', 'Region'], as_index=False)[' Estimated Unemployment Rate (%)'].mean()
unemployment

Unnamed: 0,Region.1,Region,Estimated Unemployment Rate (%)
0,East,Bihar,19.471
1,East,Jharkhand,19.539
2,East,Odisha,6.462
3,East,West Bengal,10.192
4,North,Delhi,18.414
5,North,Haryana,27.477
6,North,Himachal Pradesh,16.065
7,North,Jammu & Kashmir,16.477778
8,North,Punjab,11.981
9,North,Rajasthan,15.868


In [91]:
# Two-level sunburst built from `unemployment`: the path runs from 'Region.1'
# (first level) to 'Region' (second level), and sector values come from the
# mean ' Estimated Unemployment Rate (%)' column.
# NOTE(review): the first-level sectors are sized by plotly from their children,
# so a zone with more states will look larger - confirm this is the intended reading.
fig = px.sunburst(unemployment,path=['Region.1','Region'],values=' Estimated Unemployment Rate (%)',
                 title ='Unemployment rate in state and region',height=600)
fig.show()

**Average Estimated Unemployment Rate Before and After Lockdown by Region(States)**

In [92]:
# Reference dates for the before/after comparison.
# NOTE: before_lockdown_start is defined but not applied to the filter below, so
# the "before" period is every observation dated earlier than lockdown_start.
before_lockdown_start = pd.to_datetime('2020-03-01')
lockdown_start = pd.to_datetime('2020-03-25')  # Assuming lockdown started on March 25, 2020

# Split the observations at the lockdown date
before_lockdown_data = data[data[' Date'] < lockdown_start]
after_lockdown_data = data[data[' Date'] >= lockdown_start]

# Calculate average unemployment rate for each region and period
avg_unemployment_before_lockdown = before_lockdown_data.groupby('Region')[' Estimated Unemployment Rate (%)'].mean().reset_index()
avg_unemployment_after_lockdown = after_lockdown_data.groupby('Region')[' Estimated Unemployment Rate (%)'].mean().reset_index()

# Merge the two per-region averages side by side. pd.merge defaults to an inner
# join, so a Region missing from either period is dropped from the comparison.
merged_data = pd.merge(avg_unemployment_before_lockdown, avg_unemployment_after_lockdown, on='Region', suffixes=('_before', '_after'))

# Create a grouped bar chart using plotly express.
# Regions are ordered by their pre-lockdown mean. category_orders is documented
# as a dict of column name -> list of values, so the sorted Region column is
# converted to a plain list instead of being passed as a pandas Series.
fig = px.bar(merged_data, x='Region', y=[' Estimated Unemployment Rate (%)_before', ' Estimated Unemployment Rate (%)_after'],
             title='Average Estimated Unemployment Rate Before and After Lockdown by Region(States)',
             labels={'value': 'Estimated Unemployment Rate (%)', 'variable': 'Lockdown Period'},
             category_orders={'Region': merged_data.sort_values(
                 by=' Estimated Unemployment Rate (%)_before')['Region'].tolist()},
             color_discrete_sequence=['#636EFA', '#EF553B'],  # Color sequence for before and after lockdown
             height=600)

# Improve the layout
fig.update_layout(barmode='group', showlegend=True, legend_title_text='Lockdown Period', width=1200)

# Show the plot
fig.show()

**Hence, we can conclude that Haryana and Puducherry faced a much larger estimated unemployment rate after the lockdown than before it.**