In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import datetime as dt
import calendar

In [64]:
# Load the unemployment dataset; the path is relative to the notebook's
# working directory.
csv_path = "Unemployment_Rate_upto_11_2020.csv"
df = pd.read_csv(csv_path)
# Bare last expression so the loaded frame is rendered as the cell output.
df

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Region.1,longitude,latitude
0,Andhra Pradesh,31-01-2020,M,5.48,16635535,41.02,South,15.9129,79.740
1,Andhra Pradesh,29-02-2020,M,5.83,16545652,40.90,South,15.9129,79.740
2,Andhra Pradesh,31-03-2020,M,5.79,15881197,39.18,South,15.9129,79.740
3,Andhra Pradesh,30-04-2020,M,20.51,11336911,33.10,South,15.9129,79.740
4,Andhra Pradesh,31-05-2020,M,17.43,12988845,36.46,South,15.9129,79.740
...,...,...,...,...,...,...,...,...,...
262,West Bengal,30-06-2020,M,7.29,30726310,40.39,East,22.9868,87.855
263,West Bengal,31-07-2020,M,6.83,35372506,46.17,East,22.9868,87.855
264,West Bengal,31-08-2020,M,14.87,33298644,47.48,East,22.9868,87.855
265,West Bengal,30-09-2020,M,9.35,35707239,47.73,East,22.9868,87.855


In [65]:
# Show the frame's dimensions as a (rows, columns) tuple.
df.shape

(267, 9)

In [66]:
# Replace the CSV headers with short lower-case names. The assignment is
# positional, so this list needs exactly one entry per column, in file order.
column_names = [
    "state",
    "date",
    "frequency",
    "estimated unemployment rate",
    "estimated employed",
    "estimated labour participation rate",
    "region",
    "longitude",
    # NOTE(review): "latutude" looks like a typo for "latitude"; it is kept
    # because later cells or outputs may rely on this exact name.
    # NOTE(review): the sample rows show ~15.9 under "longitude" and ~79.7
    # here for Andhra Pradesh, which looks swapped -- confirm against the CSV.
    "latutude",
]
df.columns = column_names
df.head()

Unnamed: 0,state,date,frequency,estimated unemployment rate,estimated employed,estimated labour participation rate,region,longitude,latutude
0,Andhra Pradesh,31-01-2020,M,5.48,16635535,41.02,South,15.9129,79.74
1,Andhra Pradesh,29-02-2020,M,5.83,16545652,40.9,South,15.9129,79.74
2,Andhra Pradesh,31-03-2020,M,5.79,15881197,39.18,South,15.9129,79.74
3,Andhra Pradesh,30-04-2020,M,20.51,11336911,33.1,South,15.9129,79.74
4,Andhra Pradesh,31-05-2020,M,17.43,12988845,36.46,South,15.9129,79.74


In [67]:
# Summary statistics, transposed to one row per described column and rounded
# to whole numbers for readability.
df.describe().T.round()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
estimated unemployment rate,267.0,12.0,11.0,0.0,5.0,10.0,17.0,76.0
estimated employed,267.0,13962106.0,13366318.0,117542.0,2838930.0,9732417.0,21878686.0,59433759.0
estimated labour participation rate,267.0,42.0,8.0,17.0,37.0,40.0,44.0,70.0
longitude,267.0,23.0,6.0,11.0,18.0,24.0,27.0,34.0
latutude,267.0,81.0,6.0,71.0,76.0,79.0,85.0,93.0


In [68]:
# Parse the day-first date strings (e.g. 31-01-2020) into datetimes and store
# them back in the same column; the month columns are derived from it later.
parsed_dates = pd.to_datetime(df["date"], dayfirst=True)
df["date"] = parsed_dates

In [69]:
# Numeric month (1-12) taken from the parsed date column.
df["month_int"] = df["date"].dt.month
# Three-letter month label ("Jan", "Feb", ...) looked up from the month number.
df["month"] = df["month_int"].map(lambda month_number: calendar.month_abbr[month_number])
df.head()

Unnamed: 0,state,date,frequency,estimated unemployment rate,estimated employed,estimated labour participation rate,region,longitude,latutude,month_int,month
0,Andhra Pradesh,2020-01-31,M,5.48,16635535,41.02,South,15.9129,79.74,1,Jan
1,Andhra Pradesh,2020-02-29,M,5.83,16545652,40.9,South,15.9129,79.74,2,Feb
2,Andhra Pradesh,2020-03-31,M,5.79,15881197,39.18,South,15.9129,79.74,3,Mar
3,Andhra Pradesh,2020-04-30,M,20.51,11336911,33.1,South,15.9129,79.74,4,Apr
4,Andhra Pradesh,2020-05-31,M,17.43,12988845,36.46,South,15.9129,79.74,5,May


In [70]:
# Numeric data grouped by month: mean of each metric across all rows of that
# month, with "month" restored as a regular column.
monthly_metrics = [
    "estimated unemployment rate",
    "estimated employed",
    "estimated labour participation rate",
]
IND = df.groupby(["month"])[monthly_metrics].mean().reset_index()

In [71]:
# Bar chart of the monthly mean unemployment rate and labour participation
# rate, one trace per metric.
month = IND["month"]
unemployment_rate = IND["estimated unemployment rate"]
labour_participation_rate = IND["estimated labour participation rate"]

# Calendar order for the x axis, supplied explicitly as the category array.
month_order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct"]

fig = go.Figure(
    data=[
        go.Bar(x=month, y=unemployment_rate, name="Unemployment Rate"),
        go.Bar(x=month, y=labour_participation_rate, name="Labour Participation Rate"),
    ]
)
fig.update_layout(
    title="Unemployment Rate and Labour Participation Rate",
    xaxis=dict(categoryorder="array", categoryarray=month_order),
)
fig.show()

In [72]:
# Mean estimated employed head-count per month, one coloured bar per month.
# The labels in category_orders must match the values in the "month" column
# exactly (case-sensitive). Those values come from calendar.month_abbr, so
# February is "Feb"; the previous "feb" entry did not match any value and
# therefore did not position February in the requested order.
month_order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct"]
fig = px.bar(IND, x='month', y='estimated employed', color='month',
             category_orders={"month": month_order},
             title='estimated employed people from Jan 2020 to Oct 2020')
fig.show()

In [73]:
# State-wise analysis: mean of each metric per state, with "state" restored
# as a regular column.
state_metrics = [
    "estimated unemployment rate",
    "estimated employed",
    "estimated labour participation rate",
]
state = df.groupby(["state"])[state_metrics].mean().reset_index()

In [74]:
# Box plot of the unemployment-rate distribution for every state, using the
# row-level data so each box covers all of that state's observations.
fig = px.box(
    df,
    x='state',
    y='estimated unemployment rate',
    color='state',
    title='Unemployment rate',
)
fig.update_layout(xaxis_categoryorder='total descending')
fig.show()

In [75]:
# Average unemployment rate per state, bars ordered from highest to lowest.
fig = px.bar(
    state,
    x='state',
    y="estimated unemployment rate",
    color="state",
    title="Average unemployment Rate(state)",
)
fig.update_layout(xaxis_categoryorder='total descending')
fig.show()

In [76]:
# Animated bar plot of the unemployment rate per state, one frame per month.
fig = px.bar(
    df,
    x='state',
    y='estimated unemployment rate',
    animation_frame='month',
    color='state',
    title='Unemployment rate from Jan 2020 to Oct 2020 (State)',
)
fig.update_layout(xaxis_categoryorder='total descending')
# Set the frame duration carried in the first button's animation arguments
# (value is in milliseconds per the plotly animation options).
first_button = fig.layout.updatemenus[0].buttons[0]
first_button.args[1]["frame"]["duration"] = 2000
fig.show()

In [77]:
# Regional analysis: list the distinct region labels present in the data.
df["region"].unique()

array(['South', 'Northeast', 'East', 'West', 'North'], dtype=object)

In [78]:
# Numeric data grouped by region: mean of each metric per region, with
# "region" restored as a regular column.
region_metrics = [
    "estimated unemployment rate",
    "estimated employed",
    "estimated labour participation rate",
]
region = df.groupby(["region"])[region_metrics].mean().reset_index()

In [79]:
# Scatter-plot matrix of the three numeric metrics, points coloured by region.
scatter_dimensions = [
    'estimated unemployment rate',
    "estimated employed",
    "estimated labour participation rate",
]
fig = px.scatter_matrix(df, dimensions=scatter_dimensions, color='region')
fig.show()

In [80]:
# Average unemployment rate per region, bars ordered from highest to lowest.
fig = px.bar(
    region,
    x="region",
    y="estimated unemployment rate",
    color="region",
    title="Average Unemployment Rate(Region)",
)
fig.update_layout(xaxis_categoryorder='total descending')
fig.show()

In [81]:
# Animated bar plot of the unemployment rate per region, one frame per month;
# bars are coloured by state, so each region's bar is split into its states.
fig = px.bar(
    df,
    x="region",
    y="estimated unemployment rate",
    animation_frame='month',
    color="state",
    title="Unemployment rate from Jan 2020 to Oct 2020",
)
fig.update_layout(xaxis_categoryorder='total descending')
# Set the frame duration carried in the first button's animation arguments
# (value is in milliseconds per the plotly animation options).
first_button = fig.layout.updatemenus[0].buttons[0]
first_button.args[1]["frame"]["duration"] = 2000
fig.show()

In [82]:
# Mean unemployment rate for every (region, state) pair, flattened back into
# ordinary columns.
rate_by_region_state = df.groupby(['region', 'state'])['estimated unemployment rate'].mean()
unemployment = rate_by_region_state.reset_index()
unemployment.head()

Unnamed: 0,region,state,estimated unemployment rate
0,East,Bihar,19.471
1,East,Jharkhand,19.539
2,East,Odisha,6.462
3,East,West Bengal,10.192
4,North,Delhi,18.414


In [83]:
# Sunburst chart: regions on the inner ring, their states on the outer ring,
# sector size driven by the mean unemployment rate.
fig = px.sunburst(
    unemployment,
    path=['region', 'state'],
    values='estimated unemployment rate',
    title='Unemployment rate in every state and region',
    height=650,
)
fig.show()

In [84]:
# Unemployment rate before and after lockdown: split the rows by month number.
# "Before" covers months 1-3 and "after" covers months 4-6; later months are
# in neither subset.
month_number = df['month_int']
before_lockdown = df[month_number.between(1, 3)]
after_lockdown = df[month_number.between(4, 6)]

In [85]:
# Mean unemployment rate per state in each period.
af_lockdown = after_lockdown.groupby('state')['estimated unemployment rate'].mean().reset_index()
lockdown = before_lockdown.groupby('state')['estimated unemployment rate'].mean().reset_index()

# Combine the two periods by matching on the state name. Previously the
# "after" column was copied across by positional row index, which silently
# pairs values with the wrong state whenever a state is missing from one of
# the two periods. A left merge keeps one row per state of the "before" table
# (same rows and order as before) and leaves NaN where no "after" data exists.
lockdown = lockdown.rename(
    columns={'estimated unemployment rate': 'unemployment rate before lockdown'}
).merge(
    af_lockdown.rename(
        columns={'estimated unemployment rate': 'unemployment rate after lockdown'}
    ),
    on='state',
    how='left',
)
lockdown.head()

Unnamed: 0,state,unemployment rate before lockdown,unemployment rate after lockdown
0,Andhra Pradesh,5.7,13.75
1,Assam,4.613333,7.07
2,Bihar,12.11,36.806667
3,Chhattisgarh,8.523333,9.38
4,Delhi,18.036667,25.713333


In [86]:
# Unemployment rate change after lockdown, as a percentage of the pre-lockdown
# rate: (after - before) / before * 100, rounded to two decimals.
# The earlier expression lacked parentheses, so division bound tighter than
# subtraction and it evaluated to `after - before/before`, i.e. `after - 1`.
# The result is scaled by 100 because the plot that consumes this column is
# titled as a percentage change.
before_rate = lockdown['unemployment rate before lockdown']
after_rate = lockdown['unemployment rate after lockdown']
lockdown['rate change in unemployment'] = ((after_rate - before_rate) / before_rate * 100).round(2)

In [87]:
# Bar plot of the per-state change in unemployment rate after lockdown,
# ordered from the smallest change to the largest.
fig = px.bar(
    lockdown,
    x='state',
    y='rate change in unemployment',
    color='rate change in unemployment',
    title='Percentage change in unemployment rate in each state after lockdown',
    template="ggplot2",
)
fig.update_layout(xaxis_categoryorder='total ascending')
fig.show()