In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import datetime as dt
import calendar

import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [None]:
# Load the unemployment dataset from a CSV expected in the working directory,
# then preview the first rows to sanity-check the parse.
df= pd.read_csv('Unemployment_Rate.csv')
df.head()

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Region.1,longitude,latitude
0,Andhra Pradesh,31-01-2020,M,5.48,16635535,41.02,South,15.9129,79.74
1,Andhra Pradesh,29-02-2020,M,5.83,16545652,40.9,South,15.9129,79.74
2,Andhra Pradesh,31-03-2020,M,5.79,15881197,39.18,South,15.9129,79.74
3,Andhra Pradesh,30-04-2020,M,20.51,11336911,33.1,South,15.9129,79.74
4,Andhra Pradesh,31-05-2020,M,17.43,12988845,36.46,South,15.9129,79.74


In [None]:
# Print column dtypes, non-null counts and memory usage for the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 267 entries, 0 to 266
Data columns (total 9 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   Region                                    267 non-null    object 
 1    Date                                     267 non-null    object 
 2    Frequency                                267 non-null    object 
 3    Estimated Unemployment Rate (%)          267 non-null    float64
 4    Estimated Employed                       267 non-null    int64  
 5    Estimated Labour Participation Rate (%)  267 non-null    float64
 6   Region.1                                  267 non-null    object 
 7   longitude                                 267 non-null    float64
 8   latitude                                  267 non-null    float64
dtypes: float64(4), int64(1), object(4)
memory usage: 18.9+ KB


In [None]:
# Number of missing values in each column (isna is the canonical name for isnull).
df.isna().sum()

Region                                      0
 Date                                       0
 Frequency                                  0
 Estimated Unemployment Rate (%)            0
 Estimated Employed                         0
 Estimated Labour Participation Rate (%)    0
Region.1                                    0
longitude                                   0
latitude                                    0
dtype: int64

In [None]:
# Show the raw column labels exactly as parsed; useful for spotting stray
# leading whitespace in the header names before they are renamed.
df.columns

Index(['Region', ' Date', ' Frequency', ' Estimated Unemployment Rate (%)',
       ' Estimated Employed', ' Estimated Labour Participation Rate (%)',
       'Region.1', 'longitude', 'latitude'],
      dtype='object')

In [None]:
# Replace the raw headers (several carry a leading space) with clean,
# lowercase names. The assignment is positional, so the order below must
# match the order of the nine columns in the CSV.
df.columns = [
    'state',
    'date',
    'frequency',
    'estimated unemployment rate',
    'estimated employed',
    'estimated labour participation rate',
    'region',
    'longitude',
    'latitude',
]
df.head()

Unnamed: 0,state,date,frequency,estimated unemployment rate,estimated employed,estimated labour participation rate,region,longitude,latitude
0,Andhra Pradesh,31-01-2020,M,5.48,16635535,41.02,South,15.9129,79.74
1,Andhra Pradesh,29-02-2020,M,5.83,16545652,40.9,South,15.9129,79.74
2,Andhra Pradesh,31-03-2020,M,5.79,15881197,39.18,South,15.9129,79.74
3,Andhra Pradesh,30-04-2020,M,20.51,11336911,33.1,South,15.9129,79.74
4,Andhra Pradesh,31-05-2020,M,17.43,12988845,36.46,South,15.9129,79.74


Casting the date column to a datetime type
Converting the frequency and region columns to categorical

In [None]:
# The dates are written day-first (e.g. 31-01-2020), hence dayfirst=True.
df['date'] = pd.to_datetime(df['date'], dayfirst=True)

# Store the low-cardinality label columns as pandas categoricals.
df['frequency'] = pd.Categorical(df['frequency'])
df['region'] = pd.Categorical(df['region'])

Extracting Month from Date

In [None]:
# Calendar month number (1-12) taken from the parsed date, stored as int64.
df['Month'] = df['date'].dt.month.astype('int64')

# Abbreviated month label ('Jan', 'Feb', ...) looked up from the stdlib calendar table.
df['Month_name'] = df['Month'].map(lambda month_no: calendar.month_abbr[month_no])

In [None]:
# Preview the frame again to confirm the parsed dates and the new Month / Month_name columns.
df.head()

Unnamed: 0,state,date,frequency,estimated unemployment rate,estimated employed,estimated labour participation rate,region,longitude,latitude,Month,Month_name
0,Andhra Pradesh,2020-01-31,M,5.48,16635535,41.02,South,15.9129,79.74,1,Jan
1,Andhra Pradesh,2020-02-29,M,5.83,16545652,40.9,South,15.9129,79.74,2,Feb
2,Andhra Pradesh,2020-03-31,M,5.79,15881197,39.18,South,15.9129,79.74,3,Mar
3,Andhra Pradesh,2020-04-30,M,20.51,11336911,33.1,South,15.9129,79.74,4,Apr
4,Andhra Pradesh,2020-05-31,M,17.43,12988845,36.46,South,15.9129,79.74,5,May


In [None]:
# Descriptive statistics for the three numeric indicators, transposed so that
# each indicator is a row, rounded to two decimals for display.
df_stats = df[
    [
        'estimated unemployment rate',
        'estimated employed',
        'estimated labour participation rate',
    ]
]
df_stats.describe().transpose().round(2)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
estimated unemployment rate,267.0,12.24,10.8,0.5,4.84,9.65,16.76,75.85
estimated employed,267.0,13962105.72,13366318.36,117542.0,2838930.5,9732417.0,21878686.0,59433759.0
estimated labour participation rate,267.0,41.68,7.85,16.77,37.26,40.39,44.06,69.69


Monthwise Analysis of Unemployment rate and estimated labor participation

In [None]:
# Mean of each indicator per month label. Groups come back sorted
# alphabetically by label, so the plots impose calendar order themselves.
data = (
    df.groupby('Month_name', as_index=False)[
        [
            'estimated unemployment rate',
            'estimated employed',
            'estimated labour participation rate',
        ]
    ]
    .mean()
)
data

Unnamed: 0,Month_name,estimated unemployment rate,estimated employed,estimated labour participation rate
0,Apr,22.236154,10570200.0,35.297308
1,Aug,10.313333,14429040.0,42.390741
2,Feb,9.266154,15488270.0,44.180769
3,Jan,9.196538,15637200.0,44.626538
4,Jul,9.834444,14418020.0,42.274815
5,Jun,10.911111,13857390.0,41.203333
6,Mar,10.782593,14538660.0,43.746667
7,May,23.244444,11507390.0,39.647778
8,Oct,8.026296,14579980.0,41.438519
9,Sep,8.705926,14587840.0,41.974444


In [None]:
# Bar chart of the mean estimated employed count per month, with the x axis
# forced into calendar order instead of the alphabetical order of `data`.
fig = px.bar(
    data,
    x='Month_name',
    y='estimated employed',
    color='Month_name',
    category_orders=dict(
        Month_name=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct'],
    ),
    title='Estimated employed people from Jan 2020 to Oct 2020',
)
fig.show()

The bar plot above shows that the estimated number of employed people dropped during April-June, when COVID-19 was at its peak.

In [None]:
# Two bar traces per month: mean unemployment rate and mean labour
# participation rate. The x axis is pinned to calendar order.
fig = go.Figure(
    data=[
        go.Bar(
            x=data['Month_name'],
            y=data['estimated unemployment rate'],
            name='Unemployment Rate',
        ),
        go.Bar(
            x=data['Month_name'],
            y=data['estimated labour participation rate'],
            name='Labour Participation Rate',
        ),
    ]
)
fig.update_layout(
    title='Unemployment Rate and Labour Participation',
    xaxis=dict(
        categoryorder='array',
        categoryarray=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct'],
    ),
)
fig.show()

The plot above shows that the unemployment rate was highest during April-June, when COVID-19 was at its peak.

**Analysis of Unemployment rate In various states of India**

In [None]:
# Mean of each indicator per state, returned as a flat frame with `state`
# as an ordinary column.
state = (
    df.groupby('state', as_index=False)[
        [
            'estimated unemployment rate',
            'estimated employed',
            'estimated labour participation rate',
        ]
    ]
    .mean()
)
state

Unnamed: 0,state,estimated unemployment rate,estimated employed,estimated labour participation rate
0,Andhra Pradesh,8.664,15425480.0,38.962
1,Assam,4.856,10810280.0,43.498
2,Bihar,19.471,23606830.0,37.173
3,Chhattisgarh,7.819,8421349.0,41.161
4,Delhi,18.414,4632822.0,35.857
5,Goa,12.167,442374.8,39.242
6,Gujarat,6.376,22730750.0,45.49
7,Haryana,27.477,6844059.0,42.1
8,Himachal Pradesh,16.065,2033885.0,40.252
9,Jammu & Kashmir,16.477778,3310032.0,37.894444


In [None]:
# Distribution of the monthly unemployment rate for every state, one box per state.
fig = px.box(
    data_frame=df,
    x='state',
    y='estimated unemployment rate',
    color='state',
    title='State wise Unemployment rate',
)
fig.update_layout(xaxis=dict(categoryorder='total descending'))
fig.show()

In [None]:
# Mean unemployment rate per state, bars sorted from highest to lowest.
fig = px.bar(
    state,
    x='state',
    y='estimated unemployment rate',
    color='state',
    title='Average unemployment rate (State)',
)
fig.update_layout(xaxis=dict(categoryorder='total descending'))
fig.show()

The states of Haryana, Tripura and Jharkhand saw the highest average unemployment rates, while Gujarat, Assam and Meghalaya had the lowest unemployment rates during COVID-19.

In [None]:
# Animated bar chart: one frame per month label, one bar per state.
fig = px.bar(
    df,
    x='state',
    y='estimated unemployment rate',
    animation_frame='Month_name',
    color='state',
    title='Unemployment rate from Jan 2020 to Oct 2020(State)',
)
fig.update_layout(xaxis=dict(categoryorder='total descending'))

# Set the per-frame duration used by the first button of the animation menu to 2000 ms.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 2000

fig.show()

In [None]:
# Animated bubble map: bubble size is the unemployment rate, one frame per month.
# NOTE(review): the dataset's coordinate headers look swapped - the 'longitude'
# column holds values such as 15.9 that fit the latitude window below, while
# 'latitude' holds values such as 79.7 - so 'longitude' is deliberately passed
# as `lat` and 'latitude' as `lon`. Confirm against the data source.
fig = px.scatter_geo(
    df,
    lat='longitude',
    lon='latitude',
    color='state',
    hover_name='state',
    size='estimated unemployment rate',
    animation_frame='Month_name',
    scope='asia',
    title='Impact of lockdown on employment in India',
)

# Set the per-frame duration used by the first button of the animation menu to 2000 ms.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 2000

# Restrict the visible window of the map and paint the ocean.
fig.update_geos(
    lataxis_range=[5, 40],
    lonaxis_range=[65, 100],
    oceancolor='lightblue',
    showocean=True,
)

fig.show()

**Region wise Unemployment rate in India during COVID-19**

In [None]:
# Mean of each indicator per region, shown rounded to two decimals
# (the stored `region` frame keeps full precision).
region = (
    df.groupby(['region'])[
        [
            'estimated unemployment rate',
            'estimated employed',
            'estimated labour participation rate',
        ]
    ]
    .mean()
    .reset_index()
)
region.round(2)

Unnamed: 0,region,estimated unemployment rate,estimated employed,estimated labour participation rate
0,East,13.92,19602366.9,40.11
1,North,15.89,13072487.92,38.7
2,Northeast,10.95,3617105.53,52.06
3,South,10.45,14040589.33,40.44
4,West,8.24,18623512.72,41.26


In [None]:
# Pairwise scatter plots of the three numeric indicators, coloured by region.
fig = px.scatter_matrix(
    df,
    dimensions=[
        'estimated unemployment rate',
        'estimated employed',
        'estimated labour participation rate',
    ],
    color='region',
)
fig.show()

In [None]:
# Mean unemployment rate per region, bars sorted from highest to lowest.
fig = px.bar(
    region,
    x='region',
    y='estimated unemployment rate',
    color='region',
    title='Average unemployment rate(region)',
)
fig.update_layout(xaxis=dict(categoryorder='total descending'))
fig.show()

In [None]:
# Animated bar chart per region: each region's bar is stacked from its
# states' rates, one frame per month label.
fig = px.bar(
    df,
    x='region',
    y='estimated unemployment rate',
    animation_frame='Month_name',
    color='state',
    title='Unemployment rate from Jan 2020 to Oct 2020',
)
fig.update_layout(xaxis=dict(categoryorder='total descending'))

# Set the per-frame duration used by the first button of the animation menu to 2000 ms.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 2000

fig.show()

In [None]:
# Mean unemployment rate for every (region, state) pair that occurs in the data.
# `region` is a categorical column, so without observed=True pandas (before 3.0)
# emits the full region x state product and fills the combinations that never
# occur with NaN (e.g. East / Andhra Pradesh), which then corrupts the sunburst
# chart built from this frame. observed=True keeps only the real pairs.
unemployment = (
    df.groupby(['region', 'state'], observed=True)['estimated unemployment rate']
    .mean()
    .reset_index()
)
unemployment.head()

Unnamed: 0,region,state,estimated unemployment rate
0,East,Andhra Pradesh,
1,East,Assam,
2,East,Bihar,19.471
3,East,Chhattisgarh,
4,East,Delhi,


In [None]:
# Two-level sunburst: regions on the inner ring, their states on the outer ring,
# with wedge size taken from the mean unemployment rate.
fig = px.sunburst(
    unemployment,
    path=['region', 'state'],
    values='estimated unemployment rate',
    title='Unemployment rate in state and region',
    height=600,
)
fig.show()

In [None]:
# Split the observations into a pre-lockdown window (months 1-3, Jan-Mar)
# and a post-lockdown window (months 4-6, Apr-Jun).

before_lockdown = df[df['Month'].between(1, 3)]
after_lockdown = df[df['Month'].between(4, 6)]

In [None]:
# Per-state mean unemployment rate in the after-lockdown window.
af_lockdown = after_lockdown.groupby('state')['estimated unemployment rate'].mean().reset_index()

# Per-state mean unemployment rate in the before-lockdown window.
lockdown = (
    before_lockdown.groupby('state')['estimated unemployment rate']
    .mean()
    .rename('unemployment rate before lockdown')
    .reset_index()
)

# Attach the after-lockdown mean by matching on the state name rather than on
# row position: a positional assignment silently pairs values with the wrong
# state whenever a state is missing from one of the two windows. A left merge
# keeps every state that has pre-lockdown data (NaN if it has no later data).
lockdown = lockdown.merge(
    af_lockdown.rename(
        columns={'estimated unemployment rate': 'unemployment rate after lockdown'}
    ),
    on='state',
    how='left',
    validate='one_to_one',
)
lockdown.head()

Unnamed: 0,state,unemployment rate before lockdown,unemployment rate after lockdown
0,Andhra Pradesh,5.7,13.75
1,Assam,4.613333,7.07
2,Bihar,12.11,36.806667
3,Chhattisgarh,8.523333,9.38
4,Delhi,18.036667,25.713333


In [None]:
# Percentage change of the mean unemployment rate from the before-lockdown
# window to the after-lockdown window: (after - before) / before * 100.
# The previous expression evaluated as `before - (before / after)` because
# division binds tighter than subtraction, which is not a percentage change.
lockdown['rate change in unemployment'] = round(
    (lockdown['unemployment rate after lockdown'] - lockdown['unemployment rate before lockdown'])
    / lockdown['unemployment rate before lockdown']
    * 100,
    2,
)

In [None]:
# Per-state change in unemployment rate, bars sorted from smallest to largest
# and shaded by the same value.
fig = px.bar(
    lockdown,
    x='state',
    y='rate change in unemployment',
    color='rate change in unemployment',
    title='Percentage change in Unemployment rate in each state after lockdown',
    template='ggplot2',
)
fig.update_layout(xaxis=dict(categoryorder='total ascending'))
fig.show()