In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

In [2]:
# Read the raw unemployment dataset from a CSV file given by a relative path.
unemployed= pd.read_csv("Unemployment in India.csv")
# Preview the first rows to check that the file was parsed as expected.
unemployed.head()

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Area
0,Andhra Pradesh,31-05-2019,Monthly,3.65,11999139.0,43.24,Rural
1,Andhra Pradesh,30-06-2019,Monthly,3.05,11755881.0,42.05,Rural
2,Andhra Pradesh,31-07-2019,Monthly,3.75,12086707.0,43.5,Rural
3,Andhra Pradesh,31-08-2019,Monthly,3.32,12285693.0,43.97,Rural
4,Andhra Pradesh,30-09-2019,Monthly,5.17,12256762.0,44.68,Rural


In [3]:
# Show column names, dtypes and non-null counts of the raw frame.
unemployed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 7 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   Region                                    740 non-null    object 
 1    Date                                     740 non-null    object 
 2    Frequency                                740 non-null    object 
 3    Estimated Unemployment Rate (%)          740 non-null    float64
 4    Estimated Employed                       740 non-null    float64
 5    Estimated Labour Participation Rate (%)  740 non-null    float64
 6   Area                                      740 non-null    object 
dtypes: float64(3), object(4)
memory usage: 42.1+ KB


The 'Date' column was read with dtype object, so we convert it to datetime. To leave the original data untouched, we do this on a copy called `year` and add a 'Year' column holding the year of each row. The copy is then grouped by 'Year', and the mean of " Estimated Unemployment Rate (%)", " Estimated Employed" and " Estimated Labour Participation Rate (%)" is computed for each year. This lets us compare the unemployment rate before and during the pandemic.

In [4]:
# Yearly means of the three numeric indicators, built on a separate frame so
# that `unemployed` itself keeps its original (object) ' Date' column.
# Rows without a date cannot be assigned a year, so they are removed first.
year = unemployed.dropna(subset=[' Date']).copy()
year[' Date'] = pd.to_datetime(year[' Date'], dayfirst=True)  # dates are written day-month-year
# With the missing dates removed there is no NaT, so .dt.year stays an integer
# (e.g. 2019) instead of being upcast to float (2019.0).
year['Year'] = year[' Date'].dt.year
year = (
    year.groupby("Year")[[" Estimated Unemployment Rate (%)",
                          " Estimated Employed",
                          " Estimated Labour Participation Rate (%)"]]
    .mean()
    .reset_index()  # the groupby result is already a DataFrame; just turn 'Year' back into a column
)
# A bare `year.head()` that is not the last line of a cell is discarded, so
# display it explicitly.
display(year.head())
# Confirm that the original frame is untouched: ' Date' is still object dtype.
unemployed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 7 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   Region                                    740 non-null    object 
 1    Date                                     740 non-null    object 
 2    Frequency                                740 non-null    object 
 3    Estimated Unemployment Rate (%)          740 non-null    float64
 4    Estimated Employed                       740 non-null    float64
 5    Estimated Labour Participation Rate (%)  740 non-null    float64
 6   Area                                      740 non-null    object 
dtypes: float64(3), object(4)
memory usage: 42.1+ KB


In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
unemployed.describe()

Unnamed: 0,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%)
count,740.0,740.0,740.0
mean,11.787946,7204460.0,42.630122
std,10.721298,8087988.0,8.111094
min,0.0,49420.0,13.33
25%,4.6575,1190404.0,38.0625
50%,8.35,4744178.0,41.16
75%,15.8875,11275490.0,45.505
max,76.74,45777510.0,72.57


In [6]:
# Look at the last rows of the raw frame to inspect the end of the file.
unemployed.tail()

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Area
763,,,,,,,
764,,,,,,,
765,,,,,,,
766,,,,,,,
767,,,,,,,


Next, we drop every row that contains null values; the last rows of the dataframe shown above are entirely empty.

In [7]:
# Keep only the rows that have no missing value in any column.
# dropna() returns a new frame, so `unemployed` itself is left as it is.
notNullUnemployed = unemployed.dropna(axis=0, how="any")
# Re-check the non-null counts on the cleaned frame.
notNullUnemployed.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 740 entries, 0 to 753
Data columns (total 7 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   Region                                    740 non-null    object 
 1    Date                                     740 non-null    object 
 2    Frequency                                740 non-null    object 
 3    Estimated Unemployment Rate (%)          740 non-null    float64
 4    Estimated Employed                       740 non-null    float64
 5    Estimated Labour Participation Rate (%)  740 non-null    float64
 6   Area                                      740 non-null    object 
dtypes: float64(3), object(4)
memory usage: 46.2+ KB


In [8]:
# The last rows of the cleaned frame should now contain real observations.
notNullUnemployed.tail()

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Area
749,West Bengal,29-02-2020,Monthly,7.55,10871168.0,44.09,Urban
750,West Bengal,31-03-2020,Monthly,6.67,10806105.0,43.34,Urban
751,West Bengal,30-04-2020,Monthly,15.63,9299466.0,41.2,Urban
752,West Bengal,31-05-2020,Monthly,15.22,9240903.0,40.67,Urban
753,West Bengal,30-06-2020,Monthly,9.86,9088931.0,37.57,Urban


In [9]:
import plotly.express as px

We will use plotly to draw a bar chart of Estimated Unemployment Rate (%) by Region. Plotly provides a date slider, which lets us view the unemployment rate of every region on a specific date.

In [10]:
# Animated bar chart: one frame per date, one bar per region.
#
# Rebind the name to a new frame built with .assign() instead of writing a
# column into the result of dropna(). Assigning into such a derived frame is
# the pattern pandas can report as SettingWithCopyWarning, which would go
# unnoticed here because warnings are silenced at the top of the notebook.
notNullUnemployed = notNullUnemployed.assign(
    **{' Date': notNullUnemployed[' Date'].astype("string")}
)
# Use one fixed y-range for every frame; without it the axis is not rescaled
# between frames and taller bars on later dates can be cut off.
# NOTE(review): the data has an 'Area' column, so a region can have more than
# one row per date and px.bar stacks those rows; the ceiling is therefore taken
# from the per-date, per-region totals rather than from single rows.
stacked_rate_max = (
    notNullUnemployed
    .groupby([' Date', 'Region'])[' Estimated Unemployment Rate (%)']
    .sum()
    .max()
)
fig = px.bar(
    notNullUnemployed,
    x='Region',
    y=' Estimated Unemployment Rate (%)',
    color='Region',
    animation_frame=' Date',  # the column name is already a str; no str() wrapper needed
    range_y=[0, stacked_rate_max * 1.05],  # small headroom above the tallest bar
    title="Unemployment Rate per State with Date Slider",
    template='plotly',
)
# Order the regions on the x-axis from the largest to the smallest total.
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

We will now plot graphs to compare the states. To do this, we group the dataset by the 'Region' column and take the mean of 'Estimated Unemployment Rate (%)', 'Estimated Employed' and 'Estimated Labour Participation Rate (%)' for each state.

In [11]:
# Per-state averages of the three indicators, with 'Region' kept as an
# ordinary column rather than as the index.
state = (
    notNullUnemployed
    .groupby("Region")[[
        " Estimated Unemployment Rate (%)",
        " Estimated Employed",
        " Estimated Labour Participation Rate (%)",
    ]]
    .mean()
    .reset_index()
)
state.head()


Unnamed: 0,Region,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%)
0,Andhra Pradesh,7.477143,8154093.0,39.375714
1,Assam,6.428077,5354772.0,44.868462
2,Bihar,18.918214,12366190.0,38.153929
3,Chandigarh,15.991667,316831.2,39.336667
4,Chhattisgarh,9.240357,4303499.0,42.810714


In [12]:
# Bar chart of the mean unemployment rate of each state, one colour per state.
fig = px.bar(
    state,
    x='Region',
    y=' Estimated Unemployment Rate (%)',
    color='Region',
    template='plotly',
)
# Sort the bars from the highest to the lowest value.
fig.update_layout(xaxis=dict(categoryorder='total descending'))
fig.show()

In [13]:
# Animated bar chart: one frame per region, one bar per date.
#
# Use one fixed y-range for every frame; without it the axis is not rescaled
# between frames and regions with higher rates can be cut off.
# NOTE(review): the data has an 'Area' column, so a date can have more than one
# row per region and px.bar stacks those rows; the ceiling is therefore taken
# from the per-region, per-date totals rather than from single rows.
stacked_rate_max = (
    notNullUnemployed
    .groupby(['Region', ' Date'])[' Estimated Unemployment Rate (%)']
    .sum()
    .max()
)
fig = px.bar(
    notNullUnemployed,
    x=" Date",
    y=" Estimated Unemployment Rate (%)",
    animation_frame="Region",
    range_y=[0, stacked_rate_max * 1.05],  # small headroom above the tallest bar
    title="Unemployment rate over time for each state",
    template="plotly",
)
fig.show()

In [14]:
# `year` already holds one aggregated (mean) row per year, so the values are
# drawn directly as bars. px.histogram would aggregate them again, label the
# axis "sum of ...", and bin the numeric years on a continuous axis.
cov = px.bar(
    # Turn the year into a text label so each year is one discrete bar/colour.
    year.assign(Year=year['Year'].astype(int).astype(str)),
    x='Year',
    y=' Estimated Unemployment Rate (%)',
    color='Year',
    labels={' Estimated Unemployment Rate (%)': 'Mean Estimated Unemployment Rate (%)'},
    template='plotly',
    title="Effect of Covid19 on Unemployment",
)
cov.show()

In [15]:
# `year` already holds one aggregated (mean) row per year, so the values are
# drawn directly as bars. px.histogram would aggregate them again, label the
# axis "sum of ...", and bin the numeric years on a continuous axis.
cov = px.bar(
    # Turn the year into a text label so each year is one discrete bar/colour.
    year.assign(Year=year['Year'].astype(int).astype(str)),
    x='Year',
    y=' Estimated Employed',
    color='Year',
    labels={' Estimated Employed': 'Mean Estimated Employed'},
    template='plotly',
    title="Estimated Employed Before and During Covid19",
)
cov.show()

In [16]:
# `year` already holds one aggregated (mean) row per year, so the values are
# drawn directly as bars. px.histogram would aggregate them again, label the
# axis "sum of ...", and bin the numeric years on a continuous axis.
cov = px.bar(
    # Turn the year into a text label so each year is one discrete bar/colour.
    year.assign(Year=year['Year'].astype(int).astype(str)),
    x='Year',
    y=' Estimated Labour Participation Rate (%)',
    color='Year',
    labels={' Estimated Labour Participation Rate (%)': 'Mean Estimated Labour Participation Rate (%)'},
    template='plotly',
    title="Estimated Labour Participation Rate Before and During Covid19",
)
cov.show()