# Task - 2 Unemployment analysis with python
## Author: Nitin Pilkhwal, Data Science Intern at Oasis Infobyte

In [124]:
import pandas as pd
import numpy as np

In [125]:
df = pd.read_csv(r'D:\program\GITHUB_REPO\OIBSIP\Unemployment_Rate_upto_11_2020.csv')

In [126]:
df.head()

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Region.1,longitude,latitude
0,Andhra Pradesh,31-01-2020,M,5.48,16635535,41.02,South,15.9129,79.74
1,Andhra Pradesh,29-02-2020,M,5.83,16545652,40.9,South,15.9129,79.74
2,Andhra Pradesh,31-03-2020,M,5.79,15881197,39.18,South,15.9129,79.74
3,Andhra Pradesh,30-04-2020,M,20.51,11336911,33.1,South,15.9129,79.74
4,Andhra Pradesh,31-05-2020,M,17.43,12988845,36.46,South,15.9129,79.74


In [127]:
print(df.shape)

(267, 9)


In [128]:
print(df.columns)

Index(['Region', ' Date', ' Frequency', ' Estimated Unemployment Rate (%)',
       ' Estimated Employed', ' Estimated Labour Participation Rate (%)',
       'Region.1', 'longitude', 'latitude'],
      dtype='object')


In [129]:
df.isnull().sum()

Region                                      0
 Date                                       0
 Frequency                                  0
 Estimated Unemployment Rate (%)            0
 Estimated Employed                         0
 Estimated Labour Participation Rate (%)    0
Region.1                                    0
longitude                                   0
latitude                                    0
dtype: int64

In [130]:
df = df.rename(columns={df.columns[3]:'EUR',df.columns[4]:'EE',df.columns[5]:'ELPR',df.columns[6]:'Area'})

In [131]:
df.columns

Index(['Region', ' Date', ' Frequency', 'EUR', 'EE', 'ELPR', 'Area',
       'longitude', 'latitude'],
      dtype='object')

In [132]:
df['Region'].unique()

array(['Andhra Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Delhi', 'Goa',
       'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir',
       'Jharkhand', 'Karnataka', 'Kerala', 'Madhya Pradesh',
       'Maharashtra', 'Meghalaya', 'Odisha', 'Puducherry', 'Punjab',
       'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura',
       'Uttar Pradesh', 'Uttarakhand', 'West Bengal'], dtype=object)

In [133]:
df['Area'].unique()

array(['South', 'Northeast', 'East', 'West', 'North'], dtype=object)

In [134]:
df2 = df.groupby(['Area'])['EUR','EE','ELPR']


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [135]:
df2.head()

Unnamed: 0,EUR,EE,ELPR
0,5.48,16635535,41.02
1,5.83,16545652,40.9
2,5.79,15881197,39.18
3,20.51,11336911,33.1
4,17.43,12988845,36.46
10,4.66,13051904,52.98
11,4.41,10088268,40.77
12,4.77,11542888,46.73
13,11.06,6830817,29.55
14,9.55,11367897,48.26


In [136]:
Area = df.groupby(['Area'])[['EUR', "EE", "ELPR"]].mean()
Area = pd.DataFrame(Area).reset_index()

State = df.groupby(['Region'])[['EUR', "EE", "ELPR"]].mean()
State = pd.DataFrame(State).reset_index()


## Data Visualization

In [137]:
import plotly.express as px

In [138]:
fig1 = px.box(df,x='Region',y='EUR',color='Region')
fig2 = px.box(df,x='Region',y='EE',color='Region')
fig3 = px.box(df,x='Region',y='ELPR',color='Region')
fig1.show()
fig2.show()
fig3.show()

### Unemployment Rate

In [139]:
fig = px.bar(State,x='Region',y='EUR',color='Region',title='Unemplyment Rate - Statewise')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

In [140]:
fig = px.bar(Area,x='Area',y='EUR',color='Area',title='Unemplyment Rate - Areawise')
fig.update_layout(xaxis={'categoryorder':'total descending'},height=320,width=600)
fig.show()

In [141]:
fig = px.bar(State,x='Region',y='EE',color='Region',title='Estimated Employment - Statewise')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

In [142]:
fig = px.bar(Area,x='Area',y='EE',color='Area',title='Estimated Employment - Areawise')
fig.update_layout(xaxis={'categoryorder':'total descending'},height=320,width=600)
fig.show()

In [144]:
fig = px.bar(State,x='Region',y='ELPR',color='Region',title='Estimated labour participation rate - Statewise')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

In [146]:
fig = px.bar(Area,x='Area',y='ELPR',color='Area',title='Estimated labour participation rate - Areawise')
fig.update_layout(xaxis={'categoryorder':'total descending'},height=320,width=600)
fig.show()

In [145]:
Map = px.scatter_geo(df,'longitude','latitude',color='Region',size='EUR',hover_name='Region',scope='asia')
Map.update_geos(lataxis_range=[5,40], lonaxis_range=[65, 100],oceancolor="lightblue",showocean=True)
Map.update_layout(width=600,height=420)
Map.show()


## Observation
<ul>
<li> Haryana has the highest unemployment rate and Meghalaya has the least.
    <ul>
    <li> North has the highest unemployment rate and West has the least.
    </ul>
<li> Uttar Pradesh has the highest Estimated Employment and Sikkim has the least.
    <ul>
    <li> East has the highest Estimated Employment and Northeast has the least.
    </ul>
<li> Meghalaya has the highest Estimated labour participation rate and Kerla has the least.
    <ul>
    <li> NorthEast has the highest Estimated labour participation rate and North has the least.
    </ul>
</ul>

### Thank You