# UNEMPLOYMENT ANALYSIS WITH PYTHON

# Project by:- Mayur Dnyaneshwar Shinde

### Unemployment is measured by the unemployment rate, which is the number of people who are unemployed as a percentage of the total labour force. The unemployment rate rose sharply during Covid-19, so analyzing it makes a good data science project.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Load the unemployment dataset; the relative path means the CSV must be in
# the notebook's working directory.
df = pd.read_csv('Unemployment_Rate_upto_11_2020.csv')

In [None]:
# Display the loaded DataFrame (a notebook renders a cell's last expression).
df

In [None]:
# Dataset dimensions as (rows, columns).
print(df.shape)

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the dataset's columns.
df.describe()

In [None]:
# Column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Count the missing (NaN) values in each column.
df.isna().sum()

In [None]:
# Summary statistics again; df has not been modified since the earlier
# describe() cell, so this repeats that output.
df.describe()

In [None]:
# Preview the first rows of the dataset (head() defaults to five rows).
df.head()

In [None]:
# Preview the last rows of the dataset (tail() defaults to five rows).
df.tail()

### The column names in the dataset are not well suited to visualisation and understanding.
### So, for a better understanding of this data, we will rename the columns.

In [None]:
# Replace the raw CSV headers with clean, consistent names.
# The assignment is positional: the n-th name is given to the n-th column,
# so the list must contain exactly one entry per column of df.
new_column_names = [
    "States",
    "Date",
    "Frequency",
    "Estimated Unemployment Rate",
    "Estimated Employed",
    "Estimated Labour Participation Rate",
    "Region",
    "longitude",
    "latitude",
]
df.columns = new_column_names

In [None]:
# First ten rows, to check the renamed column headers.
df.head(10)

In [None]:
# Number of rows (observations) recorded for each state.
df['States'].value_counts()

# Unemployment Rate Analysis

In [None]:
# Global matplotlib font settings; they apply to every figure drawn afterwards.
# NOTE(review): 'constantia' has to be installed on the machine; if it is not,
# matplotlib is expected to fall back to its default font -- confirm.
plt.rc('font', size=16, family='constantia')

In [None]:
# Pairwise scatter plots of the three numeric indicators, coloured by region.
# `x_vars` / `y_vars` take lists of column names (not DataFrames), and the
# facet size is set with `height`; the old `size` keyword was renamed and is
# no longer accepted by current seaborn releases.
indicator_columns = [
    "Estimated Unemployment Rate",
    "Estimated Employed",
    "Estimated Labour Participation Rate",
]
sns.pairplot(
    df,
    x_vars=indicator_columns,
    y_vars=indicator_columns,
    hue="Region",
    palette='Dark2',
    height=4,
)
plt.show()

In [None]:
# Distinct state names, in order of first appearance in df.
state_list = df['States'].unique()

# Distinct region names in sorted order. np.sort returns a sorted copy and
# accepts any array-like, so this does not rely on unique() returning a NumPy
# array that has an in-place .sort() method.
region_list = np.sort(df['Region'].unique())

# For every region, the array of distinct states that belong to it.
regions = df.groupby("Region")
state_list_by_region = regions["States"].unique()


## <i>The unemployment rate</i> according to different <i>Regions of India</i>

In [None]:
# Unweighted mean of the unemployment-rate column over all rows; used in this
# notebook as the reference value for "above / below average".
Overall_unemp_rate = df["Estimated Unemployment Rate"].mean()
# Spelling of "Unemployment" corrected in the printed message.
print(f"Estimated Unemployment Rate of the country :- {Overall_unemp_rate}")

# Mean unemployment rate per region; the result is a Series indexed by region.
region_unemp_rate = df.groupby("Region")["Estimated Unemployment Rate"]
region_mean_unemp_rate = region_unemp_rate.mean()
region_mean_unemp_rate


In [None]:
# One colour per region: green when the region's mean rate is below the
# overall mean, red otherwise.
color_list = [
    "green" if region_mean_unemp_rate[name] < Overall_unemp_rate else "red"
    for name in region_list
]

# Bar chart of the mean unemployment rate of every region.
plt.figure(figsize=(16, 9))
plt.bar(region_list, region_mean_unemp_rate, color=color_list)
plt.xlabel("Regions in india", fontdict={"size": 16})
plt.ylabel("Estimated Unemployment Rate")
plt.show()


In [None]:
# Histogram of the unemployment rate, with one colour per region.
plt.figure(figsize=(12, 10))
plt.title("Indian Unemployment")
sns.histplot(
    data=df,
    x="Estimated Unemployment Rate",
    hue="Region",
)
plt.show()

# The <i>Unemployment rate</i> of each <i>Indian state by region</i>.

In [None]:
unemploment = df[["States", "Region", "Estimated Unemployment Rate"]]
figure = px.sunburst(unemploment, path=["Region", "States"], 
                     values="Estimated Unemployment Rate", 
                     width=700, height=700, color_continuous_scale="RdY1Gn", 
                     title="Unemployment Rate in India")
figure.show()

# The <i>Unemployment rate</i> of each <i>Indian state</i>.

In [None]:
# Unweighted mean of the unemployment-rate column over all rows; recomputed
# here so this section can be run on its own.
Overall_unemp_rate = df["Estimated Unemployment Rate"].mean()
# Spelling of "Unemployment" corrected in the printed message.
print(f"Estimated Unemployment Rate of the country :- {Overall_unemp_rate}")

# Mean unemployment rate per state; the result is a Series indexed by state.
state_unemp_rate = df.groupby("States")["Estimated Unemployment Rate"]
state_mean_unemp_rate = state_unemp_rate.mean()
state_mean_unemp_rate

In [None]:
# Labels, heights and colours are all taken from the same Series so that each
# bar is guaranteed to show its own state's mean. The groupby result is ordered
# by state name, whereas `state_list` is in first-appearance order; pairing the
# two positionally would mislabel the bars whenever the CSV is not already
# sorted by state.
state_names = state_mean_unemp_rate.index

# Green when the state's mean rate is below the overall mean, red otherwise.
color_list = [
    "green" if rate < Overall_unemp_rate else "red"
    for rate in state_mean_unemp_rate
]

plt.figure(figsize=(16, 9))
plt.bar(state_names, state_mean_unemp_rate.values, color=color_list)
plt.xlabel("States in india", fontdict={"size": 20})
plt.xticks(rotation=90)  # state names are long; vertical labels avoid overlap
plt.ylabel("Estimated Unemployment Rate", fontdict={"size": 20})
plt.show()


# The <i>Estimated Employment</i> in each <i>Region of India</i>.

In [None]:

# Scatter of every row's employment estimate, grouped on the x axis by region.
# The low alpha makes overlapping points show up as darker areas.
plt.figure(figsize=(8, 6))
plt.scatter(df['Region'], df['Estimated Employed'], alpha=0.2, s=200, c='g')
plt.xlabel("Regions in India", fontsize=20)
# NOTE(review): the label says "*100k" but the column is plotted without any
# scaling in this cell -- confirm the intended units.
plt.ylabel("Employment (in *100k)", fontsize=20)
# Render explicitly, as every other plotting cell in this notebook does; this
# also keeps the cell from echoing the Text object returned by ylabel().
plt.show()


In [None]:
# Histogram of the employment estimate, with one colour per region.
plt.figure(figsize=(16, 12))
plt.title("Indian Employment")
sns.histplot(
    data=df,
    x="Estimated Employed",
    hue="Region",
)
plt.show()