In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries,
# parsed by the download loop further down in this cell.
# NOTE(review): the URL embeds X-Goog-Date=20240824T040828Z and
# X-Goog-Expires=259200, so it looks like a time-limited signed link —
# presumably the download stops working once that window has passed.
DATA_SOURCE_MAPPING = 'unemployment-in-india:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F752131%2F1621146%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240824%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240824T040828Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D52a0a08141f702b513089290a944002f126e946a88c0b96efbbde795aa95a6e5db5cd0fbf33575387f6a503e64fc07fe0607383f0fad7d37862d7e4902f619e79dced28e2924e2ec8c49b609c6e40ea153c2923393cd443d62eeec255f8dc00e503b7e9db229a4cbcf51b17eebe66248c034ef9268367ffc85787c1aca9e4a3b07fa2fa881ac57d8432f2a084f455bf3e03da2d327ba61c3766cf8ade46ecc74d4bc95571749bd786f00206f58923ad11f3a8317e522da13660fb2be8c938ab4d8931bc009069fbb4f5eb7f197f3d4f50b1b02285c5f1d948a8181fb8e18f2b11ca90f84ba42d266d1c3b7a9919afd77c6ebc9545a13841b689dc0728cb944c4'

# Root directory that downloaded data sources are extracted into.
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created alongside the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every data source listed in DATA_SOURCE_MAPPING and unpack it into
# KAGGLE_INPUT_PATH/<directory>. A failure for one source is reported and the
# loop moves on to the next one.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a stray colon in the URL part cannot
    # break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header can be missing; without a known size the progress bar
            # stays empty instead of int(None) aborting the download.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # tarfile.open() below reopens the file by name, so buffered bytes
            # must reach the disk before the archive is read.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` would
                # replace the imported module for the rest of the notebook.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading unemployment-in-india, 16335 bytes compressed
Downloaded and uncompressed: unemployment-in-india
Data source import complete.


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
%matplotlib inline

In [None]:
# Both CSV files are read straight from the jarif87/DataSets GitHub repository.
UNEMPLOYMENT_CSV_URL = "https://raw.githubusercontent.com/jarif87/DataSets/main/unemployee_india.csv"
UNEMPLOYMENT_2020_CSV_URL = "https://raw.githubusercontent.com/jarif87/DataSets/main/Unemployment_Rate_upto_11_2020.csv"

df1 = pd.read_csv(UNEMPLOYMENT_CSV_URL)
df2 = pd.read_csv(UNEMPLOYMENT_2020_CSV_URL)

In [None]:
# Preview the first rows of df1.
df1.head()

In [None]:
# List the column labels of df1.
df1.columns

In [None]:
# NOTE(review): repeats the df1.head() preview from two cells earlier.
df1.head()

In [None]:
# Print the dtype and non-null count of every column.
df1.info()

In [None]:
# Count missing values per column before any cleaning.
df1.isnull().sum()

In [None]:
# (rows, columns) of df1 before rows with missing values are removed.
df1.shape

In [None]:
# Rebind df1 to a frame without any row that contains a missing value.
df1=df1.dropna()

In [None]:
# Re-check the per-column missing-value counts after dropna().
df1.isnull().sum()

In [None]:
# Summary statistics for the numeric columns of df1.
df1.describe()

In [None]:
# Select the unemployment-rate column; its label is written with a leading
# space here and in every later cell that uses it.
df1[" unemployment_rate"]

In [None]:
# Number of df1 records per region, drawn as horizontal bars.
region_count_fig, region_count_ax = plt.subplots(figsize=(15, 10))
sns.countplot(data=df1, y="Region", ax=region_count_ax)
plt.show()

# Average, highest, and lowest unemployment rate by region

In [None]:
# Mean unemployment rate per region, plus the regions holding the highest and
# lowest of those means. avg_unemployment_rate is reused by the bar chart in
# the next cell.
avg_unemployment_rate=df1.groupby('Region')[' unemployment_rate'].mean()
state_with_highest_unemployment=avg_unemployment_rate.idxmax()
high_unemployment_rate=avg_unemployment_rate.max()
state_with_lowest_unemployment=avg_unemployment_rate.idxmin()
low_unemploy_rate=avg_unemployment_rate.min()
# The labels now say "unemployment": the values printed are unemployment
# figures, which the previous "employment" wording misreported.
print(f"state with highest average unemployment : {state_with_highest_unemployment}")
print(f"highest average unemployment rate : {high_unemployment_rate}")
print(f"state with lowest average unemployment : {state_with_lowest_unemployment}")
print(f"lowest average unemployment rate : {low_unemploy_rate}")

In [None]:
# Bar chart of the per-region mean unemployment rate computed in the previous cell.
regions, avg_rates = avg_unemployment_rate.index, avg_unemployment_rate.values

avg_rate_fig, avg_rate_ax = plt.subplots(figsize=(10, 6))
avg_rate_ax.bar(regions, avg_rates, color='skyblue')
avg_rate_ax.set_xlabel('Region')
avg_rate_ax.set_ylabel('Average Unemployment Rate')
avg_rate_ax.set_title('Average Unemployment Rate by Region')
# Region names are long, so the tick labels are drawn vertically.
plt.xticks(rotation=90)
avg_rate_fig.tight_layout()
plt.show()

In [None]:
# Spread of the unemployment rate within each region.
unemp_box_fig, unemp_box_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    data=df1,
    x='Region',
    y=' unemployment_rate',
    palette="Set1",
    ax=unemp_box_ax,
)
unemp_box_ax.set_title("Unemployment Rate Distribution by Region")
unemp_box_ax.set_xlabel("State")
unemp_box_ax.set_ylabel("Estimated Unemployment Rate")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Spread of the labour participation rate within each region.
labour_box_fig, labour_box_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    data=df1,
    x='Region',
    y='Labour_Participation_Rate',
    palette="hsv",
    ax=labour_box_ax,
)
labour_box_ax.set_title("Labour_Participation_ Rate")
labour_box_ax.set_xlabel("Region")
labour_box_ax.set_ylabel("Labour_Participation_ Rate")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Spread of the estimated employee count within each region.
employee_box_fig, employee_box_ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    data=df1,
    x='Region',
    y='estimated_employee',
    palette="brg",
    ax=employee_box_ax,
)
employee_box_ax.set_title("estimated_employee")
employee_box_ax.set_xlabel("Region")
employee_box_ax.set_ylabel("estimated_employee")
plt.xticks(rotation=90)
plt.show()

In [None]:
sns.histplot(data=df1, x=" unemployment_rate", kde=True,color="navy")
plt.show()

In [None]:
sns.histplot(data=df1, x="estimated_employee", kde=True,color="brown")
plt.show()

In [None]:
sns.histplot(data=df1, x="Labour_Participation_Rate", kde=True,color="darkorange")
plt.show()

# Unemployment Rate vs. Labor Participation Rate

In [None]:
# One point per df1 row, coloured by region.
rate_scatter_fig, rate_scatter_ax = plt.subplots(figsize=(10, 7))
sns.scatterplot(
    data=df1,
    x=" unemployment_rate",
    y="Labour_Participation_Rate",
    hue="Region",
    ax=rate_scatter_ax,
)
plt.show()

In [None]:
# Violin plot of the estimated employee count across all of df1.
employee_violin_fig, employee_violin_ax = plt.subplots(figsize=(10, 7))
sns.violinplot(data=df1, x="estimated_employee", ax=employee_violin_ax)
plt.show()

In [None]:
# Pairwise relationships between the three numeric measures of df1.
pair_columns = [
    " unemployment_rate",
    "estimated_employee",
    "Labour_Participation_Rate",
]
pair = df1[pair_columns]
sns.pairplot(pair, palette="winter", markers="*")
plt.show()

In [None]:
df1["Area"].value_counts()

In [None]:
sns.countplot(x="Area",data=df1)
plt.show()

# Unemployment in India (2020)

In [None]:
# Preview the first rows of the second dataset.
df2.head()

In [None]:
# (rows, columns) of df2.
df2.shape

In [None]:
# Count missing values per column.
df2.isnull().sum()

In [None]:
# Print the dtype and non-null count of every column.
df2.info()

In [None]:
# Summary statistics for the numeric columns of df2.
df2.describe()

In [None]:
# Number of df2 records per region, drawn as horizontal bars.
region2_count_fig, region2_count_ax = plt.subplots(figsize=(10, 7))
sns.countplot(data=df2, y="Region", ax=region2_count_ax)
plt.show()

In [None]:
# NOTE(review): repeats the df2.head() preview shown a few cells earlier.
df2.head()

In [None]:
# List df2's column labels; the plotting cells below reference them verbatim,
# including the leading space in names such as ' Date'.
df2.columns

In [None]:
# Animated bar chart of the estimated employed count per region, with one
# animation frame per value of the ' Date' column and bars coloured by
# 'Region.1'. plotly.express is imported as px in the notebook's import cell
# rather than here, so all dependencies are declared in one place.
fig = px.bar(df2, x="Region", y=" Estimated_Employed", title=" Estimated_Employed",
             animation_frame=' Date',template='plotly',color="Region.1")
fig.show()


In [None]:
# Animated bar chart of the estimated unemployment rate per region, one frame
# per ' Date' value.
fig = px.bar(
    data_frame=df2,
    x="Region",
    y=" Estimated_Unemployment_Rate",
    color="Region",
    animation_frame=' Date',
    title="unemployee rate 2020",
    template='plotly',
)
fig.show()

In [None]:
# Animated bar chart of the estimated labour participation rate per region,
# one frame per ' Date' value, coloured by 'Region.1'.
fig = px.bar(
    data_frame=df2,
    x="Region",
    y=" Estimated_Labour_Participation_Rate",
    color="Region.1",
    animation_frame=' Date',
    title="labour_rate 2020",
    template='plotly',
)
fig.show()

# Time Series Line Plot for Unemployment Rate by Region

In [None]:
# Line chart of the estimated unemployment rate against ' Date', with one
# colour per 'Region.1' value.
fig = px.line(
    data_frame=df2,
    x=' Date',
    y=" Estimated_Unemployment_Rate",
    color='Region.1',
    title='Unemployment Rate Over Time',
    template='plotly',
)
fig.show()

# Bar Plot for Average Unemployment Rate by Region

In [None]:
# Mean estimated unemployment rate per region, flattened back into a two-column
# frame so plotly can address both columns by name.
unemployment_by_region = df2.groupby('Region')[' Estimated_Unemployment_Rate']
avg_unemployment = unemployment_by_region.mean().reset_index()

bar_plot = px.bar(
    data_frame=avg_unemployment,
    x='Region',
    y=' Estimated_Unemployment_Rate',
    title='Average Unemployment Rate by Region',
    template='plotly',
)
bar_plot.show()

# Bar Plot for Average Labour Participation Rate by Region

In [None]:
# Mean estimated labour participation rate per region. The result gets its own
# name: storing it in `avg_unemployment` would overwrite the unemployment
# averages computed earlier with a frame that holds a different measure.
avg_labour_participation = df2.groupby('Region')[' Estimated_Labour_Participation_Rate'].mean().reset_index()

bar_plot = px.bar(avg_labour_participation, x='Region', y=' Estimated_Labour_Participation_Rate',
                   title='Average labour Rate by Region', template='plotly')
bar_plot.show()

In [None]:
# NOTE(review): df2.columns was already displayed in an earlier cell; this
# repeat only re-lists the labels used by the scatter plots that follow.
df2.columns

# Scatter Plot for Unemployment Rate vs. Estimated Employed

In [None]:
fig = px.scatter(df2, x=' Estimated_Employed', y=' Estimated_Unemployment_Rate',
                          color='Region.1', title='Unemployment Rate vs. Estimated Employed',animation_frame=" Date")
fig.show()


In [None]:
fig = px.scatter(df2, x='Region.1', y=' Estimated_Labour_Participation_Rate',
                          color='Region.1', title='Region.1 vs labour rate',animation_frame=" Date")
fig.show()


In [None]:
# NOTE(review): another repeat of df2.columns; the labels have not changed
# since they were first displayed.
df2.columns

# Histogram for Estimated Labour Participation Rate

In [None]:
# Histogram of the estimated labour participation rate, coloured by 'Region.1'.
fig = px.histogram(
    data_frame=df2,
    x=' Estimated_Labour_Participation_Rate',
    color='Region.1',
    title='Distribution of Labour Participation Rate by Region',
    template='plotly',
)
fig.show()

# Histogram for Estimated Employed

In [None]:
# Histogram of the estimated employed count, coloured by 'Region.1'.
# The title names the column that is actually plotted; it previously repeated
# the labour-participation title from the histogram above.
fig = px.histogram(df2, x=' Estimated_Employed', color='Region.1',
                         title='Distribution of Estimated Employed by Region', template='plotly')
fig.show()

In [None]:
df2["Region.1"].value_counts()

In [None]:
sns.countplot(y="Region.1",data=df2)
plt.show()