In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Load the unemployment dataset from the Kaggle input directory.
# Several column names are referenced later with a leading space
# (e.g. ' Date', ' Frequency'), so they must be spelled exactly that way.
df=pd.read_csv('/kaggle/input/unemployment-in-india/Unemployment in India.csv')
# Bare last expression: display the loaded frame as the cell output.
df

In [None]:
df.info()

In [None]:
# Remove every row that contains at least one missing value.
# inplace=True modifies `df` itself, so all later cells see the reduced frame.
df.dropna(inplace=True)

In [None]:
# One row per column of df, giving how many distinct values that column holds
# (missing values, if any, are counted as a value of their own).
df.nunique(dropna=False).to_frame("Unique Values Count")

In [None]:
# Parse the ' Date' strings into pandas datetimes.
# NOTE(review): if the raw dates are written day-first (dd-mm-yyyy), pass
# dayfirst=True here - confirm against the CSV.
df[' Date'] = pd.to_datetime(df[' Date'])
# .info has to be *called*; the bare attribute only shows the bound-method repr
# instead of the dtype / non-null summary.
df[' Date'].info()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale ' Estimated Employed' to the [0, 1] range with a min-max scaler.
# The scaler expects a 2-D array, hence the reshape to a single column.
scaler = MinMaxScaler()
X = df[' Estimated Employed'].to_numpy().reshape(-1, 1)
scaler.fit(X)
df[' Estimated Employed'] = scaler.transform(X)

In [None]:
df.head()

In [None]:
# Print the value counts of each categorical column, separated by a row of asterisks.
separator = '*' * 33
for position, column in enumerate(['Region', ' Date', ' Frequency', 'Area']):
    if position:
        print(separator)
    print(df[column].value_counts())

In [None]:
Area_encoding = {'Rural': 0, 'Urban': 1}

# Encode the 'Area' column as integers (Rural -> 0, Urban -> 1).
# Plain column assignment is used instead of `df.loc[:, 'Area'] = ...` because the
# full-slice .loc form writes into the existing object-dtype column on recent pandas,
# leaving the codes stored as Python objects rather than as an integer column.
# Values without a mapping (including codes left by a previous run of this cell)
# are passed through unchanged, so re-running the cell does not turn them into NaN.
df['Area'] = df['Area'].map(lambda area: Area_encoding.get(area, area))

In [None]:
# Split the data by encoded area (0 = Rural, 1 = Urban) and report each subset's shape.
rural_df = df.loc[df['Area'].eq(0)]
urban_df = df.loc[df['Area'].eq(1)]
for subset in (rural_df, urban_df):
    print(subset.shape)

In [None]:
# Per-region summary statistics, computed identically for the rural and urban subsets.
# Each entry: (source column, prefix used in the flattened column name, statistics).
region_stat_plan = [
    (' Estimated Unemployment Rate (%)', 'Unemployment Rate', ['min', 'mean', 'max', 'std']),
    (' Estimated Labour Participation Rate (%)', 'Labour Participation Rate', ['min', 'mean', 'max', 'std']),
    (' Estimated Employed', 'Estimated Employed', ['min', 'mean', 'max']),
]
region_agg_spec = {source: stats for source, _prefix, stats in region_stat_plan}

# Flat names such as 'Unemployment Rate Min', in the same order agg() emits its columns.
region_stat_names = [
    f'{prefix} {stat.capitalize()}'
    for _source, prefix, stats in region_stat_plan
    for stat in stats
]

grouped_rural_stats = rural_df.groupby('Region').agg(region_agg_spec)
grouped_urban_stats = urban_df.groupby('Region').agg(region_agg_spec)

# Replace the two-level (column, statistic) header with the flat names.
grouped_rural_stats.columns = list(region_stat_names)
grouped_urban_stats.columns = list(region_stat_names)

In [None]:
def plot_heatmap_(data, title):
    """Draw two side-by-side annotated heatmaps of the min/mean/max statistics.

    The left panel shows the unemployment-rate columns and the right panel the
    labour-participation-rate columns of ``data``.

    Parameters
    ----------
    data : DataFrame
        Must contain the 'Unemployment Rate Min/Mean/Max' and
        'Labour Participation Rate Min/Mean/Max' columns.
    title : str
        Overall title placed above both panels.
    """
    panels = (
        ('Unemployment Rate',
         ['Unemployment Rate Min', 'Unemployment Rate Mean', 'Unemployment Rate Max']),
        ('Labour Participation Rate',
         ['Labour Participation Rate Min', 'Labour Participation Rate Mean', 'Labour Participation Rate Max']),
    )

    fig, axs = plt.subplots(1, 2, figsize=(30, 25))  # one row, two panels

    for ax, (panel_title, stat_columns) in zip(axs, panels):
        sns.heatmap(data.loc[:, stat_columns], ax=ax, annot=True, cmap="coolwarm", annot_kws={"size": 22})
        ax.set_title(panel_title, fontsize=15)
        # Set after the heatmap call so these labels and tick settings are the ones shown.
        ax.set_xlabel('Statistics', fontsize=15)
        ax.set_ylabel('Region', fontsize=15)
        ax.tick_params(axis='x', rotation=45, labelsize=21)
        ax.tick_params(axis='y', labelsize=22)

    # Overall title for the whole figure.
    plt.suptitle(title, fontsize=15)
    plt.tight_layout()

In [None]:
# Group the full dataset by region; the time-series plots iterate over these groups.
grouped_df = df.groupby(by='Region')


def plot_rate_col_rate(grouped_df, rate_col):
    """Plot ``rate_col`` over time for every region, one figure per region.

    Each figure contains two lines: the rows whose 'Area' equals 0 (labelled
    'Rural') and the rows whose 'Area' equals 1 (labelled 'Urban').

    Parameters
    ----------
    grouped_df : DataFrameGroupBy
        Iterable of ``(region, region_data)`` pairs; every ``region_data`` must
        contain the ' Date', 'Area' and ``rate_col`` columns.
    rate_col : str
        Name of the column plotted on the y-axis.
    """
    area_series = ((0, 'Rural'), (1, 'Urban'))

    for region, region_data in grouped_df:
        # Build a date-parsed, date-sorted copy rather than assigning into / sorting
        # the frame yielded by the groupby: mutating that frame in place can raise
        # SettingWithCopyWarning and relies on it not sharing data with the caller's frame.
        region_data = region_data.assign(
            **{' Date': pd.to_datetime(region_data[' Date'])}
        ).sort_values(by=' Date')

        fig, ax = plt.subplots(figsize=(10, 6))
        for area_code, area_label in area_series:
            area_data = region_data[region_data['Area'] == area_code]
            ax.plot(area_data[' Date'], area_data[rate_col],
                    marker='o', linestyle='-', label=area_label)

        ax.set_title(f'{rate_col} Over Time for {region}')
        ax.set_xlabel('Date')
        ax.set_ylabel(f'{rate_col}')
        ax.tick_params(axis='x', labelrotation=45)
        ax.grid(True)
        ax.legend()
        fig.tight_layout()
        plt.show()
        # One figure is created per region; close it explicitly so figures do not
        # accumulate when the backend does not close them on show().
        plt.close(fig)

In [None]:
def plot_comparison(data_urban, data_rural, y_label, title):
    """Plot the urban and rural series as two lines on one wide figure.

    Parameters
    ----------
    data_urban, data_rural :
        Series to draw; labelled 'Urban' and 'Rural' in the legend.
    y_label : str
        Label for the y-axis.
    title : str
        Figure title.
    """
    text_size = 22

    plt.figure(figsize=(30, 14))
    # Urban is drawn first, then rural, each with circle markers.
    for series, legend_label in ((data_urban, 'Urban'), (data_rural, 'Rural')):
        plt.plot(series, label=legend_label, marker='o', linewidth=3)

    plt.title(title, fontsize=text_size)
    plt.xlabel('Regions', fontsize=text_size)
    plt.ylabel(y_label, fontsize=text_size)
    # Tick labels on the x-axis are turned vertical and drawn slightly larger.
    plt.xticks(rotation=90, fontsize=25)
    plt.yticks(rotation=0, fontsize=text_size)
    plt.grid(True)
    plt.legend(prop={'size': text_size})
    plt.show()


In [None]:
plot_rate_col_rate(grouped_df, ' Estimated Unemployment Rate (%)')

In [None]:
plot_rate_col_rate(grouped_df, ' Estimated Labour Participation Rate (%)')

In [None]:
plot_rate_col_rate(grouped_df, ' Estimated Employed')

In [None]:
# Drop Chandigarh from the urban statistics so that the urban and rural series
# can be compared (presumably because it has no rural counterpart - verify).
grouped_urban_stats_com = grouped_urban_stats.drop(index='Chandigarh')

In [None]:
plot_comparison(grouped_urban_stats_com['Unemployment Rate Mean'], grouped_rural_stats['Unemployment Rate Mean'], 'Unemployment Rate Mean', 'Comparison of Unemployment Rate Mean between Urban and Rural')

In [None]:
plot_comparison(grouped_urban_stats_com['Labour Participation Rate Mean'], grouped_rural_stats['Labour Participation Rate Mean'], 'Labour Participation Rate Mean', 'Comparison of Labour Participation Rate Mean between Urban and Rural')

In [None]:
plot_comparison(grouped_urban_stats_com['Estimated Employed Mean'], grouped_rural_stats['Estimated Employed Mean'], 'Estimated Employed Mean', 'Estimated Employed Mean')

In [None]:
plot_heatmap_(grouped_rural_stats,'rural')

In [None]:
# Heatmaps of the urban per-region statistics (figure title was misspelled as 'ubran').
plot_heatmap_(grouped_urban_stats,'urban')

In [None]:
# Add two "Covid19 effect" columns to both the rural and the urban statistics:
# the standard deviation of each rate multiplied by the mean of 'Estimated Employed'.
for stats in (grouped_rural_stats, grouped_urban_stats):
    employed_mean = stats['Estimated Employed Mean']
    stats['Covid19 Unemployment Rate Effect'] = stats['Unemployment Rate Std'] * employed_mean
    stats['Covid19 Labour Participation Rate Effect'] = stats['Labour Participation Rate Std'] * employed_mean

In [None]:
def plot_heatmap_effect(data, title):
    """Draw two side-by-side annotated heatmaps of the Covid19 effect columns.

    The left panel shows 'Covid19 Unemployment Rate Effect' and the right panel
    'Covid19 Labour Participation Rate Effect', each as a single-column heatmap.

    Parameters
    ----------
    data : DataFrame
        Must contain both Covid19 effect columns.
    title : str
        Overall title placed above both panels.
    """
    panels = (
        ('Unemployment Rate', 'Covid19 Unemployment Rate Effect'),
        ('Labour Participation Rate', 'Covid19 Labour Participation Rate Effect'),
    )

    fig, axs = plt.subplots(1, 2, figsize=(30, 25))  # one row, two panels

    for ax, (panel_title, effect_column) in zip(axs, panels):
        # The column is selected with a list so the heatmap receives a 2-D frame.
        sns.heatmap(data.loc[:, [effect_column]], ax=ax, annot=True, cmap="YlGnBu", annot_kws={"size": 22})
        ax.set_title(panel_title, fontsize=15)
        ax.set_xlabel('Statistics', fontsize=15)
        ax.set_ylabel('Region', fontsize=15)
        ax.tick_params(axis='x', rotation=45, labelsize=21)
        ax.tick_params(axis='y', labelsize=22)

    # Overall title for the whole figure.
    plt.suptitle(title, fontsize=15)
    plt.tight_layout()

In [None]:
plot_heatmap_effect(grouped_rural_stats, 'Rural')

In [None]:
plot_heatmap_effect(grouped_urban_stats, 'Urban')

In [None]:
plot_comparison(grouped_urban_stats['Covid19 Unemployment Rate Effect'], grouped_rural_stats['Covid19 Unemployment Rate Effect'], 'Covid19 Unemployment Rate Effect', 'Covid19 Unemployment Rate Effect')

In [None]:
plot_comparison(grouped_urban_stats['Covid19 Labour Participation Rate Effect'], grouped_rural_stats['Covid19 Labour Participation Rate Effect'], 'Covid19 Labour Participation Rate Effect', 'Covid19 Labour Participation Rate Effect')

The Covid19 effect on the labour participation rate is higher in rural areas than in urban areas for all states except Delhi.

# Summary & Conclusion

1- Despite the higher unemployment rate in urban areas than in rural areas, rural areas were hit harder because of their larger population, except in Delhi.
 
2- Puducherry and Tripura show a very high unemployment rate and a low labour participation rate, but their small populations make the Covid19 effect negligible.
 
3- Bihar's high unemployment rate and low labour participation rate may be because people died.