In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

file_path = r"C:\Users\adapa\Downloads\Unemployment in India.csv"

try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print("Error: The file was not found. Please check the path.")
    exit()

print("Column Names:", df.columns)  
print(df.head()) 
print(df.info())

possible_date_cols = ['Date', 'date', 'DATE']
date_col = next((col for col in possible_date_cols if col in df.columns), None)

if date_col:
    df.rename(columns={date_col: 'Date'}, inplace=True)
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
else:
    print("Error: 'Date' column is missing or misnamed. Please verify the dataset.")
    exit()

expected_columns = ['Region', 'Date', 'Frequency', 'Estimated Unemployment Rate',
                    'Estimated Employed', 'Estimated Labour Participation Rate',
                    'Area', 'Longitude', 'Latitude']

if len(df.columns) >= len(expected_columns):
    df.columns = expected_columns[:len(df.columns)]
else:
    print("Warning: Column length mismatch. Proceeding with existing columns.")

df.dropna(inplace=True)

df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

overall_unemployment = df.groupby('Date')['Estimated Unemployment Rate'].mean().reset_index()

plt.figure(figsize=(14, 7))
sns.lineplot(x='Date', y='Estimated Unemployment Rate', data=overall_unemployment)
plt.title('Overall Estimated Unemployment Rate in India Over Time')
plt.xlabel('Date')
plt.ylabel('Unemployment Rate (%)')
plt.grid(True)
plt.show()

plt.figure(figsize=(16, 8))
sns.barplot(x='Region', y='Estimated Unemployment Rate',
            data=df.groupby('Region')['Estimated Unemployment Rate'].mean().reset_index().sort_values(by='Estimated Unemployment Rate', ascending=False))
plt.xticks(rotation=90)
plt.title('Average Estimated Unemployment Rate by State/Region')
plt.xlabel('State/Region')
plt.ylabel('Unemployment Rate (%)')
plt.show()

fig = px.sunburst(df, path=['Area', 'Region'], values='Estimated Unemployment Rate',
                  title='Unemployment Rate Distribution by Area and Region', height=600, color_continuous_scale='RdYlGn_r')
fig.show()


Column Names: Index(['Region', ' Date', ' Frequency', ' Estimated Unemployment Rate (%)',
       ' Estimated Employed', ' Estimated Labour Participation Rate (%)',
       'Area'],
      dtype='object')
           Region         Date  Frequency   Estimated Unemployment Rate (%)  \
0  Andhra Pradesh   31-05-2019    Monthly                              3.65   
1  Andhra Pradesh   30-06-2019    Monthly                              3.05   
2  Andhra Pradesh   31-07-2019    Monthly                              3.75   
3  Andhra Pradesh   31-08-2019    Monthly                              3.32   
4  Andhra Pradesh   30-09-2019    Monthly                              5.17   

    Estimated Employed   Estimated Labour Participation Rate (%)   Area  
0           11999139.0                                     43.24  Rural  
1           11755881.0                                     42.05  Rural  
2           12086707.0                                     43.50  Rural  
3           12285693.0     

KeyError: 'Date'