In [None]:
# Task 2: Unemployment analysis
# Unemployment is measured by the unemployment rate, which is the number of
# people who are unemployed expressed as a percentage of the total labour force.
# The unemployment rate rose sharply during Covid-19, so analyzing it makes a
# good data science project.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw unemployment data. The file is read without a header row and
# the seven column names are assigned explicitly.
df = pd.read_csv('Oibsip/Unemployment.csv', header=None)

df.columns = ['Region', 'Date', 'Frequency', 'Estimated Unemployment Rate (%)',
              'Estimated Employed', 'Estimated Labour Participation Rate (%)', 'Area']

# With header=None, a header line in the file (if there is one) is loaded as
# an ordinary data row. Left in place it would pass through the coercions
# below as a bogus record (Region == 'Region', Date == NaT, rate == mean) and
# show up in the per-region aggregates, so drop it when the first row merely
# repeats the column names. Files without a header line are unaffected.
if not df.empty:
    first_row = df.iloc[0].astype(str).str.strip().tolist()
    if first_row == list(df.columns):
        df = df.iloc[1:].reset_index(drop=True)

# Dates are stripped of surrounding whitespace and parsed as day-month-year;
# values that do not match the format become NaT instead of raising.
df['Date'] = df['Date'].str.strip()
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y', errors='coerce')

# Coerce the rate to numeric (unparseable values become NaN), then impute the
# missing values with the column mean.
rate_column = 'Estimated Unemployment Rate (%)'
df[rate_column] = pd.to_numeric(df[rate_column], errors='coerce')
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form is deprecated and does not
# update `df` when pandas Copy-on-Write is active.
df[rate_column] = df[rate_column].fillna(df[rate_column].mean())

# Quick sanity checks on the cleaned frame.
print(df.head())
df.info()  # info() prints its own report and returns None, so no print()
print(df.describe())

# Line chart: unemployment rate over time for a single region.
andhra_pradesh_rows = df[df['Region'] == 'Andhra Pradesh']

fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=andhra_pradesh_rows,
    x='Date',
    y='Estimated Unemployment Rate (%)',
    hue='Region',
    ax=ax,
)
ax.set_title('Unemployment Rate Over Time in Andhra Pradesh')
ax.set_xlabel('Date')
ax.set_ylabel('Unemployment Rate (%)')
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
fig.tight_layout()
plt.show()

# Line chart: unemployment rate over time, one line per region.
fig, ax = plt.subplots(figsize=(14, 8))
sns.lineplot(
    data=df,
    x='Date',
    y='Estimated Unemployment Rate (%)',
    hue='Region',
    ax=ax,
)
ax.set_title('Unemployment Rate Over Time Across Different Regions')
ax.set_xlabel('Date')
ax.set_ylabel('Unemployment Rate (%)')
plt.xticks(rotation=45)  # acts on the current axes, i.e. `ax`
# Anchor the legend past the right edge of the axes.
ax.legend(loc='upper right', bbox_to_anchor=(1.15, 1))
fig.tight_layout()
plt.show()

# Horizontal bar chart: mean unemployment rate per region, sorted ascending.
fig, ax = plt.subplots(figsize=(12, 8))
avg_unemployment_rate = (
    df.groupby('Region')['Estimated Unemployment Rate (%)']
    .mean()
    .sort_values()
)
sns.barplot(
    x=avg_unemployment_rate,
    y=avg_unemployment_rate.index,
    palette='viridis',
    ax=ax,
)
ax.set_title('Average Unemployment Rate by Region')
ax.set_xlabel('Average Unemployment Rate (%)')
ax.set_ylabel('Region')
fig.tight_layout()
plt.show()

# Pie chart of the per-region mean unemployment rate (regions in groupby
# order, not sorted); each wedge is labelled with its region and its
# percentage of the total.
avg_unemployment_rate = df.groupby('Region')['Estimated Unemployment Rate (%)'].mean()

fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(
    avg_unemployment_rate,
    labels=avg_unemployment_rate.index,
    autopct='%1.1f%%',
    colors=sns.color_palette('pastel'),
    startangle=140,
)
ax.set_title('Unemployment Rate Distribution by Region')
plt.show()

# Persist the cleaned frame; index=False keeps the row index out of the file.
cleaned_output_path = 'Oibsip/Cleaned_Unemployment.csv'
df.to_csv(cleaned_output_path, index=False)
