<a href="https://colab.research.google.com/github/Jahnavi-1920/Emotion-dectection-using-opencv/blob/Jahnavi-1920-upload-1/Unemployement_analysis_with_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Unemployment is measured by the unemployment rate which is the number of people who are unemployed as a percentage of the total
#labour force. We have seen a sharp increase in the unemployment rate during Covid-19, so analyzing the unemployment rate can
#be a good data science project.


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
file_path = '/content/Unemployment_Rate_upto_11_2020.xlsx'
data = pd.read_excel(file_path)

# Display the column names and first few rows to identify the correct column names
print("Column Names:")
print(data.columns)

print("\nData Overview:")
print(data.head())

# Uncomment the below line after identifying the correct column name
# unemployment_rate_column = 'Correct_Column_Name_Here'

# Assuming the correct column name identified is 'Unemployment Rate'
unemployment_rate_column = 'Unemployment Rate'  # Replace with the actual column name if different

try:
    # Basic information about the data
    print("\nData Info:")
    print(data.info())

    # Check for missing values
    print("\nMissing Values:")
    print(data.isnull().sum())

    # Descriptive statistics
    print("\nDescriptive Statistics:")
    print(data.describe())

    # Plot the unemployment rate over time
    plt.figure(figsize=(14, 7))
    plt.plot(data['Date'], data[unemployment_rate_column], marker='o')
    plt.title('Unemployment Rate Over Time')
    plt.xlabel('Date')
    plt.ylabel('Unemployment Rate (%)')
    plt.grid(True)
    plt.show()

    # Boxplot to visualize the distribution of unemployment rates
    plt.figure(figsize=(10, 6))
    sns.boxplot(y=data[unemployment_rate_column])
    plt.title('Distribution of Unemployment Rates')
    plt.ylabel('Unemployment Rate (%)')
    plt.show()

    # Analyzing the change in unemployment rate during Covid-19
    covid_period = data[(data['Date'] >= '2020-03-01') & (data['Date'] <= '2020-11-30')]
    pre_covid_period = data[data['Date'] < '2020-03-01']

    # Summary statistics before and during Covid-19
    print("\nSummary Statistics Pre-Covid-19:")
    print(pre_covid_period.describe())

    print("\nSummary Statistics During Covid-19:")
    print(covid_period.describe())

    # Compare the average unemployment rate before and during Covid-19
    pre_covid_avg = pre_covid_period[unemployment_rate_column].mean()
    covid_avg = covid_period[unemployment_rate_column].mean()

    print("\nAverage Unemployment Rate Pre-Covid-19: {:.2f}%".format(pre_covid_avg))
    print("Average Unemployment Rate During Covid-19: {:.2f}%".format(covid_avg))

    # Plotting unemployment rates pre-Covid-19 and during Covid-19
    plt.figure(figsize=(14, 7))
    plt.plot(pre_covid_period['Date'], pre_covid_period[unemployment_rate_column], label='Pre-Covid-19')
    plt.plot(covid_period['Date'], covid_period[unemployment_rate_column], label='During Covid-19', marker='o')
    plt.title('Unemployment Rate Pre-Covid-19 vs During Covid-19')
    plt.xlabel('Date')
    plt.ylabel('Unemployment Rate (%)')
    plt.legend()
    plt.grid(True)
    plt.show()

except KeyError as e:
    print(f"KeyError: {e}. Please check the column name in the dataset.")




Column Names:
Index(['Region', 'Date', 'Frequency', 'Estimated Unemployment Rate (%)',
       'Estimated Employed', 'Estimated Labour Participation Rate (%)',
       'Region.1', 'Longitude', 'Latitude'],
      dtype='object')

Data Overview:
           Region        Date Frequency  Estimated Unemployment Rate (%)  \
0  Andhra Pradesh  31-01-2020         M                             5.48   
1  Andhra Pradesh  29-02-2020         M                             5.83   
2  Andhra Pradesh  31-03-2020         M                             5.79   
3  Andhra Pradesh  30-04-2020         M                            20.51   
4  Andhra Pradesh  31-05-2020         M                            17.43   

   Estimated Employed  Estimated Labour Participation Rate (%) Region.1  \
0            16635535                                    41.02    South   
1            16545652                                    40.90    South   
2            15881197                                    39.18    South   
3

<Figure size 1400x700 with 0 Axes>