Data Extraction

In [None]:
pip install fredapi

In [None]:
pip install folium

In [None]:
from fredapi import Fred
import pandas as pd
import numpy as np
import os

# SECURITY: the FRED API key is read from the FRED_API_KEY environment variable so the
# secret never lives in the notebook. Set it before starting Jupyter; a missing variable
# raises KeyError here. A key that was ever committed in plain text should be revoked.
fred = Fred(api_key=os.environ['FRED_API_KEY'])

# US unemployment rate, seasonally adjusted
unrate_sa = fred.get_series('UNRATE')
# US unemployment rate, not seasonally adjusted
unrate_nsa = fred.get_series('UNRATENSA')
# Both variants are needed because the race series used below are not seasonally adjusted

In [None]:
# Seasonally adjusted US unemployment rates by sex, fetched in the order (women, men)
women, men = (
    fred.get_series(series_id)
    for series_id in ('LNS14000002', 'LNS14000001')
)

In [None]:
# Not seasonally adjusted US unemployment rates, fetched in the order
# (Black/African American, Hispanic, White)
black, hispanic, white = (
    fred.get_series(series_id)
    for series_id in ('LNU04000006', 'LNU04000009', 'LNU04000003')
)

In [None]:
# Release 112 is "State Employment and Unemployment"
release = fred.search_by_release(112)

# Keep only the series whose title starts with "Unemployment Rate"
is_unemployment_rate = release['title'].str.startswith('Unemployment Rate')
release = release[is_unemployment_rate]

# Drop erroneous entries that are not one of the 50 states, one title suffix at a time
excluded_suffixes = (
    'Region',
    'Division',
    'division',
    '(DISCONTINUED)',
    'District of Columbia',
    'Puerto Rico',
)
for suffix in excluded_suffixes:
    release = release[~release['title'].str.endswith(suffix)]

# Discard every series that is not seasonally adjusted
is_seasonally_adjusted = release['seasonal_adjustment'].str.startswith('Seasonally')
states = release[is_seasonally_adjusted]

In [None]:
states

In [None]:
# Download one series per row of `states`, keyed by the first two characters of the
# series id (assumed to be the state's postal abbreviation — TODO confirm against `states`).
# The mapping is deliberately NOT called `dict`: rebinding that name would shadow the
# builtin for every later cell. The id is used directly rather than being looked up
# again through `states['id'][...]`.
state_series = {
    series_id[0:2]: fred.get_series(series_id)
    for series_id in states['id']
}
# One column per state, one row per observation date
states_df = pd.DataFrame(state_series)

In [None]:
states_df

Statistical Analysis

In [None]:
from scipy import stats

# Shapiro-Wilk normality test on the adjusted and then the unadjusted national rate
for national_rate in (unrate_sa, unrate_nsa):
    print(stats.shapiro(national_rate))

The Shapiro-Wilk results indicate that the unemployment rates are probably not normally distributed, so we cannot use tests that assume a normal distribution. We will instead use the Kruskal-Wallis H test and the Mann-Whitney U test.

Discriminating Factors: Gender

In [None]:
# Kruskal-Wallis across overall/women/men, then Mann-Whitney U for women vs. men
kruskal_gender = stats.kruskal(unrate_sa, women, men)
mannwhitney_gender = stats.mannwhitneyu(women, men)
print(kruskal_gender)
print(mannwhitney_gender)

These tests suggest that there is a significant difference between the unemployment rates of men and women in the US overall since 1948.

In [None]:
# Side-by-side table of the overall, women's and men's rates, aligned on the date index
gender_columns = ['Unemployment Rate', 'Unemployment Rate - Women', 'Unemployment Rate - Men']
df_gen = pd.concat([unrate_sa, women, men], axis=1, keys=gender_columns)

In [None]:
import matplotlib.pyplot as plt

def compute_ci(data, confidence=0.95):
    """Return the mean and a two-sided Student-t confidence interval for it.

    Parameters
    ----------
    data : one-dimensional array-like of numbers (list, ndarray, pandas Series)
        Sample values. NaN entries are ignored, so the mean, the standard error
        and the degrees of freedom are all based on the same observations.
    confidence : float, default 0.95
        Confidence level of the interval.

    Returns
    -------
    (mean, lower, upper) : tuple of floats
        Sample mean and the interval bounds. The bounds are NaN when fewer than
        two valid observations are available; the mean is NaN when there are none.
    """
    values = np.asarray(data, dtype=float)
    # Drop missing values up front; otherwise sem() propagates NaN and len() over-counts.
    values = values[~np.isnan(values)]
    n = values.size
    if n == 0:
        return np.nan, np.nan, np.nan
    mean = values.mean()
    if n < 2:
        # A single observation has no spread estimate, hence no interval.
        return mean, np.nan, np.nan
    sem = stats.sem(values)
    # Half-width of the interval: t critical value (n - 1 degrees of freedom) times the SEM
    h = sem * stats.t.ppf((1 + confidence) / 2., n - 1)
    return mean, mean - h, mean + h

# Mean and 95% confidence bounds for every column of df_gen, in column order
gender_ci = [compute_ci(df_gen[column]) for column in df_gen.columns]
means = [mean for mean, _, _ in gender_ci]
ci_lower = [lower for _, lower, _ in gender_ci]
ci_upper = [upper for _, _, upper in gender_ci]

# Error-bar lengths below and above each bar
err_below = np.array(means) - np.array(ci_lower)
err_above = np.array(ci_upper) - np.array(means)

plt.figure(figsize=(8, 6))
plt.bar(df_gen.columns, means, yerr=[err_below, err_above],
        capsize=10, color='skyblue', edgecolor='black')

In [None]:
# Same bar chart as the previous cell (reuses means / ci_lower / ci_upper),
# with the y-axis restricted to 5-6.2
err_below = np.array(means) - np.array(ci_lower)
err_above = np.array(ci_upper) - np.array(means)

plt.figure(figsize=(8, 6))
plt.ylim(5, 6.2)
plt.bar(df_gen.columns, means, yerr=[err_below, err_above],
        capsize=10, color='skyblue', edgecolor='black')

Discriminating Factors: Race

In [None]:
# Kruskal-Wallis across the overall and per-race series, then pairwise Mann-Whitney U tests
print(stats.kruskal(unrate_nsa, black, hispanic, white))
race_pairs = ((black, hispanic), (black, white), (hispanic, white))
for first_group, second_group in race_pairs:
    print(stats.mannwhitneyu(first_group, second_group))

These results suggest that there are clear differences in unemployment rates among racial groups in the US since 1972.

In [None]:
# Align the overall and per-race series on date and keep only dates where all four have a value
race_columns = [
    'Unemployment Rate',
    'Unemployment Rate - Black',
    'Unemployment Rate - Hispanic',
    'Unemployment Rate - White',
]
df_race = pd.concat([unrate_nsa, black, hispanic, white], axis=1, keys=race_columns).dropna()

In [None]:
# Mean and 95% confidence bounds for every column of df_race, in column order
race_ci = [compute_ci(df_race[column]) for column in df_race.columns]
means = [mean for mean, _, _ in race_ci]
ci_lower = [lower for _, lower, _ in race_ci]
ci_upper = [upper for _, _, upper in race_ci]

# Error-bar lengths below and above each bar
err_below = np.array(means) - np.array(ci_lower)
err_above = np.array(ci_upper) - np.array(means)

plt.figure(figsize=(12, 6))
plt.bar(df_race.columns, means, yerr=[err_below, err_above],
        capsize=10, color='skyblue', edgecolor='black')

In [None]:
# Same bar chart as the previous cell (reuses means / ci_lower / ci_upper),
# with the y-axis restricted to 5-12
err_below = np.array(means) - np.array(ci_lower)
err_above = np.array(ci_upper) - np.array(means)

plt.figure(figsize=(12, 6))
plt.ylim(5, 12)
plt.bar(df_race.columns, means, yerr=[err_below, err_above],
        capsize=10, color='skyblue', edgecolor='black')

In [None]:
# Rows of df_gen split by calendar year, 1948 through 2024 (one frame per year)
gender_rows_by_year = [df_gen[df_gen.index.year == year] for year in range(1948, 2025)]

# Mean (*_m) and standard deviation (*_s) per year for the overall, women's and men's rates
ur_m = np.array([rows['Unemployment Rate'].mean() for rows in gender_rows_by_year])
ur_s = np.array([rows['Unemployment Rate'].std() for rows in gender_rows_by_year])
urw_m = np.array([rows['Unemployment Rate - Women'].mean() for rows in gender_rows_by_year])
urw_s = np.array([rows['Unemployment Rate - Women'].std() for rows in gender_rows_by_year])
urm_m = np.array([rows['Unemployment Rate - Men'].mean() for rows in gender_rows_by_year])
urm_s = np.array([rows['Unemployment Rate - Men'].std() for rows in gender_rows_by_year])

In [None]:
# Yearly mean (markers) and standard deviation (error bars) for women and men, 1948-1989
years = np.arange(1948, 1990)
n_years = len(years)
errorbar_style = {'fmt': '-o', 'solid_capstyle': 'projecting', 'capsize': 7, 'capthick': 1.4}

fig, ax = plt.subplots()
for label, yearly_mean, yearly_std in (('Women', urw_m, urw_s), ('Men', urm_m, urm_s)):
    # The yearly arrays start at 1948, so the first n_years entries line up with `years`
    ax.errorbar(years, yearly_mean[0:n_years], yerr=yearly_std[0:n_years],
                label=label, **errorbar_style)

ax.set_ylim(2, 12)
ax.set_title('Average Unemployment Rate of Men and Women From 1948 to 1989')
ax.set_ylabel('Average Unemployment Rate')
ax.set_xlabel("Year")
ax.legend(loc="upper left")
plt.show()

In [None]:
# Yearly mean (markers) and standard deviation (error bars) for women and men, 1990-2023
years = np.arange(1990, 2024)
errorbar_style = {'fmt': '-o', 'solid_capstyle': 'projecting', 'capsize': 7, 'capthick': 1.4}

fig, ax = plt.subplots()
for label, yearly_mean, yearly_std in (('Women', urw_m, urw_s), ('Men', urm_m, urm_s)):
    # Position 42 is 1990 in the arrays that start at 1948; the final entry is left out
    ax.errorbar(years, yearly_mean[42:-1], yerr=yearly_std[42:-1],
                label=label, **errorbar_style)

ax.set_ylim(2, 12)
ax.set_title('Average Unemployment Rate of Men and Women From 1990 to 2023')
ax.set_ylabel('Average Unemployment Rate')
ax.set_xlabel("Year")
ax.legend(loc="upper left")
plt.show()

In [None]:
# Rows of df_race split by calendar year, 1972 through 2024 (one frame per year)
race_rows_by_year = [df_race[df_race.index.year == year] for year in range(1972, 2025)]

# Mean (*_m) and standard deviation (*_s) per year for the Black, Hispanic and White rates
urbl_m = np.array([rows['Unemployment Rate - Black'].mean() for rows in race_rows_by_year])
urbl_s = np.array([rows['Unemployment Rate - Black'].std() for rows in race_rows_by_year])
urhi_m = np.array([rows['Unemployment Rate - Hispanic'].mean() for rows in race_rows_by_year])
urhi_s = np.array([rows['Unemployment Rate - Hispanic'].std() for rows in race_rows_by_year])
urwh_m = np.array([rows['Unemployment Rate - White'].mean() for rows in race_rows_by_year])
urwh_s = np.array([rows['Unemployment Rate - White'].std() for rows in race_rows_by_year])

In [None]:
# Yearly mean (markers) and standard deviation (error bars) per race, 1972-1999
years = np.arange(1972, 2000)
n_years = len(years)
errorbar_style = {'fmt': '-o', 'solid_capstyle': 'projecting', 'capsize': 7, 'capthick': 1.4}
race_curves = (
    ('Black', urbl_m, urbl_s),
    ('Hispanic', urhi_m, urhi_s),
    ('White', urwh_m, urwh_s),
)

fig, ax = plt.subplots()
for label, yearly_mean, yearly_std in race_curves:
    # The yearly arrays start at 1972, so the first n_years entries line up with `years`
    ax.errorbar(years, yearly_mean[0:n_years], yerr=yearly_std[0:n_years],
                label=label, **errorbar_style)

ax.set_ylim(2, 22)
ax.set_title('Average Unemployment Rate of Different Races From 1972 to 1999')
ax.set_ylabel('Average Unemployment Rate')
ax.set_xlabel("Year")
ax.legend(loc="upper left")
plt.show()

In [None]:
# Yearly mean (markers) and standard deviation (error bars) per race, 2000-2023
years = np.arange(2000, 2024)
errorbar_style = {'fmt': '-o', 'solid_capstyle': 'projecting', 'capsize': 7, 'capthick': 1.4}
race_curves = (
    ('Black', urbl_m, urbl_s),
    ('Hispanic', urhi_m, urhi_s),
    ('White', urwh_m, urwh_s),
)

fig, ax = plt.subplots()
for label, yearly_mean, yearly_std in race_curves:
    # Position 28 is 2000 in the arrays that start at 1972; the final entry is left out
    ax.errorbar(years, yearly_mean[28:-1], yerr=yearly_std[28:-1],
                label=label, **errorbar_style)

ax.set_ylim(2, 22)
ax.set_title('Average Unemployment Rate of Different Races From 2000 to 2023')
ax.set_ylabel('Average Unemployment Rate')
ax.set_xlabel("Year")
ax.legend(loc="upper left")
plt.show()

Data Visualization

In [None]:
states_df

In [None]:

# Make sure the index holds datetimes so the calendar year can be read from it
observation_dates = pd.to_datetime(states_df.index)
states_df.index = observation_dates

# Store each row's year in a new "Year" column
states_df["Year"] = observation_dates.year

In [None]:
# the new column will be visible at the end
states_df 


In [None]:
# Replace the date index with a default integer index. drop=True discards the old
# index instead of turning it into a column; the year is already in the "Year" column.


states_df_reset = states_df.reset_index(drop=True)

In [None]:
states_df_reset

In [None]:
# Reorder the columns so that 'Year' comes first, followed by the state columns
state_columns = [
    'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD',
    'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH',
    'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY',
]
column_order = ['Year'] + state_columns
states_df_reset = states_df_reset[column_order]

In [None]:
states_df_reset

In [None]:
# Group the rows by 'Year' and average every remaining column within each year;
# the result is indexed by year with one column per state
mean_state_wise = states_df_reset.groupby('Year').mean()

In [None]:
mean_state_wise

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heat map of the yearly mean rate: one row per year, one column per state
fig, ax = plt.subplots(figsize=(100, 100))
sns.heatmap(mean_state_wise, annot=True, cmap='YlGnBu', ax=ax)
ax.set_title('Mean Values by State and Year')
ax.set_xlabel('State')
ax.set_ylabel('Year')
plt.show()

In [None]:
# For the map, transpose the table so each row is a state and each column is a year

mean_state_wise_map = mean_state_wise.T



In [None]:
# Pick the year to show on the map by column position (change the iloc position to
# visualise a different year).
# `.iloc[:, 1]` alone returns a Series indexed by state, which has no columns to name.
# Turn it into a real two-column DataFrame ('State', 'rate') so that later column-based
# access, such as the choropleth's columns=["State", "rate"], refers to actual columns.

T1 = (
    mean_state_wise_map.iloc[:, 1]
    .rename_axis('State')
    .rename('rate')
    .reset_index()
)

In [None]:
# Name the two columns expected by the map cell.
# NOTE(review): this only renames anything when T1 is a two-column DataFrame. If T1 is a
# pandas Series (e.g. a single column picked with .iloc), a Series has no `columns` axis,
# so the assignment presumably just attaches a plain attribute — confirm.
T1.columns = ['State', 'rate']

In [None]:
T1.columns

In [None]:
T1

In [None]:
import folium


m = folium.Map(location=(45.5236, -122.6750))

In [None]:
import pandas
import requests

# Download the US state outlines (GeoJSON) that the choropleth is keyed on.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error instead of failing later in .json().
state_geo_response = requests.get(
    "https://raw.githubusercontent.com/python-visualization/folium-example-data/main/us_states.json",
    timeout=30,
)
state_geo_response.raise_for_status()
state_geo = state_geo_response.json()

m = folium.Map(location=[48, -102], zoom_start=3)

# Colour each state by the unemployment rate in T1; features are matched on "feature.id"
folium.Choropleth(
    geo_data=state_geo,
    name="choropleth",
    data=T1,
    columns=["State", "rate"],
    key_on="feature.id",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Unemployment Rate (%)",
).add_to(m)

folium.LayerControl().add_to(m)

# Last expression of the cell: renders the map
m

GUI

In [None]:
# Reshape the year-by-state table into long form for the GUI:
# one row per (Year, State) pair with the rate in 'Unemployment Rate'
temp_df = mean_state_wise.reset_index()
gui_df = pd.melt(temp_df, id_vars=['Year'], var_name='State', value_name='Unemployment Rate')

# Show the melted dataframe
print(gui_df)

In [None]:
#importing the required libraries

import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

In [None]:
# Sorted, de-duplicated years and states taken straight from the dataframe
years_list = sorted(gui_df['Year'].unique())
states_list = sorted(gui_df['State'].unique())

# Main Tkinter window and its title
gui_window = tk.Tk()
gui_window.title("Unemployment Rate Visualization by selecting years and states")

# Upper frame: holds the state and year checkboxes
checkbox_gui_frame = ttk.Frame(gui_window)
checkbox_gui_frame.pack(padx=10, pady=10, fill='both', expand=True)

# Lower frame: holds the plotted graph
plot_gui_frame = ttk.Frame(gui_window)
plot_gui_frame.pack(padx=10, pady=10, fill='both', expand=True)


def _add_checkbox_section(heading, items, first_column):
    """Add a heading plus one checkbox per item, three per row, to checkbox_gui_frame.

    The heading goes in row 0 at `first_column`; the checkboxes fill the rows below,
    occupying columns `first_column` .. `first_column + 2`.
    Returns (selection, heading_label), where `selection` maps each item to the
    tk.BooleanVar bound to its checkbox.
    """
    heading_label = ttk.Label(checkbox_gui_frame, text=heading)
    heading_label.grid(row=0, column=first_column, padx=5, pady=5, sticky="w")

    selection = {}
    for position, item in enumerate(items):
        ticked = tk.BooleanVar()
        selection[item] = ticked
        grid_row, column_offset = divmod(position, 3)
        item_checkbox = ttk.Checkbutton(checkbox_gui_frame, text=str(item), variable=ticked)
        item_checkbox.grid(row=grid_row + 1, column=first_column + column_offset,
                           padx=5, pady=5, sticky="w")
    return selection, heading_label


# States occupy grid columns 0-2, years occupy grid columns 3-5
checked_state, state_gui_label = _add_checkbox_section("Select States:", states_list, 0)
checked_year, year_gui_label = _add_checkbox_section("Select Years:", years_list, 3)

# Creating a function to plot the unemployement rate according to the selection done in the checkboxes

def plotting_unemployment():
    """Plot the unemployment rate for the ticked states and years inside the GUI.

    Reads the checkbox variables in `checked_state` and `checked_year`, filters
    `gui_df` to the selection and draws one line per selected state into
    `plot_gui_frame`, replacing any previously shown plot. Shows a warning dialog
    and returns without plotting when the selection matches no rows.
    """
    # Local import so this block carries its own dependency. A standalone Figure is
    # not registered with pyplot, so repeated clicks do not pile up open figures.
    from matplotlib.figure import Figure

    # Collect the ticked states and years
    states_marked_in_window = [state for state, var in checked_state.items() if var.get()]
    years_marked_in_window = [year for year, var in checked_year.items() if var.get()]

    # Keep only the rows matching both selections
    selected_rows = (
        gui_df['State'].isin(states_marked_in_window)
        & gui_df['Year'].isin(years_marked_in_window)
    )
    final_df = gui_df[selected_rows]

    if final_df.empty:
        messagebox.showwarning("No Data", "No data available for the selected states and years.")
        return

    figure = Figure(figsize=(10, 6))
    ax = figure.add_subplot(111)

    # One line per selected state
    for state in states_marked_in_window:
        state_data = final_df[final_df['State'] == state]
        ax.plot(state_data['Year'], state_data['Unemployment Rate'], label=state, marker='o')

    ax.set_title('Unemployment Rate by State and Year')
    ax.set_xlabel('Year')
    ax.set_ylabel('Unemployment Rate (%)')
    ax.legend(title="States")
    ax.grid(True)

    # Remove the previous plot widget before showing the new one
    for widget in plot_gui_frame.winfo_children():
        widget.destroy()

    # Embed the new figure in the GUI window
    canvas = FigureCanvasTkAgg(figure, master=plot_gui_frame)
    canvas.get_tk_widget().pack(fill='both', expand=True)
    canvas.draw()

# Button that triggers the plot. It is placed below the longer of the two checkbox
# grids (three checkboxes per row). Using only the number of states would make the
# button overlap the year checkboxes once there are more years than states.
longest_checkbox_list = max(len(states_list), len(years_list))
plot_button = ttk.Button(checkbox_gui_frame, text="Create Visulisation of Unemployment Rate", command=plotting_unemployment)
plot_button.grid(row=longest_checkbox_list // 3 + 2, column=0, columnspan=6, pady=10)

# Start the Tkinter main loop (blocks until the window is closed)

gui_window.mainloop()
