In [None]:
import copy
import seaborn as sns
from seaborn.utils import np, plt, pd, os

In [None]:
# Put the sibling "Presentation" directory on the module search path, once only,
# so that modules stored there can be imported by later cells.
my_pkg = "..//Presentation"
imp_mods = os.sys.path
if imp_mods.count(my_pkg) == 0:
    imp_mods.extend([my_pkg])
    print("Imported successfully")

In [None]:
from census_methods import CensusDataset as cd 

In [None]:
# Load the census data from a CSV located in the current working directory.
census_df = pd.read_csv('final_Marital_Status_and_Religion_cleaned.csv')

In [None]:
# Summarise the columns, dtypes and non-null counts of the loaded frame.
census_df.info()

In [None]:
# Preview the first five rows.
census_df.head()

___

## UNEMPLOYMENT TRENDS

In [None]:
# Keep only the residents whose unemployment flag equals 1.
unemployed = census_df[census_df['Is Unemployed'].eq(1)]
unemployed

#### Those who are actively unemployed (aged 65 or younger and unemployed)

In [None]:
# Unemployed residents aged 65 or younger, followed by how many there are.
actively_unemployed = unemployed[unemployed['Age'].le(65)]
len(actively_unemployed)

In [None]:
# Number of unemployed residents older than 65 (those not counted as actively unemployed).
len(unemployed) - len(actively_unemployed)

### Are certain ages more likely to be unemployed than others?

#### Age Distribution of Unemployed Residents

In [None]:
# Bracket edges in 5-year steps, starting at 0.
# The age brackets built from these edges are left-closed / right-open, so the
# last edge has to be strictly greater than the oldest recorded age. Using
# "+ 6" (instead of "+ 5") guarantees that even when the maximum age is itself
# a multiple of 5; for every other maximum the edges are unchanged.
age_boundaries = list(range(0, census_df['Age'].max() + 6, 5))
print(age_boundaries)

#### Classify the ages of unemployed residents

In [None]:
# Assign every unemployed resident to a left-closed 5-year age bracket,
# e.g. [30, 35) holds ages 30 to 34.
unemp_age_cls = pd.cut(
    unemployed['Age'],
    bins=age_boundaries,
    right=False,
    include_lowest=True,
)
unemp_age_cls

#### Unemployment Trend Across Age Brackets

In [None]:
# Frequency table for age distribution of unemployed residents,
# re-ordered by age bracket (sort_index) rather than by count
unemp_age_cls_freq = unemp_age_cls.value_counts().sort_index()
unemp_age_cls_freq

In [None]:
# each bracket's share of all unemployed residents ("share of pie"),
# in percent, rounded to two decimals
unemp_age_cls_freq_perc = (100 * unemp_age_cls_freq / unemp_age_cls_freq.sum()).round(2)
unemp_age_cls_freq_perc

In [None]:
# Combined share (%) of the unemployed aged 25 to 44.
# The frequency table is ordered by bracket, and brackets are 5 years wide
# starting at 0, so positions 5..8 are [25, 30), [30, 35), [35, 40), [40, 45).
# A positional slice is used because the previous label slice 4:8 was inclusive
# and started one bracket too early, at [20, 25); it also depended on the column
# name that reset_index() happens to produce.
unemp_age_cls_freq_perc.iloc[5:9].sum()

In [None]:
# brackets holding at least 10% of the unemployed
# (these are colored red in the plot, all others darkred)
ten_perc_and_above = unemp_age_cls_freq_perc[unemp_age_cls_freq_perc.ge(10)]
ten_perc_and_above

In [None]:
# every bracket that is not part of the ten-percent-and-above group
below_ten_perc = unemp_age_cls_freq_perc[
    np.logical_not(unemp_age_cls_freq_perc.index.isin(ten_perc_and_above.index))
]
below_ten_perc

In [None]:
# Unemployed residents per age bracket as a percentage of the entire
# population - overall percentage, for proper perspective.
# The ratio is multiplied by 100 so the values really are percentages; they are
# displayed with a "%" suffix when plotted.
unemp_age_cls_overall_perc = np.round(100 * unemp_age_cls_freq/census_df.shape[0], 2)
unemp_age_cls_overall_perc

#### Plot Showing the Age Distribution of Unemployed Residents

In [None]:
# Histogram (with a KDE overlay) of unemployed residents' ages, binned on the
# same 5-year edges as the age brackets.
sns.set_style(style='whitegrid')
fig = plt.figure(figsize=(6, 4), dpi=200)

sns.histplot(data=unemployed, x='Age', bins=age_boundaries, kde=True)
plt.title(label="Age Distribution of Unemployed Residents")

In [None]:
# Two stacked horizontal bar charts of unemployment per age bracket:
#   up_ax   - each bracket's share of all unemployed residents (%)
#   down_ax - each bracket's unemployed residents as a share of the whole population (%)
sns.set_style(style='darkgrid')
fig = plt.figure(figsize=(6, 4), dpi=200)
# add_axes takes [left, bottom, width, height] as fractions of the figure
down_ax, up_ax = fig.add_axes([0, 0, 1, 0.6]), fig.add_axes([0, 0.8, 1, 0.6])

n_unemployed = unemployed['Is Unemployed'].sum()
n_residents = census_df.shape[0]
# Computed here in percent units so that bar lengths, bar labels and the
# "Percentage of Population" axis label are guaranteed to agree.
overall_perc = np.round(100 * unemp_age_cls_freq / n_residents, 2)

# upper panel: brackets with >= 10% of the unemployed in red, the rest in darkred
sns.barplot(x=ten_perc_and_above,
            y=ten_perc_and_above.index,
            color='red', ax=up_ax)
sns.barplot(x=below_ten_perc,
            y=below_ten_perc.index,
            color='darkred', ax=up_ax)
# lower panel
sns.barplot(x=overall_perc,
            y=overall_perc.index,
            color='darkred', ax=down_ax)

# set axis boundaries for plotting section
up_ax.set_xlim(0, 20)

# Annotate every non-empty bar. The bar position comes from enumerate(), so no
# integer [] lookup is made against the interval-labelled index (such a lookup
# is not a reliable positional access).
bar_values = zip(unemp_age_cls_freq, unemp_age_cls_freq_perc, overall_perc)
for pos, (count, share, overall) in enumerate(bar_values):
    if count:
        up_ax.text(share + 0.5, pos, f"{count} residents",
                   fontsize=5, fontweight='bold')
        down_ax.text(overall + 0.025, pos, f"{overall:.2f}%",
                     fontsize=5, fontweight='bold')

# Summary of the upper plot. The bracket list is derived from the data so it
# cannot drift out of sync with the red bars; brackets are left-closed, hence
# "right - 1" for the last age included.
red_brackets = ", ".join(f"{iv.left} to {iv.right - 1}" for iv in ten_perc_and_above.index)
up_ax.text(9, 21,
           f"{ten_perc_and_above.sum():.2f}% of all {n_unemployed} unemployed residents belong to\n" +
           f"age brackets: [{red_brackets}]\n" +
           "(shown above in red)",
           fontsize=8, fontweight='bold', color='blue',
           bbox={'alpha': 0.2, 'facecolor':'none', 'edgecolor':'red'})

# summary of the lower plot
note = f"{n_unemployed} unemployed residents make up only " +\
             f"{100 * n_unemployed/n_residents: .2f}% of the population.\n" +\
    f"And {actively_unemployed.shape[0]} actively unemployed residents make up only " +\
             f"{100 * actively_unemployed.shape[0]/n_residents: .2f}% of the population"
# x position is in percent units (matches the lower panel's x axis)
down_ax.text(0.2, 21, note,
           fontsize=8, fontweight='bold', color='blue',
           bbox={'alpha': 0.2, 'facecolor':'none', 'edgecolor':'red'})

# label the axes of each section
up_ax.set_xlabel("Percentage of Unemployed")
up_ax.set_ylabel("Age Brackets")
down_ax.set_xlabel("Percentage of Population")
down_ax.set_ylabel("Age Brackets")

# title for each section
up_ax.set_title("Age Brackets of Unemployed Residents in Percentage")
down_ax.set_title("Age Brackets of Unemployed Residents as a Percentage of Population",
                 fontsize=10);

In [None]:
# Hand the figure currently bound to `fig` to the project helper for saving.
# NOTE(review): where and how fig_writer stores the file is defined in
# census_methods, which is not visible here.
fname = 'gen_unempl_trend.png'
cd.fig_writer(fname, fig)

### Unemployment Trend Across Gender

In [None]:
# Number of unemployed residents per gender, most frequent first.
unemp_per_gender = unemployed['Gender'].value_counts()
unemp_per_gender

In [None]:
# Bar chart of unemployed residents per gender, with a percentage summary box.
sns.set_style(style='whitegrid')
# adjust dimensions of figure
fig = plt.figure(figsize=(6, 4), dpi=200)

sns.barplot(x=unemp_per_gender.index,
            y=unemp_per_gender, 
            palette={'M':'brown', 'F':'darkblue'})

# Write each count just above its bar. enumerate() supplies the bar position,
# which avoids integer [] lookups on a string-labelled Series (positional
# fallback for such lookups is deprecated in recent pandas).
for pos, count in enumerate(unemp_per_gender):
    plt.text(pos, count + 1, count,
             fontweight='bold', size=8)

note = "Unemployed (%):\n" +\
"-"*20 + f"\nFemales: {100*unemp_per_gender['F']/unemp_per_gender.sum(): .2f}" +\
f"\nMales:{100 * unemp_per_gender['M']/unemp_per_gender.sum(): .2f}"

# summary box; coordinates are in data units (bar position, head count)
plt.text(0.8, 250, note,
        bbox={'facecolor':'none', 'edgecolor':'red'},
        size=8)

plt.title("Number of Male and Female Unemployed Residents");

#### For each age bracket, how many females/males are unemployed?

In [None]:
# Split the age-bracket labels of unemployed residents by gender.
unemp_fem_ages = unemp_age_cls[unemployed['Gender'].eq('F')]
unemp_mal_ages = unemp_age_cls[unemployed['Gender'].eq('M')]

In [None]:
# age distribution of unemployed males, ordered by age bracket
unemp_male_age_distr = unemp_mal_ages.value_counts().sort_index()
unemp_male_age_distr

In [None]:
# age distribution of unemployed females, ordered by age bracket
unemp_fem_age_distr = unemp_fem_ages.value_counts().sort_index()
unemp_fem_age_distr

In [None]:
# Population-pyramid style chart: unemployed males extend to the right and
# unemployed females (plotted as negative counts) to the left, per age bracket.
sns.set_style(style='darkgrid')
# adjust dimensions of figure
fig = plt.figure(figsize=(8, 5), dpi=200)

# both calls draw on the current axes, so mal_ax and fem_ax are the same Axes
mal_ax = sns.barplot(y=unemp_male_age_distr.index, 
                     x=unemp_male_age_distr, 
                     color='brown',
                    label='Male',
                    lw=0)

fem_ax = sns.barplot(y=unemp_fem_age_distr.index, 
                     x=-1*unemp_fem_age_distr, 
                     color='darkblue',
                    label='Female',
                    lw=0)

# Annotate each bracket that has at least one unemployed resident. The bar
# position comes from enumerate(), so no integer [] lookup is made against the
# interval-labelled index.
for pos, (n_male, n_female) in enumerate(zip(unemp_male_age_distr, unemp_fem_age_distr)):
    if n_male or n_female:
        mal_ax.text(n_male + 0.5, pos, n_male,
                    fontweight='bold', size=5)
        fem_ax.text(-1*n_female - 1, pos, n_female, color='black',
                    fontweight='bold', size=5)

# Fix the limits and tick positions first, then label them with absolute
# values, so every label is guaranteed to sit on its intended tick.
pyramid_ticks = [-60, -40, -20, 0, 20, 40, 60]
fem_ax.set_xlim(-60, 60)
fem_ax.set_xticks(pyramid_ticks)
fem_ax.set_xticklabels([abs(tick) for tick in pyramid_ticks])
fem_ax.set(xlabel='Head Count', ylabel='Age Bracket')
fem_ax.tick_params(axis='y', labelsize=5)
mal_ax.text(20, 1, 'Male', color='brown')
fem_ax.text(-20, 1, 'Female', color='darkblue')

plt.title("Pyramid of Male/Female Unemployed Residents Per Age Bracket");

### Unemployment Trend Across Religions

In [None]:
# Number of unemployed residents per religion, most frequent first.
unemp_per_religion = unemployed['Religion'].value_counts()
unemp_per_religion

In [None]:
# Bar chart of unemployed residents per religion, with a percentage summary box.
sns.set_style(style='whitegrid')
# adjust dimensions of figure
fig = plt.figure(figsize=(6, 4), dpi=200)

# the palette must contain an entry for every religion present in the data
sns.barplot(x=unemp_per_religion.index,
            y=unemp_per_religion, 
            palette={'Christian': 'brown', 
                     'Unknown': 'darkblue',
                    'Muslim': 'blue',
                    'Sikh': 'darkred',
                    'Bahai': 'red',
                    'Jewish': 'red'})

# Write each count just above its bar. enumerate() supplies the bar position,
# which avoids integer [] lookups on a string-labelled Series (positional
# fallback for such lookups is deprecated in recent pandas).
for pos, count in enumerate(unemp_per_religion):
    plt.text(pos, count + 1, count,
             fontweight='bold', size=8)

note = "Unemployed (%):\n" +\
"-"*20 + f"\nChristian: {100*unemp_per_religion['Christian']/unemp_per_religion.sum(): .2f}" +\
f"\nUnknown:{100 * unemp_per_religion['Unknown']/unemp_per_religion.sum(): .2f}" +\
f"\nMuslim: {100*unemp_per_religion['Muslim']/unemp_per_religion.sum(): .2f}" +\
f"\nOthers: {100 * (unemp_per_religion.loc[['Sikh', 'Bahai', 'Jewish']].sum())/unemp_per_religion.sum(): .2f}"

# summary box; coordinates are in data units (bar position, head count)
plt.text(3, 200, note,
        bbox={'facecolor':'none', 'edgecolor':'red'},
        size=8)

plt.title("Number of Unemployed Residents Across Religious Lines");

***ANSWER:***<br>
The number of unemployed residents (535) is quite small in comparison to the entire population (7581). However, it was observed that almost two-thirds of the unemployed population come from only five (out of twenty-four) age brackets namely:<br>
**30 to 34, 35 to 39, 40 to 44, 50 to 54, and 55 to 59**. <br>
And they each contributed at least ten percent to the number of unemployed residents.<br>
Also, there are **more unemployed females (65%) than males (35%)**.<br>
**54% of the unemployed are Christians, while only 4% come from other religions.**<br>
This is also in line with the overall population trend, given that there are generally more females than males and more Christians than members of other religions.<br>

#### Unemployment Trend per Street

In [None]:
# NOTE(review): this cell sits under the "Unemployment Trend per Street" heading
# but only re-displays the per-gender counts; a per-street breakdown appears to
# be missing - confirm what was intended.
unemp_per_gender

#### Why are they unemployed?

In [None]:
# Count the actively unemployed by their relationship to the head of the house.
actively_unemployed['Relationship to Head of House'].value_counts()

In [None]:
# Count the actively unemployed per religion.
actively_unemployed['Religion'].value_counts()

In [None]:
# Count all residents per religion (baseline for the ratios of the unemployed).
census_df['Religion'].value_counts()

In [None]:
# unemployed christians, as a fraction of all christian residents
# Computed from the frames instead of hard-coded counts so it stays correct if
# the data changes.
# NOTE(review): the original literal was 261/3015; assumed to be the 'Christian'
# counts of actively_unemployed and census_df respectively - confirm.
int((actively_unemployed['Religion'] == 'Christian').sum()) / int((census_df['Religion'] == 'Christian').sum())

In [None]:
# unemployed muslims, as a fraction of all muslim residents
# Computed from the frames instead of hard-coded counts so it stays correct if
# the data changes.
# NOTE(review): the original literal was 13/127; assumed to be the 'Muslim'
# counts of actively_unemployed and census_df respectively - confirm.
int((actively_unemployed['Religion'] == 'Muslim').sum()) / int((census_df['Religion'] == 'Muslim').sum())

In [None]:
# Combined figure for export:
#   l - pyramid of unemployed males/females per age bracket
#   r - unemployed residents per religion
fig = plt.figure(figsize=(6, 4), dpi=200)
# add_axes takes [left, bottom, width, height] as fractions of the figure;
# the right-hand panel deliberately extends beyond the figure's right edge
l, r = fig.add_axes([0, 0, 0.8, 1]), fig.add_axes([0.95, 0, 0.8, 1])

# both pyramid halves are drawn on `l`, so mal_ax and fem_ax are the same Axes
mal_ax = sns.barplot(y=unemp_male_age_distr.index, 
                     x=unemp_male_age_distr, 
                     color='brown',
                    label='Male',
                    lw=0,
                    ax=l)

fem_ax = sns.barplot(y=unemp_fem_age_distr.index, 
                     x=-1*unemp_fem_age_distr, 
                     color='darkblue',
                    label='Female',
                    lw=0,
                    ax=l)

sns.barplot(x=unemp_per_religion.index,
            y=unemp_per_religion, 
            palette={'Christian': 'brown', 
                     'Unknown': 'darkblue',
                    'Muslim': 'blue',
                    'Sikh': 'darkred',
                    'Bahai': 'red',
                    'Jewish': 'red'},
           ax=r)

# Annotate the bars. Positions come from enumerate(), so no integer [] lookup is
# made against the interval- or string-labelled indexes.
for pos, (n_male, n_female) in enumerate(zip(unemp_male_age_distr, unemp_fem_age_distr)):
    if n_male or n_female:
        mal_ax.text(n_male + 0.5, pos, n_male,
                    fontweight='bold', size=8)
        fem_ax.text(-1*n_female - 3, pos, n_female, color='black',
                    fontweight='bold', size=8)

for pos, count in enumerate(unemp_per_religion):
    r.text(pos, count + 1, count,
           fontweight='bold', size=10)

# Fix the limits and tick positions first, then label them with absolute
# values, so every label is guaranteed to sit on its intended tick.
pyramid_ticks = [-60, -40, -20, 0, 20, 40, 60]
l.set_xlim(-60, 60)
l.set_xticks(pyramid_ticks)
l.set_xticklabels([abs(tick) for tick in pyramid_ticks])
mal_ax.text(20, 1, 'Male', color='brown')
fem_ax.text(-20, 1, 'Female', color='darkblue')

l.set_title("Pyramid of Male/Female Unemployed Residents Per Age Bracket")

l.set_ylabel("Age Brackets", size=15)
l.set_xlabel("Count", size=15)
# the right-hand panel is broken down by religion, not by age bracket
r.set_xlabel("Religion", size=15)
r.set_ylabel("Population", size=15)

l.tick_params(labelsize=10)
r.tick_params(labelsize=10)

note = "Unemployed (%):\n" +\
"-"*20 + f"\nChristian: {100*unemp_per_religion['Christian']/unemp_per_religion.sum(): .2f}" +\
f"\nUnknown:{100 * unemp_per_religion['Unknown']/unemp_per_religion.sum(): .2f}" +\
f"\nMuslim: {100*unemp_per_religion['Muslim']/unemp_per_religion.sum(): .2f}" +\
f"\nOthers: {100 * (unemp_per_religion.loc[['Sikh', 'Bahai', 'Jewish']].sum())/unemp_per_religion.sum(): .2f}"

# plot summary (religion panel)
r.text(3, 200, note,
        bbox={'facecolor':'none', 'edgecolor':'red'},
        size=8)

r.set_title("Number of Unemployed Residents Across Religious Lines")

note = "Unemployed (%):\n" +\
"-"*20 + f"\nFemales: {100*unemp_per_gender['F']/unemp_per_gender.sum(): .2f}" +\
f"\nMales:{100 * unemp_per_gender['M']/unemp_per_gender.sum(): .2f}"

# plot summary (pyramid panel)
l.text(-50, 20, note,
        bbox={'facecolor':'black', 'edgecolor':'none'},
        size=8, weight='bold', color='white')

plt.show()

In [None]:
# Hand the figure currently bound to `fig` to the project helper for saving.
# NOTE(review): where and how fig_writer stores the file is defined in
# census_methods, which is not visible here.
fname = 'unemployment_trend.png'
cd.fig_writer(fname, fig)

___