In [None]:
import copy
import sys

import seaborn as sns
from seaborn.utils import np, plt, pd, os

In [None]:
# Make the sibling "Presentation" directory importable so that project-local
# modules living there can be imported by later cells.
my_pkg = "..//Presentation"
# Use sys.path directly rather than reaching it through the undocumented
# `os.sys` attribute.
imp_mods = sys.path
if my_pkg not in imp_mods:
    # Only append once so re-running the cell does not grow sys.path.
    imp_mods.append(my_pkg)
    print("Imported successfully")

In [None]:
from census_methods import CensusDataset as cd 

In [None]:
# Load the census extract used by every later cell; the path is relative to the
# notebook's working directory.
census_df = pd.read_csv('final_Marital_Status_and_Religion_cleaned.csv')

In [None]:
# Column names, dtypes and non-null counts for a quick sanity check of the load.
census_df.info()

In [None]:
# Preview the first rows of the loaded frame.
census_df.head()

___

### MARITAL STATUS

In [None]:
# Share of the whole population (all rows of census_df) in each marital status,
# expressed as a percentage rounded to two decimals.
mar_stat_perc = (
    census_df['Marital Status']
    .value_counts()
    .mul(100)
    .div(census_df.shape[0])
    .round(2)
)
mar_stat_perc

In [None]:
# Two-panel figure: age spread per marital status split by gender (left) and
# the percentage of the population in each marital status (right).
sns.set_style('darkgrid')
fig = plt.figure(figsize=(8, 6), dpi=200)
# add_axes takes [left, bottom, width, height] in figure fractions.
l = fig.add_axes([0, 0, 0.6, 1])
r = fig.add_axes([0.8, 0, 0.6, 1])

sns.boxplot(data=census_df, y='Age', x='Marital Status', hue='Gender',
            palette={'F': 'pink', 'M': 'brown'}, ax=l)
sns.barplot(y=mar_stat_perc, x=mar_stat_perc.index, ax=r,
            palette={'Single': 'blue',
                     'Married': 'green',
                     'Divorced': 'red',
                     'Widowed': 'grey',
                     'Ineligible': 'purple'})

# Place the gender legend just outside the right edge of the left panel.
sns.move_legend(l, [1.01, 0.7])
r.text(3, 32, 'Marital Status', size=12, color='white',
       bbox={'facecolor': 'black', 'edgecolor': 'none'})

# Tighten the x-label padding on the left panel. This must target an axes
# created in this cell: referring to `ax1` here fails on a fresh kernel because
# that name is only created by a later cell.
l.set_xlabel(l.get_xlabel(), labelpad=1)

r.set_ylabel("Population (in %)")
r.set_title("Population By Marital Status", size=15)

l.set_title("Age By Marital Status Across Genders", size=15);

In [None]:
# Hand the figure built in the previous cell to the project helper.
# NOTE(review): cd.fig_writer comes from census_methods; presumably it saves
# `fig` under `fname` - confirm the destination directory against that module.
fname = 'marr_stat_age_distr.png'
cd.fig_writer(fname, fig)

### MARRIED VS DIVORCED

In [None]:
# Distinct marital-status labels present in the data (used for the filters below).
census_df['Marital Status'].unique()

In [None]:
# Rows of census_df whose marital status is exactly 'Married'.
married = census_df[census_df['Marital Status'].eq('Married')]
married

In [None]:
# Rows of census_df whose marital status is exactly 'Divorced'.
divorced = census_df[census_df['Marital Status'].eq('Divorced')]
divorced

In [None]:
# Head count of married residents (number of rows in `married`).
print(f"{married.shape[0]} married people")

In [None]:
# Married residents recorded with gender 'F', and how many there are.
wives = married[married['Gender'].eq('F')]
print(f"There are {wives.shape[0]} married women living in the town")

In [None]:
# Married residents recorded with gender 'M', and how many there are.
husb = married[married['Gender'].eq('M')]
print(f"There are {husb.shape[0]} married men living in the town")

In [None]:
# Head count of divorced residents (number of rows in `divorced`).
print(f"{divorced.shape[0]} divorced people")

In [None]:
# Divorced residents recorded with gender 'F', and how many there are.
div_w = divorced[divorced['Gender'].eq('F')]
print(f"There are {div_w.shape[0]} divorced women living in the town")

In [None]:
# Divorced residents recorded with gender 'M', and how many there are.
div_h = divorced[divorced['Gender'].eq('M')]
print(f"There are {div_h.shape[0]} divorced men living in the town")

In [None]:
# 5-year bin edges starting at 0. The last edge is always strictly greater than
# the maximum age: later cells bin with pd.cut(..., right=False), whose bins are
# half-open [a, b), so an edge equal to the maximum age would silently drop the
# oldest residents whenever that maximum is a multiple of 5.
age_boundaries = list(range(0, 5 * (int(census_df['Age'].max()) // 5 + 1) + 1, 5))

In [None]:
# Side-by-side age histograms (bins from age_boundaries) for married residents
# (left) and divorced residents (right), each split by gender.
sns.set_style(style='darkgrid')
fig = plt.figure(figsize=(6, 4), dpi=200)
# add_axes takes [left, bottom, width, height] in figure fractions.
mar_ax = fig.add_axes([0, 0, 0.6, 1])
div_ax = fig.add_axes([0.8, 0, 0.6, 1])

sns.histplot(data=married, x='Age', hue='Gender', multiple='layer',
             bins=age_boundaries,
             palette={'F': 'darkblue', 'M': 'darkgreen'}, ax=mar_ax)

sns.histplot(data=divorced, x='Age', hue='Gender', multiple='layer',
             bins=age_boundaries,
             palette={'F': 'darkblue', 'M': 'darkgreen'}, ax=div_ax)

mar_ax.set_ylabel("Population")
div_ax.set_ylabel("Population")
mar_ax.set_ylim(0, 180)
div_ax.set_ylim(0, 80)

# Panel tags sit at the top-left corner of each panel (y equals the y-limit).
mar_ax.text(0, 180, "Married", color='white', size=12,
            bbox={'facecolor': 'black', 'edgecolor': 'none'})
div_ax.text(0, 80, "Divorced", color='white', size=12,
            bbox={'facecolor': 'black', 'edgecolor': 'none'})

# Head-count boxes. The married-men frame defined by the earlier cells is
# `husb`; `husbands` is only created much later in the notebook, so using it
# here fails on a fresh kernel.
note = ("Population\n"
        + "-" * 15
        + f"\nWives: {wives.shape[0]}\n"
        + f"Husbands: {husb.shape[0]}")
mar_ax.text(60, 150, note, color='black', size=8, weight='bold',
            bbox={'facecolor': 'none', 'edgecolor': 'red'})

note = ("Population\n"
        + "-" * 15
        + f"\nWomen: {div_w.shape[0]}\n"
        + f"Men: {div_h.shape[0]}")
div_ax.text(70, 65, note, color='black', size=8, weight='bold',
            bbox={'facecolor': 'none', 'edgecolor': 'red'})

# Move both gender legends just outside the right edge of their panels.
sns.move_legend(mar_ax, [1.01, 0.75])
sns.move_legend(div_ax, [1.01, 0.75])

mar_ax.set_title("Age Distribution of Married Residents with 5-year Intervals", y=1.05)
div_ax.set_title("Age Distribution of Divorced Residents with 5-year Intervals", y=1.05);

In [None]:
# Hand the histogram figure from the previous cell to the project helper.
# NOTE(review): cd.fig_writer comes from census_methods; presumably it saves
# `fig` under `fname` - confirm against that module.
fname = 'marr_div_age_distr.png'
cd.fig_writer(fname, fig)

#### Direct Comparison Between Married and Divorced

In [None]:
# Number of married residents per age bracket. Brackets are left-closed
# (right=False), built from age_boundaries, and listed in ascending order.
married_age_freq = (
    pd.cut(married['Age'], bins=age_boundaries, right=False, include_lowest=True)
    .value_counts()
    .sort_index()
)
married_age_freq

In [None]:
# Each bracket's share of all married residents, as a percentage rounded to
# two decimals.
married_age_perc = (
    married_age_freq
    .mul(100)
    .div(married_age_freq.sum())
    .round(2)
)
married_age_perc

In [None]:
# Tabulate the married percentages with an explicit 'Age Bracket' column.
# Naming the index before reset_index avoids depending on the default column
# name, which is 'index' on some pandas versions and the index's own name on
# others (so replacing 'index' afterwards is not reliable).
df = married_age_perc.rename_axis('Age Bracket').reset_index(name='% Head Count')
# The first nine 5-year brackets cover ages 0 through 44.
mar_ages_0_44 = df.iloc[:9]
mar_ages_0_44['% Head Count']

In [None]:
# Number of divorced residents per age bracket. Brackets are left-closed
# (right=False), built from age_boundaries, and listed in ascending order.
divorced_age_freq = (
    pd.cut(divorced['Age'], bins=age_boundaries, right=False, include_lowest=True)
    .value_counts()
    .sort_index()
)
divorced_age_freq

In [None]:
# Each bracket's share of all divorced residents, as a percentage rounded to
# two decimals.
divorced_age_perc = (
    divorced_age_freq
    .mul(100)
    .div(divorced_age_freq.sum())
    .round(2)
)
divorced_age_perc

In [None]:
# Tabulate the divorced percentages with an explicit 'Age Bracket' column.
# Naming the index before reset_index avoids depending on the default column
# name, which is 'index' on some pandas versions and the index's own name on
# others (so replacing 'index' afterwards is not reliable).
df = divorced_age_perc.rename_axis('Age Bracket').reset_index(name='% Head Count')
# The first nine 5-year brackets cover ages 0 through 44.
div_ages_0_44 = df.iloc[:9]
div_ages_0_44['% Head Count']

In [None]:
# Population-pyramid style chart: married percentages per age bracket extend to
# the right of zero, divorced percentages to the left.
fig = plt.figure(figsize=(8, 6), dpi=200)
ax1 = fig.add_axes([0, 0, 1, 1])

sns.barplot(x=married_age_perc,
            y=married_age_perc.index,
            color='green',
            label='Married',
            lw=0,
            ax=ax1)

# Divorced percentages are negated so their bars grow leftwards.
sns.barplot(x=-1 * divorced_age_perc,
            y=divorced_age_perc.index,
            color='red',
            label='Divorced',
            lw=0,
            ax=ax1)

ax1.set_xlim(-20, 20)

note = ("Residents are getting divorced at younger ages"
        + "\n than they are getting married")
ax1.text(-8, 1.5, note, size=10, color='black', weight='bold',
         bbox={'edgecolor': 'blue', 'facecolor': 'none'})

note = (f"{np.round(div_ages_0_44['% Head Count'].sum(), 2)}% of divorced residents"
        + "\nare below 45 years")
ax1.text(-18, 19, note, size=10, color='black', weight='bold',
         bbox={'edgecolor': 'blue', 'facecolor': 'none'})

note = (f"Only {np.round(mar_ages_0_44['% Head Count'].sum(), 2)}% of married residents"
        + "\nare below 45 years")
ax1.text(5, 19, note, size=10, color='black', weight='bold',
         bbox={'edgecolor': 'blue', 'facecolor': 'none'})

ax1.text(15, 12, "Married", color='white', weight='bold',
         bbox={'facecolor': 'green', 'edgecolor': 'none'},
         size=15)
ax1.text(-18, 12, "Divorced", color='white', weight='bold',
         bbox={'facecolor': 'red', 'edgecolor': 'none'},
         size=15)

ax1.set_xlabel("Married/Divorced Percentage")
ax1.set_ylabel("Age Bracket")

# Annotate each bracket that has a non-zero bar on either side. Values are read
# positionally with .iloc: both series are indexed by age intervals, so plain
# integer [] lookups rely on a deprecated positional fallback.
for i in range(married_age_perc.shape[0]):
    mar_pct = married_age_perc.iloc[i]
    div_pct = divorced_age_perc.iloc[i]
    if mar_pct or div_pct:
        ax1.text(mar_pct + 0.5, i, mar_pct,
                 size=5, weight='bold')
        ax1.text(-1 * div_pct - 1.5, i, div_pct,
                 size=5, weight='bold')

# Pin tick positions explicitly and label them with absolute values, so both
# sides read as positive percentages and the label count always matches.
tick_positions = list(range(-20, 21, 5))
ax1.set_xticks(tick_positions)
ax1.set_xticklabels([str(abs(t)) for t in tick_positions])

ax1.set_title("Pyramid Showing Age Distribution for Married and Divorced Residents",
              y=1.005, weight='bold', size=15);

In [None]:
# Hand the pyramid figure from the previous cell to the project helper.
# NOTE(review): cd.fig_writer comes from census_methods; presumably it saves
# `fig` under `fname` - confirm against that module.
fname = 'marr_vs_div_age_pyr.png'
cd.fig_writer(fname, fig)

#### Number of marriages per year

In [None]:
# Total number of married residents (rows in `married`).
total_married = len(married)
print(total_married)

#### Wives & Husbands

In [None]:
# Split the married residents by gender. Both frames are filtered from
# `married` with a mask built on `married` itself, so the mask and the frame it
# indexes always share the same index.
wives = married.loc[married['Gender'] == 'F']  # married females
husbands = married.loc[married['Gender'] == 'M']  # married males

In [None]:
# Number of wives (rows in `wives`).
print(wives.shape[0])

In [None]:
# Number of husbands (rows in `husbands`).
print(husbands.shape[0])

#### Number of Marriages
(number of wives + number of husbands)/2

In [None]:
# Estimated number of marriages: half of all spouses, with the fractional part
# discarded by the integer conversion.
num_marriages = np.int_((len(wives) + len(husbands)) / 2)
print(num_marriages)

### Marriage Rate

**Crude Marriage Rate:**<br>
CMR = (Number of marriages per year / Total population) * 1000<br>
Number of marriages per 1000 residents

Residents married this year are assumed to be the wives and husbands aged between 25 and 29 years.

In [None]:
# Proxy for this year's marriages: married residents aged 25 to 29 (both ends
# inclusive), halved because each marriage involves two spouses.
married_thisyr = married[married['Age'].between(25, 29)]
num_marriages = len(married_thisyr) / 2
num_marriages

In [None]:
# Gender split of the residents counted as married this year.
married_thisyr['Gender'].value_counts()

In [None]:
# Crude marriage rate: marriages per 1000 residents, using every row of
# census_df as the population.
marriage_rate = num_marriages * 1000 / len(census_df)
marriage_rate

In [None]:
# Report the crude marriage rate as a whole number. The format spec is ".0f"
# with no space: a leading space is the "space sign" flag and would print a
# stray blank before the number.
print(f"{np.round(marriage_rate, 0):.0f} marriages occur per 1000 residents annually")

### Divorce Rate

**Crude Divorce Rate:**<br>
CDR = (Number of divorces per year / Total population) * 1000<br>
Number of divorces per 1000 residents

In [None]:
# Working assumption of this analysis: women tend to get divorced between 45-49,
# so divorced residents aged 45 to 49 (both ends inclusive) stand in for this
# year's divorces. The count is halved because each divorce involves two people.
divorce_thisyr = divorced[divorced['Age'].between(45, 49)]
num_divorces = len(divorce_thisyr) / 2
len(divorce_thisyr)

In [None]:
# Gender split of the residents counted as divorced this year.
divorce_thisyr['Gender'].value_counts()

In [None]:
# Crude divorce rate: divorces per 1000 residents, using every row of census_df
# as the population.
divorce_rate = num_divorces * 1000 / len(census_df)
divorce_rate

In [None]:
# Report the crude divorce rate as a whole number. The format spec is ".0f"
# with no space: a leading space is the "space sign" flag and would print a
# stray blank before the number.
print(f"{np.round(divorce_rate, 0):.0f} divorces occur per 1000 residents annually")

#### Divorced by 45

In [None]:
# Divorced residents aged 0 to 45. between() includes both endpoints, so
# 45-year-olds are part of this group.
divorced_by_45 = census_df[
    census_df['Marital Status'].eq('Divorced')
    & census_df['Age'].between(0, 45)
]
divorced_by_45

In [None]:
# Fraction (0-1, not a percentage) of all divorced residents who are aged 45 or under.
divorced_by_45.shape[0]/divorced.shape[0]