In [None]:
import copy
import seaborn as sns
from seaborn.utils import np, plt, pd, os

In [None]:
# Put the sibling "Presentation" directory on the module search path.
# NOTE(review): presumably this is where the project helper module imported
# later in the notebook lives — confirm.
import sys  # use sys directly instead of the undocumented `os.sys` attribute

my_pkg = "..//Presentation"
imp_mods = sys.path  # alias of the live search-path list, not a copy
if my_pkg not in imp_mods:
    imp_mods.append(my_pkg)
    # Only the search path was extended here; nothing has been imported yet,
    # so the message says exactly that.
    print(f"Added {my_pkg!r} to the module search path")

In [None]:
# Load the cleaned census extract into a DataFrame. The path is relative, so it
# is resolved against the kernel's current working directory.
census_df = pd.read_csv('final_Marital_Status_and_Religion_cleaned.csv')

In [None]:
from census_methods import CensusDataset as cd 

In [None]:
# Print a summary of the frame: column names, dtypes and non-null counts.
census_df.info()

In [None]:
# Preview the first five rows of the loaded data.
census_df.head()

___

## RELIGIOUS AFFILIATIONS

In [None]:
# Head count for every distinct value in the 'Religion' column
# (value_counts lists the most frequent value first).
religion_freq = census_df.loc[:, 'Religion'].value_counts()
religion_freq

In [None]:
# Each religion's head count as a percentage of all counted rows, rounded to
# two decimals. Multiplication happens before division, as in 100 * n / total.
relig_perc = religion_freq.mul(100).div(religion_freq.sum()).round(2)
relig_perc

In [None]:
# Keep only the rows with a stated religion, i.e. drop the 'Unknown' and
# 'Ineligible' placeholder values.
placeholder_rows = census_df['Religion'].isin(['Unknown', 'Ineligible'])
known_religion = census_df.loc[~placeholder_rows]
known_religion

In [None]:
# Head count per religion among the rows with a stated religion.
known_relig_freq = known_religion.loc[:, 'Religion'].value_counts()
known_relig_freq

In [None]:
# Percentage share of each stated religion, rounded to two decimals.
# Multiplication happens before division, as in 100 * n / total.
known_relig_perc = known_relig_freq.mul(100).div(known_relig_freq.sum()).round(2)
known_relig_perc

#### Age Brackets per Religion

In [None]:
# Edges of the 5-year age brackets: 0, 5, 10, ...
# The last edge must be strictly greater than the oldest age. Later cells bin
# with pd.cut(..., right=False), i.e. left-closed intervals, and a value equal
# to the final edge falls outside every interval. Rounding the maximum down to
# a multiple of the width and adding one more width guarantees this even when
# the maximum age is itself a multiple of 5. For any other maximum the edges
# are the same as max + 5 would give.
bin_width = 5
oldest_age = int(census_df['Age'].max())
age_boundaries = list(range(0, (oldest_age // bin_width + 1) * bin_width + 1, bin_width))

In [None]:
# Two-panel figure: age histogram per religion (left) and head count per
# stated religion (right), each annotated with a percentage summary box.


def _percent_note(title, rule, percentages, labels):
    """Return the text for a summary box.

    The result is the title, a rule line, then one "<label>: <value>%" line
    for every entry of `labels`, looked up in `percentages`. Each line,
    including the last, ends with a newline.
    """
    body = "".join(f"{label}: {percentages[label]}%\n" for label in labels)
    return f"{title}\n{rule}\n{body}"


sns.set_style(style='darkgrid')
fig = plt.figure(figsize=(6, 4), dpi=200)
# Axes are placed by hand in figure-fraction coordinates
# [left, bottom, width, height]. The gap between the panels is presumably
# there to hold the left panel's legend, which is moved just outside it below.
l, r = fig.add_axes([0, 0, 0.8, 1]), fig.add_axes([1.15, 0, 0.8, 1])

sns.histplot(data=census_df, x='Age', hue='Religion', ax=l,
             multiple='layer', bins=age_boundaries, alpha=0.5,
             palette={'Christian': 'darkgreen',
                      'Agnostic': 'brown',
                      'Muslim': 'yellow',
                      'Sikh': 'darkblue',
                      'Bahai': 'grey',
                      'Jewish': 'darkred',
                      'Unknown': 'darkgrey',
                      'Ineligible': 'black'})

sns.barplot(x=known_relig_freq.index,
            y=known_relig_freq, ax=r,
            palette={'Christian': 'darkblue',
                     'Agnostic': 'brown',
                     'Muslim': 'yellow',
                     'Sikh': 'darkred',
                     'Bahai': 'red',
                     'Jewish': 'blue'})

# Legend location is given in the left axes' own coordinates; x > 1 puts it
# to the right of that panel.
sns.move_legend(l, [1.01, 0.4])

l.set_ylabel("Head Count")

# Summary box on the left panel: share of every category, placeholders included.
note = _percent_note("Religion per Population", "-" * 30, relig_perc,
                     ['Christian', 'Muslim', 'Sikh', 'Jewish',
                      'Agnostic', 'Bahai', 'Ineligible', 'Unknown'])
l.text(85, 230, note, size=8, color='darkblue',
       bbox={'edgecolor': 'red', 'facecolor': 'none'},
       weight='bold')

# Summary box on the right panel: shares among stated religions only.
note = _percent_note("For Known Religion", '-' * 30, known_relig_perc,
                     ['Christian', 'Muslim', 'Sikh', 'Jewish', 'Agnostic', 'Bahai'])
r.text(1, 2000, note, size=7, weight='bold', color='blue',
       bbox={'facecolor': 'none', 'edgecolor': 'red'})

# Write each bar's head count just above it. Iterating by position avoids
# integer indexing on a string-labelled Series, which pandas 2.x deprecates.
for position, count in enumerate(known_relig_freq):
    r.text(position, count + 1, count, size=7, weight='bold')

# Label the right axes explicitly rather than through pyplot's current axes.
r.set_ylabel("Head Count")
r.set_xlabel("Religion")

r.set_title("Population By Religion", size=10)

l.set_title("Age Distribution per Religion (5-year Intervals)");

In [None]:
# Hand the current `fig` and a target file name to the project helper
# CensusDataset.fig_writer.
# NOTE(review): presumably this writes the figure to disk under `fname` — confirm in census_methods.
fname = 'religion.png'
cd.fig_writer(fname, fig)

#### Age Spread of Each Religion

#### 1. Christianity

In [None]:
# Subset of the stated-religion rows whose religion is 'Christian'.
is_christian = known_religion['Religion'].eq('Christian')
christian = known_religion.loc[is_christian]
christian

In [None]:
# Assign every Christian's age to a left-closed bracket defined by
# age_boundaries, then count rows per bracket and list them in bracket order.
christian_age_brackets = pd.cut(christian['Age'], bins=age_boundaries,
                                right=False, include_lowest=True)
christian_age_distr = christian_age_brackets.value_counts().sort_index()
christian_age_distr

In [None]:
# Share of Christians in each age bracket, as a percentage rounded to two
# decimals. Multiplication happens before division, as in 100 * n / total.
christian_age_perc = christian_age_distr.mul(100).div(christian_age_distr.sum()).round(2)
christian_age_perc

In [None]:
# Turn the bracket -> percentage Series into a two-column table.
# The columns are named explicitly instead of string-replacing 'index'/'Age'.
# The default names that reset_index() produces depend on the pandas version:
# from pandas 2.0, value_counts() names its result 'count' and moves the
# original name onto the index. The replace-based rename would then label the
# bracket column '%Head Count' and leave the percentages as 'count'.
df = christian_age_perc.rename_axis('Age Bracket').reset_index(name='%Head Count')
df

In [None]:
# With 5-year brackets starting at 0, table rows 0-8 are the nine brackets
# below age 45 and row 9 onwards covers 45 and above. `.loc` slicing on the
# default integer index includes both end labels.
pct_below_45 = np.round(df.loc[:8, '%Head Count'].sum(), 2)
pct_45_plus = np.round(df.loc[9:, '%Head Count'].sum(), 2)
print(f"{pct_below_45}% of christians are below 45 years old.\n"
      f"While {pct_45_plus}% of christians are 45 years and above")

In [None]:
# Everyone aged 18 to 44, both ends included.
age_18_44 = census_df.loc[census_df['Age'].ge(18) & census_df['Age'].le(44)]
age_18_44

In [None]:
# Everyone aged 18 or older.
adults = census_df.loc[census_df['Age'].ge(18)]
adults

In [None]:
# Share of all adults (18+) who fall in the 18-44 age band.
# The printed sentence previously read "entires population"; the typo is fixed.
share_of_adults = np.round(100 * len(age_18_44) / len(adults), 2)
print(f"{share_of_adults}% of the entire population aged 18 years and above are between 18 and 44 years")

In [None]:
# Share of the whole population that falls in the 18-44 age band.
share_of_population = np.round(100 * len(age_18_44) / len(census_df), 2)
print(f"{share_of_population}% of the entire population are between 18 and 44 years old")

In [None]:
# Stated-religion rows that are not in the Christian subset, matched on the
# row index labels.
in_christian_subset = known_religion.index.isin(christian.index)
non_christian = known_religion.loc[~in_christian_subset]
non_christian

In [None]:
# Assign every non-Christian's age to a left-closed bracket defined by
# age_boundaries, then count rows per bracket and list them in bracket order.
non_christian_age_brackets = pd.cut(non_christian['Age'], bins=age_boundaries,
                                    right=False, include_lowest=True)
non_christian_age_distr = non_christian_age_brackets.value_counts().sort_index()
non_christian_age_distr

In [None]:
# Share of non-Christians in each age bracket, as a percentage rounded to two
# decimals. Multiplication happens before division, as in 100 * n / total.
non_christian_age_perc = non_christian_age_distr.mul(100).div(non_christian_age_distr.sum()).round(2)
non_christian_age_perc

In [None]:
# Turn the bracket -> percentage Series into a two-column table.
# The columns are named explicitly instead of string-replacing 'index'/'Age'.
# The default names that reset_index() produces depend on the pandas version:
# from pandas 2.0, value_counts() names its result 'count' and moves the
# original name onto the index. The replace-based rename would then label the
# bracket column '%Head Count' and leave the percentages as 'count'.
df = non_christian_age_perc.rename_axis('Age Bracket').reset_index(name='%Head Count')
df

In [None]:
# With 5-year brackets starting at 0, table rows 0-8 are the nine brackets
# below age 45 and row 9 onwards covers 45 and above. `.loc` slicing on the
# default integer index includes both end labels.
# A leading bare `df.loc[:8, ...].sum()` expression was removed: its value was
# discarded, so it had no effect.
nc_pct_below_45 = np.round(df.loc[:8, '%Head Count'].sum(), 2)
nc_pct_45_plus = np.round(df.loc[9:, '%Head Count'].sum(), 2)
print(f"{nc_pct_below_45}% of non-christians are below 45 years old.\n"
      f"While {nc_pct_45_plus}% of non-christians are 45 years and above")

In [None]:
# Two-panel figure. Left: back-to-back bars per age bracket, with Christians
# to the right of zero and all other stated religions to the left. Right: age
# box plots per religion, split by gender.
sns.set_style(style='darkgrid')
fig = plt.figure(figsize=(8, 6), dpi=200)
# Axes are placed by hand in figure-fraction coordinates [left, bottom, width, height].
l, r = fig.add_axes([0, 0, 0.6, 1]), fig.add_axes([0.8, 0, 0.6, 1])

# Both bar plots are drawn on the left axes; seaborn returns the axes it drew
# on, so ch_ax and nch_ax refer to the same object as `l`.
ch_ax = sns.barplot(x=christian_age_distr,
                    y=christian_age_distr.index,
                    color='green',
                    ax=l)

# Counts are negated so these bars extend leftwards from zero.
nch_ax = sns.barplot(x=-1 * non_christian_age_distr,
                     y=non_christian_age_distr.index,
                     color='darkred',
                     ax=l)

sns.boxplot(data=census_df, y='Age', x='Religion',
            hue='Gender',
            ax=r,
            palette={"F": 'pink',
                     'M': 'brown'})

nch_ax.set_xlim(-200, 400)

# In-plot captions identifying each side of the back-to-back chart.
nch_ax.text(-170, 1.5, "Other Religions", color='white', size=12,
            bbox={'facecolor': 'darkred', 'edgecolor': 'none'})
ch_ax.text(300, 1.5, "Christians", color='white', size=12,
           bbox={'facecolor': 'green', 'edgecolor': 'none'})

# Pin the tick positions before relabelling them. Otherwise the labels are
# attached to whatever ticks the automatic locator picks, and only line up if
# it happens to choose these seven positions. The labels show magnitudes, so
# the left-hand (negated) side reads as positive head counts.
tick_positions = [-200, -100, 0, 100, 200, 300, 400]
label_list = [abs(position) for position in tick_positions]
l.set_xticks(tick_positions)
l.set_xticklabels(labels=label_list)

l.set_xlabel("Head Count", size=13)
l.set_ylabel("Age Bracket", size=13)

l.set_title("Age Distribution of Christians Vs. Other Religions")
r.set_title("Gender-based Age Distribution By Religion");

In [None]:
# Hand the current `fig` and a target file name to the project helper
# CensusDataset.fig_writer.
# NOTE(review): presumably this writes the figure to disk under `fname` — confirm in census_methods.
fname = 'compare_religions.png'
cd.fig_writer(fname, fig)