# Data Loading and Integration


In [1]:
!pip install nbformat



In [2]:
import warnings
# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
import plotly.express as px # plotly is used extensively for interactive visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# This magic makes matplotlib plots render embedded in the notebook.
%matplotlib inline

from pylab import rcParams
# Default matplotlib figure size (width, height).
rcParams['figure.figsize'] = 5,10
# Widen the pandas display limits so large frames are not truncated when shown.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [3]:
#function to load data
def load_data(url):
    """Read a CSV file into a DataFrame.

    Parameters
    ----------
    url : anything ``pd.read_csv`` accepts
        Location of the CSV file; passed to ``pd.read_csv`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The parsed table, exactly as ``pd.read_csv`` produces it with
        default options.
    """
    return pd.read_csv(url)

In [4]:
# Load the seven cleaned CSV files from the project's GitHub repository.
# Each frame has its 'Unnamed: 0' column removed right after loading
# (presumably the index written by an earlier to_csv call - verify against the cleaning notebook).

# Population structure
u1 = "https://raw.githubusercontent.com/AnanyaThyagarajan/Divorce-Trends-of-Singaporeans/main/dataset/cleaned_population_structure.csv"
pop_struct_df = load_data(u1).drop(columns='Unnamed: 0')

# Key indicators on marriages
u2 = "https://raw.githubusercontent.com/AnanyaThyagarajan/Divorce-Trends-of-Singaporeans/main/dataset/cleaned_marriage_key_indicators.csv"
mar_key_df = load_data(u2).drop(columns='Unnamed: 0')

# Marriage rates by age group
u3 = "https://raw.githubusercontent.com/AnanyaThyagarajan/Divorce-Trends-of-Singaporeans/main/dataset/cleaned_marriage_rate.csv"
marr_rate_by_age_grp = load_data(u3).drop(columns='Unnamed: 0')

# Median age of brides and grooms
u4 = "https://raw.githubusercontent.com/AnanyaThyagarajan/Divorce-Trends-of-Singaporeans/main/dataset/cleaned_median_age_of_bride_and_grooms.csv"
med_age_bride_groom_ageGrp = load_data(u4).drop(columns='Unnamed: 0')

# Divorce rates by age group
u5 = "https://raw.githubusercontent.com/AnanyaThyagarajan/Divorce-Trends-of-Singaporeans/main/dataset/cleaned_divorce_rate.csv"
div_rate_by_age_grp = load_data(u5).drop(columns='Unnamed: 0')

# Age group and sex of divorcees
u6 = "https://raw.githubusercontent.com/AnanyaThyagarajan/Divorce-Trends-of-Singaporeans/main/dataset/cleaned_age_grp_and_sex_of_divorcees.csv"
age_grp_sex_ofDivorcee = load_data(u6).drop(columns='Unnamed: 0')

# Key indicators on divorces
u7 = "https://raw.githubusercontent.com/AnanyaThyagarajan/Divorce-Trends-of-Singaporeans/main/dataset/cleaned_divorce_key_indicators.csv"
divorce_key_indicator = load_data(u7).drop(columns='Unnamed: 0')


In [17]:
# Population structure and marriages

# Keep only the columns needed from each source table.
pop_struct_data = pop_struct_df[[
    'Years',
    'Total Population * (Number)',
    'Median Age Of Resident Population * (Years)',
    'Median Age Of Citizen Population * (Years)',
]]
mar_key_data = mar_key_df[['Years', 'Total Marriages * (Number)']].copy()

# Inner join on 'Years': only years present in both tables are kept.
pop_struct_mar_key = pop_struct_data.merge(mar_key_data, how='inner', on='Years')

# Persist the integrated table, then preview it.
pop_struct_mar_key.to_csv('Data_Integrate_pop_struct_mar_key.csv')
pop_struct_mar_key.head(5)

Unnamed: 0,Years,Total Population * (Number),Median Age Of Resident Population * (Years),Median Age Of Citizen Population * (Years),Total Marriages * (Number)
0,2022,5637022,42.1,42.8,29389
1,2021,5453566,41.8,42.5,28329
2,2020,5685807,41.5,42.2,22651
3,2019,5703569,41.1,42.0,25434
4,2018,5638676,40.8,41.7,27007


In [18]:
# Relabel the marriage-rate table: one general rate per sex, eleven age bands per sex,
# and the crude marriage rate as the last column.
_marriage_age_bands = [
    '15 - 19 Years', '20 - 24 Years', '25 - 29 Years', '30 - 34 Years',
    '35 - 39 Years', '40 - 44 Years', '45 - 49 Years', '50 - 54 Years',
    '55 - 59 Years', '60 - 64 Years', '65 Years & Over',
]
marr_rate_by_age_grp.columns = (
    ['Years', 'Male General Marriage Rate * (Per 1,000 Unmarried Resident Males Aged 15-49)']
    + [f'{band} (Per 1,000 Unmarried Resident Males)' for band in _marriage_age_bands]
    + ['Female General Marriage Rate * (Per 1,000 Unmarried Resident Females Aged 15-49)']
    + [f'{band} (Per 1,000 Unmarried Resident Females)' for band in _marriage_age_bands]
    + ['Crude Marriage Rate * (Per 1,000 Residents)']
)

In [19]:
# Marriage rates by age group: split the table by generation and by sex.

_male_mar_general = 'Male General Marriage Rate * (Per 1,000 Unmarried Resident Males Aged 15-49)'
_female_mar_general = 'Female General Marriage Rate * (Per 1,000 Unmarried Resident Females Aged 15-49)'
_mil_mar_bands = ['15 - 19 Years', '20 - 24 Years', '25 - 29 Years', '30 - 34 Years', '35 - 39 Years']
_genx_mar_bands = ['40 - 44 Years', '45 - 49 Years', '50 - 54 Years', '55 - 59 Years',
                   '60 - 64 Years', '65 Years & Over']


def _marriage_cols(bands, sex):
    """Return the marriage-rate column labels for the given age bands and sex ('Males'/'Females')."""
    return [f'{band} (Per 1,000 Unmarried Resident {sex})' for band in bands]


_mil_male_mar_cols = _marriage_cols(_mil_mar_bands, 'Males')
_mil_female_mar_cols = _marriage_cols(_mil_mar_bands, 'Females')
_genx_male_mar_cols = _marriage_cols(_genx_mar_bands, 'Males')
_genx_female_mar_cols = _marriage_cols(_genx_mar_bands, 'Females')

# --- Millennials (age bands 15-19 through 35-39) ---
mil_df = marr_rate_by_age_grp[
    ['Years', _male_mar_general, *_mil_male_mar_cols, _female_mar_general, *_mil_female_mar_cols]
].copy()
mil_df.to_csv('Millennial_marriage_rates_by_age_grp.csv')
mill_mar_rate = mil_df.copy()

# General marriage rate of males and females
tot_mar_rate = mill_mar_rate[['Years', _male_mar_general, _female_mar_general]].copy()

# Per-age-band marriage rates, one frame per sex
mar_males_r = mill_mar_rate[['Years', *_mil_male_mar_cols]].copy()
mar_females_r = mill_mar_rate[['Years', *_mil_female_mar_cols]].copy()

# --- Gen X (age bands 40-44 through 65 & over) ---
genx_df = marr_rate_by_age_grp[
    ['Years', _male_mar_general, *_genx_male_mar_cols, _female_mar_general, *_genx_female_mar_cols]
].copy()
genx_df.to_csv('GenX_marriage_rates_by_age_grp.csv')
gen_x_marriage_rate = genx_df.copy()

# Per-age-band marriage rates, one frame per sex
genx_mar_males_r = gen_x_marriage_rate[['Years', *_genx_male_mar_cols]].copy()
genx_mar_females_r = gen_x_marriage_rate[['Years', *_genx_female_mar_cols]].copy()
genx_mar_females_r.head(5)


Unnamed: 0,Years,"40 - 44 Years (Per 1,000 Unmarried Resident Females)","45 - 49 Years (Per 1,000 Unmarried Resident Females)","50 - 54 Years (Per 1,000 Unmarried Resident Females)","55 - 59 Years (Per 1,000 Unmarried Resident Females)","60 - 64 Years (Per 1,000 Unmarried Resident Females)","65 Years & Over (Per 1,000 Unmarried Resident Females)"
0,2022,22.5,12.9,7.6,3.9,1.9,0.4
1,2021,19.2,9.5,6.8,2.8,1.7,0.3
2,2020,17.3,9.1,5.6,2.6,1.3,0.2
3,2019,21.4,11.0,6.5,3.4,1.5,0.3
4,2018,22.0,12.1,6.3,2.8,1.5,0.2


In [20]:
# Median age of the brides and grooms, split into two frames.

# Median age across all marriages, irrespective of the type of marriage.
_total_marriage_age_cols = ['Years', 'Grooms - Total Marriages (Years)', 'Brides - Total Marriages (Years)']
tot_mar_age = med_age_bride_groom_ageGrp.loc[:, _total_marriage_age_cols].copy()

# Median age broken down by first marriage vs. remarriage.
_marriage_type_age_cols = [
    'Years',
    'Grooms - First Marriages (Years)',
    'Grooms - Remarriages (Years)',
    'Brides - First Marriages (Years)',
    'Brides - Remarriages (Years)',
]
med_age_marriage_df = med_age_bride_groom_ageGrp.loc[:, _marriage_type_age_cols].copy()
med_age_marriage_df.head(5)


Unnamed: 0,Years,Grooms - First Marriages (Years),Grooms - Remarriages (Years),Brides - First Marriages (Years),Brides - Remarriages (Years)
0,2022,30.7,44.6,29.3,39.3
1,2021,30.5,42.4,29.1,37.6
2,2020,30.4,43.3,28.8,38.0
3,2019,30.4,44.2,28.8,38.3
4,2018,30.2,43.8,28.5,37.9


In [21]:
# Divorce rates by age group: relabel the columns, then split by generation and by sex.

_male_div_general = 'Male General Divorce Rate * (Per 1,000 Married Resident Aged 20 Years & Over)'
_female_div_general = 'Female General Divorce Rate * (Per 1,000 Married Resident Aged 20 Years & Over)'
_mil_div_rate_bands = ['20 - 24 Years', '25 - 29 Years', '30 - 34 Years', '35 - 39 Years']
_genx_div_rate_bands = ['40 - 44 Years', '45 - 49 Years', '50 Years And Over']
_all_div_rate_bands = _mil_div_rate_bands + _genx_div_rate_bands


def _divorce_rate_cols(bands, sex):
    """Return the divorce-rate column labels for the given age bands and sex ('Males'/'Females')."""
    return [f'{band} (Per 1,000 Married Resident {sex})' for band in bands]


# Column layout: general rate + seven age bands per sex, crude divorce rate last.
div_rate_by_age_grp.columns = (
    ['Years', _male_div_general]
    + _divorce_rate_cols(_all_div_rate_bands, 'Males')
    + [_female_div_general]
    + _divorce_rate_cols(_all_div_rate_bands, 'Females')
    + ['Crude Divorce Rate * (Per 1,000 Residents)']
)

_mil_male_div_cols = _divorce_rate_cols(_mil_div_rate_bands, 'Males')
_mil_female_div_cols = _divorce_rate_cols(_mil_div_rate_bands, 'Females')
_genx_male_div_cols = _divorce_rate_cols(_genx_div_rate_bands, 'Males')
_genx_female_div_cols = _divorce_rate_cols(_genx_div_rate_bands, 'Females')

# --- Millennials (age bands 20-24 through 35-39) ---
mil_div_df = div_rate_by_age_grp[
    ['Years', _male_div_general, *_mil_male_div_cols, _female_div_general, *_mil_female_div_cols]
].copy()
mil_div_df.to_csv('Millennials_divorce_rates_by_age_grp.csv')
millennials_divorce_rate = mil_div_df.copy()

# General divorce rate of males and females
mil_tot_div_rate = millennials_divorce_rate[['Years', _male_div_general, _female_div_general]].copy()

# Per-age-band divorce rates, one frame per sex
mil_div_males_r = millennials_divorce_rate[['Years', *_mil_male_div_cols]].copy()
mil_div_females_r = millennials_divorce_rate[['Years', *_mil_female_div_cols]].copy()

# --- Gen X (age bands 40-44 through 50 & over) ---
genx_div_df = div_rate_by_age_grp[
    ['Years', _male_div_general, *_genx_male_div_cols, _female_div_general, *_genx_female_div_cols]
].copy()
genx_div_df.to_csv('GenX_divorce_rates_by_age_grp.csv')
gen_x_divorce_rate = genx_div_df.copy()

# Per-age-band divorce rates, one frame per sex
genx_div_males_r = gen_x_divorce_rate[['Years', *_genx_male_div_cols]].copy()
genx_div_females_r = gen_x_divorce_rate[['Years', *_genx_female_div_cols]].copy()
genx_div_females_r.head(4)


Unnamed: 0,Years,"40 - 44 Years (Per 1,000 Married Resident Females)","45 - 49 Years (Per 1,000 Married Resident Females)","50 Years And Over (Per 1,000 Married Resident Females)"
0,2022,8.1,7.0,2.7
1,2021,9.6,8.1,2.9
2,2020,8.1,6.5,2.4
3,2019,8.6,6.9,2.7


In [68]:
# Total (general) divorce rate of males and females.
# .copy() makes this an independent frame, consistent with the other subsets built in this
# notebook, so later edits to it cannot touch div_rate_by_age_grp or raise SettingWithCopyWarning.
tot_div_rate = div_rate_by_age_grp[[
    'Years',
    'Male General Divorce Rate * (Per 1,000 Married Resident Aged 20 Years & Over)',
    'Female General Divorce Rate * (Per 1,000 Married Resident Aged 20 Years & Over)',
]].copy()


In [99]:
# Male and female divorcees by age group.
# Relabel the table: male counts keep the plain '<band> (Number)' label, female counts get a
# '-F' suffix so the two sets of age-band columns do not collide.
_divorcee_age_bands = ['Under 25', '25-29', '30-34', '35-39', '40-44', '45-49',
                       '50-54', '55-59', '60 & Over', 'Unknown Age']
age_grp_sex_ofDivorcee.columns = (
    ['Years', 'Total Male Divorcees (Number)']
    + [f'{band} (Number)' for band in _divorcee_age_bands]
    + ['Total Female Divorcees (Number)']
    + [f'{band} (Number)-F' for band in _divorcee_age_bands]
    + ['Median Age At Divorce (Males) (Years)', 'Median Age At Divorce (Females) (Years)']
)

# Split the divorcee table.

# Median age of male and female divorcees at the time of divorce
med_age_div = age_grp_sex_ofDivorcee[
    ['Years', 'Median Age At Divorce (Males) (Years)', 'Median Age At Divorce (Females) (Years)']
].copy()

# --- Millennials (age bands Under 25 through 35-39) ---
_mil_divorcee_bands = ['Under 25', '25-29', '30-34', '35-39']
_mil_male_src = [f'{band} (Number)' for band in _mil_divorcee_bands]
_mil_female_src = [f'{band} (Number)-F' for band in _mil_divorcee_bands]
_mil_male_labels = [f'Male Divorcees aged {band} (Number)' for band in _mil_divorcee_bands]
_mil_female_labels = [f'Female Divorcees aged {band} (Number)' for band in _mil_divorcee_bands]

# Select the Millennial columns and give the age bands self-describing labels;
# 'Years' and the two totals keep their names.
mil_div_age_grp = age_grp_sex_ofDivorcee[
    ['Years', 'Total Male Divorcees (Number)', *_mil_male_src,
     'Total Female Divorcees (Number)', *_mil_female_src]
].rename(columns=dict(zip(_mil_male_src + _mil_female_src, _mil_male_labels + _mil_female_labels)))

mil_div_age_grp.to_csv('Millennial_median_age_Male_Female_atTime_ofDivorce.csv')
millennials_divorcees = mil_div_age_grp.copy()

# Total number of male and female divorcees
tot_div_male_fem = millennials_divorcees[
    ['Years', 'Total Male Divorcees (Number)', 'Total Female Divorcees (Number)']
].copy()

# Millennial divorcees, one frame per sex
div_males_mil = millennials_divorcees[['Years', *_mil_male_labels]].copy()
div_females_mil = millennials_divorcees[['Years', *_mil_female_labels]].copy()
div_females_mil.head(4)


# For Gen X (age bands 40-44 through 60 & over):

_genx_divorcee_bands = ['40-44', '45-49', '50-54', '55-59', '60 & Over']
_genx_male_src = [f'{band} (Number)' for band in _genx_divorcee_bands]
_genx_female_src = [f'{band} (Number)-F' for band in _genx_divorcee_bands]
_genx_male_labels = [f'Male Divorcees aged {band} (Number)' for band in _genx_divorcee_bands]
_genx_female_labels = [f'Female Divorcees aged {band} (Number)' for band in _genx_divorcee_bands]

genx_div_age_grp = age_grp_sex_ofDivorcee[
    ['Years', 'Total Male Divorcees (Number)', *_genx_male_src,
     'Total Female Divorcees (Number)', *_genx_female_src]
].copy()

# Assign a FLAT list of labels. The previous code wrapped the list in a second pair of
# brackets, which makes pandas build a one-level MultiIndex: every derived frame then carries
# tuple-style column labels, and plotly express refuses MultiIndex columns.
genx_div_age_grp.columns = (
    ['Years', 'Total Male Divorcees (Number)']
    + _genx_male_labels
    + ['Total Female Divorcees (Number)']
    + _genx_female_labels
)
genx_div_age_grp.to_csv('GenX_median_age_Male_Female_atTime_ofDivorce.csv')
gen_x_divorcees = genx_div_age_grp.copy()

# Gen X male divorcees
genX_divorcees_males = gen_x_divorcees[['Years', *_genx_male_labels]].copy()

# Gen X female divorcees
genx_divorcees_females = gen_x_divorcees[['Years', *_genx_female_labels]].copy()
genx_divorcees_females.head(3)

Unnamed: 0,Years,Male Divorcees aged 40-44 (Number),Male Divorcees aged 45-49 (Number),Male Divorcees aged 50-54 (Number),Male Divorcees aged 55-59 (Number),Male Divorcees aged 60 & Over (Number)
0,2022,946,846,691,514,705
1,2021,1004,949,685,555,726
2,2020,905,797,553,465,562
3,2019,984,877,667,508,600
4,2018,932,863,642,452,508


In [23]:
# Key indicators on divorces

# Keep the yearly totals, the median ages of divorcees and the median marriage duration.
_key_divorce_cols = [
    'Years',
    'Total Divorces (Number)',
    'Median Age Of Male Divorcees (Years)',
    'Median Age Of Female Divorcees (Years)',
    'Median Duration Of Marriage For Divorces (Years)',
]
k_div_df = divorce_key_indicator.loc[:, _key_divorce_cols].copy()

# Persist the subset and keep a working copy for the analysis.
k_div_df.to_csv('key_factors_of_divorce.csv')
key_factors_divorce = k_div_df.copy()
key_factors_divorce.head(4)

Unnamed: 0,Years,Total Divorces (Number),Median Age Of Male Divorcees (Years),Median Age Of Female Divorcees (Years),Median Duration Of Marriage For Divorces (Years)
0,2022,6922,44.4,40.5,10.9
1,2021,7674,43.9,40.0,10.7
2,2020,6708,43.2,39.5,10.4
3,2019,7330,43.4,39.3,10.4


# Data Analysis and Visualization


### 1. Total population against the number of marriages and the median age of the population in Singapore during the period 1998 to 2022

In [25]:
"""Total Population against the  number of married couple and the age of the people in singapore during the period 1998 to 2022."""
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio


# Graph 1.0: total population and total marriages per year, as two lines on a shared axis.
fig = px.line(
    pop_struct_mar_key,
    x='Years',
    y=['Total Marriages * (Number)', 'Total Population * (Number)'],
)
fig.update_layout(
    title='Graph 1.0: Total Population and marriages',
    hovermode='x',
    width=1000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Number of People', font=dict(size=16)), tickfont=dict(size=14)),
)
pio.show(fig)


In [28]:
# Inspect the column dtypes of the merged frame before plotting.
print(pop_struct_mar_key.dtypes)


Years                                           int64
Total Population * (Number)                     int64
Median Age Of Resident Population * (Years)    object
Median Age Of Citizen Population * (Years)     object
Total Marriages * (Number)                      int64
dtype: object


In [29]:
# Convert both median-age columns to numeric values; entries that cannot be parsed become NaN
# (errors='coerce').
for _median_age_col in (
    'Median Age Of Resident Population * (Years)',
    'Median Age Of Citizen Population * (Years)',
):
    pop_struct_mar_key[_median_age_col] = pd.to_numeric(pop_struct_mar_key[_median_age_col], errors='coerce')


In [56]:
# Total Marriages and Median age of the population

_marriages_col = 'Total Marriages * (Number)'
_median_age_cols = [
    'Median Age Of Resident Population * (Years)',
    'Median Age Of Citizen Population * (Years)',
]

fig2 = px.line(pop_struct_mar_key, x='Years', y=[_marriages_col, *_median_age_cols])

# Marriage counts and median ages have different units and magnitudes, so sharing one axis
# (previously labelled 'Number of People') flattens the age lines. Put the median-age traces
# on a secondary y-axis on the right instead.
fig2.for_each_trace(
    lambda trace: trace.update(yaxis='y2') if trace.name in _median_age_cols else None
)
fig2.update_layout(
    title='Graph 1.1: Total Marriages and Median Age of the Population',
    hovermode='x',
    width=1200,
    xaxis_tickfont_size=10,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Number of Marriages', font=dict(size=16)), tickfont=dict(size=14)),
    yaxis2=dict(
        title=dict(text='Median Age (Years)', font=dict(size=16)),
        tickfont=dict(size=14),
        overlaying='y',
        side='right',
        showgrid=False,
    ),
    # Shift the legend right so it does not sit on top of the secondary axis.
    legend=dict(x=1.08),
)
pio.show(fig2)

### 2. Marriage trends in Singapore during the period 1998 to 2022

In [57]:
 # Graph 2.1.0: general marriage rates of males and females per year.

tot = px.line(
    tot_mar_rate,
    x="Years",
    y=[
        "Male General Marriage Rate * (Per 1,000 Unmarried Resident Males Aged 15-49)",
        "Female General Marriage Rate * (Per 1,000 Unmarried Resident Females Aged 15-49)",
    ],
)
tot.update_layout(
    title='Graph 2.1.0: Total Marriage rates of Male and Female',
    hovermode='x',
    width=1500,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Marriage rate', font=dict(size=16)), tickfont=dict(size=14)),
)
pio.show(tot)

In [51]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 2.1.1: marriage rates of Millennial males, one bar per age band, grouped by year.
m1 = px.bar(
    mar_males_r,
    x="Years",
    y=[
        "15 - 19 Years (Per 1,000 Unmarried Resident Males)",
        "20 - 24 Years (Per 1,000 Unmarried Resident Males)",
        "25 - 29 Years (Per 1,000 Unmarried Resident Males)",
        "30 - 34 Years (Per 1,000 Unmarried Resident Males)",
        "35 - 39 Years (Per 1,000 Unmarried Resident Males)",
    ],
)
m1.update_layout(
    title='Graph 2.1.1: Marriage rates of Millennials-Males',
    hovermode='x',
    barmode='group',
    width=4000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Marriage rate', font=dict(size=16)), tickfont=dict(size=14)),
)
pio.show(m1)

In [50]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 2.1.2: marriage rates of Millennial females, one bar per age band, grouped by year.
m2 = px.bar(
    mar_females_r,
    x='Years',
    y=[
        '15 - 19 Years (Per 1,000 Unmarried Resident Females)',
        '20 - 24 Years (Per 1,000 Unmarried Resident Females)',
        '25 - 29 Years (Per 1,000 Unmarried Resident Females)',
        '30 - 34 Years (Per 1,000 Unmarried Resident Females)',
        '35 - 39 Years (Per 1,000 Unmarried Resident Females)',
    ],
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
m2.update_layout(
    title='Graph 2.1.2: Marriage rates of Millennials-Females',
    hovermode='x',
    barmode='group',
    width=4000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Marriage rate', font=dict(size=16)), tickfont=dict(size=14)),
)
pio.show(m2)

In [36]:
# Preview the Gen X male marriage-rate frame and check its dimensions.
# display() renders the wide frame as a table; print() wraps and truncates the long column labels.
display(genx_mar_males_r.head())
genx_mar_males_r.shape


   Years  40 - 44 Years (Per 1,000 Unmarried Resident Males)  45 - 49 Years (Per 1,000 Unmarried Resident Males)  50 - 54 Years (Per 1,000 Unmarried Resident Males)  55 - 59 Years (Per 1,000 Unmarried Resident Males)  60 - 64 Years (Per 1,000 Unmarried Resident Males) 65 Years & Over (Per 1,000 Unmarried Resident Males)
0   2022                                               50.5                                                31.8                                                25.0                                                16.7                                                11.1                                                 4.3  
1   2021                                               47.8                                                27.0                                                16.1                                                11.3                                                 7.1                                                 2.9  
2   2020                          

In [37]:
# Re-assign the column labels of genx_mar_males_r.
# NOTE(review): these are the same seven labels, in the same order, that were selected when
# genx_mar_males_r was built, so this looks like a no-op when cells run top to bottom -
# confirm before removing.
genx_mar_males_r.columns =['Years', '40 - 44 Years (Per 1,000 Unmarried Resident Males)', '45 - 49 Years (Per 1,000 Unmarried Resident Males)', 
                           '50 - 54 Years (Per 1,000 Unmarried Resident Males)', '55 - 59 Years (Per 1,000 Unmarried Resident Males)', 
                           '60 - 64 Years (Per 1,000 Unmarried Resident Males)', '65 Years & Over (Per 1,000 Unmarried Resident Males)']

Index(['Years', '40 - 44 Years (Per 1,000 Unmarried Resident Males)', '45 - 49 Years (Per 1,000 Unmarried Resident Males)', '50 - 54 Years (Per 1,000 Unmarried Resident Males)', '55 - 59 Years (Per 1,000 Unmarried Resident Males)', '60 - 64 Years (Per 1,000 Unmarried Resident Males)', '65 Years & Over (Per 1,000 Unmarried Resident Males)'], dtype='object')

In [48]:
import pandas as pd
import plotly.express as px

# Graph 2.1.3: marriage rates of Generation X males, one bar per age band, grouped by year.

# Cast 'Years' to str (the stated intent is for it to be treated as a categorical variable).
genx_mar_males_r['Years'] = genx_mar_males_r['Years'].astype(str)

# Reshape from wide (one column per age band) to long (one row per year/age band).
_genx_males_long = genx_mar_males_r.melt(id_vars=['Years'], var_name='Age Group', value_name='Marriage Rate')

# The figure title is set once, in update_layout below; the duplicate title that was passed to
# px.bar was always overwritten there.
g1 = px.bar(
    _genx_males_long,
    x='Years',
    y='Marriage Rate',
    color='Age Group',
    labels={"Marriage Rate": "Marriage Rate", "Age Group": "Age Groups"},
    barmode='group',
)
g1.update_layout(
    title='Graph 2.1.3: Marriage rates of Generation X -Males',
    hovermode='x',
    barmode='group',
    width=4000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Marriage rate', font=dict(size=16)), tickfont=dict(size=14)),
)

pio.show(g1)


In [49]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 2.1.4: marriage rates of Generation X females, one bar per age band, grouped by year.

# Cast 'Years' to str, mirroring the Gen X males chart.
genx_mar_females_r['Years'] = genx_mar_females_r['Years'].astype(str)

# Reshape from wide (one column per age band) to long (one row per year/age band).
_genx_females_long = genx_mar_females_r.melt(id_vars=['Years'], var_name='Age Group', value_name='Marriage Rate')

# The figure title is set once, in update_layout below; the identical title that was passed to
# px.bar was redundant.
g2 = px.bar(
    _genx_females_long,
    x='Years',
    y='Marriage Rate',
    color='Age Group',
    labels={"Marriage Rate": "Marriage Rate", "Age Group": "Age Groups"},
    barmode='group',
)
g2.update_layout(
    title='Graph 2.1.4: Marriage rates of Generation X - Females',
    hovermode='x',
    barmode='group',
    width=4000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Marriage rate', font=dict(size=16)), tickfont=dict(size=14)),
)

pio.show(g2)

### 2.2 Median age of brides and grooms in Singapore during the period 1998 to 2022

In [61]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 2.2.0: median age of grooms and brides across all marriages, irrespective of type.
f = px.line(
    tot_mar_age,
    x='Years',
    y=['Grooms - Total Marriages (Years)', 'Brides - Total Marriages (Years)'],
)
f.update_layout(
    title='Graph 2.2.0: Median age of the Groom and Bride irrespective of the type of marriage(First/Remarriage)',
    hovermode='x',
    width=1000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Age of the bride/groom ', font=dict(size=16)), tickfont=dict(size=14)),
)
pio.show(f)

In [62]:
# Show the column labels of the first-marriage / remarriage median-age frame
# (the only expression in this cell whose value is displayed).
med_age_marriage_df.columns

Index(['Years', 'Grooms - First Marriages (Years)', 'Grooms - Remarriages (Years)', 'Brides - First Marriages (Years)', 'Brides - Remarriages (Years)'], dtype='object')

In [67]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 2.2.1: median age of grooms and brides, split by first marriage vs. remarriage.
f2 = px.line(
    med_age_marriage_df,
    x="Years",
    y=[
        'Grooms - First Marriages (Years)',
        'Grooms - Remarriages (Years)',
        'Brides - First Marriages (Years)',
        'Brides - Remarriages (Years)',
    ],
    color_discrete_sequence=px.colors.qualitative.Vivid_r,
)
f2.update_layout(
    title='Graph 2.2.1: Median age of the Groom and Bride with respect to the type of marriage(First/remarriage)',
    hovermode='x',
    width=1200,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Age of the bride/groom ', font=dict(size=16)), tickfont=dict(size=14)),
)
pio.show(f2)

### 3. Divorce trends in Singapore during the period 1998 to 2022

### 3.1 Divorce rates in Singapore during the period 1998 to 2022

In [69]:
import plotly.express as px
# Graph 3.1.0: general divorce rates of males and females per year.

tot_div = px.line(
    tot_div_rate,
    x="Years",
    y=[
        "Male General Divorce Rate * (Per 1,000 Married Resident Aged 20 Years & Over)",
        "Female General Divorce Rate * (Per 1,000 Married Resident Aged 20 Years & Over)",
    ],
)
tot_div.update_layout(
    title='Graph 3.1.0: Total Divorce rates of Male and Female',
    hovermode='x',
    width=1500,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Divorce rate', font=dict(size=16)), tickfont=dict(size=14)),
)
tot_div.show()

In [79]:
import plotly.express as px

# Graph 3.1.1: divorce rates of Millennial males, one bar per age band, grouped by year.
m1_div = px.bar(
    mil_div_males_r,
    x="Years",
    y=[
        "20 - 24 Years (Per 1,000 Married Resident Males)",
        "25 - 29 Years (Per 1,000 Married Resident Males)",
        "30 - 34 Years (Per 1,000 Married Resident Males)",
        "35 - 39 Years (Per 1,000 Married Resident Males)",
    ],
    color_discrete_sequence=px.colors.qualitative.Prism,
)
m1_div.update_layout(
    title=' Graph 3.1.1: Divorce rates of Millennials-Males ',
    hovermode='x',
    barmode='group',
    width=2500,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated yaxis.titlefont, which plotly 6 no longer accepts.
    yaxis=dict(title=dict(text='Divorce rate', font=dict(size=16)), tickfont=dict(size=14)),
)
m1_div.show()

In [82]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 3.1.2 - female divorce rates for the 20-39 age bands, drawn as grouped
# bars (one bar per age band within each year).
m2_div = px.bar(
    mil_div_females_r,
    x='Years',
    y=[
        '20 - 24 Years (Per 1,000 Married Resident Females)',
        '25 - 29 Years (Per 1,000 Married Resident Females)',
        '30 - 34 Years (Per 1,000 Married Resident Females)',
        '35 - 39 Years (Per 1,000 Married Resident Females)',
    ],
    color_discrete_sequence=px.colors.qualitative.Prism_r,
)
m2_div.update_layout(
    title='Graph 3.1.2: Divorce rates of Millennials-Females',
    hovermode='x',
    barmode='group',
    width=2600,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Divorce rate', font=dict(size=16)),
        tickfont_size=14,
    ),
)
m2_div.show()

In [83]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 3.1.3 - male divorce rates for the 40-and-over age bands, drawn as
# grouped bars (one bar per age band within each year).
g1_div = px.bar(
    genx_div_males_r,
    x='Years',
    y=[
        '40 - 44 Years (Per 1,000 Married Resident Males)',
        '45 - 49 Years (Per 1,000 Married Resident Males)',
        '50 Years And Over (Per 1,000 Married Resident Males)',
    ],
    color_discrete_sequence=px.colors.qualitative.Alphabet,
)
g1_div.update_layout(
    title='Graph 3.1.3: Divorce rates of Generation X -Males',
    hovermode='x',
    barmode='group',
    width=2500,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Divorce rate', font=dict(size=16)),
        tickfont_size=14,
    ),
)
g1_div.show()

In [85]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 3.1.4 - female divorce rates for the 40-and-over age bands, drawn as
# grouped bars (one bar per age band within each year).
g2_div = px.bar(
    genx_div_females_r,
    x='Years',
    y=[
        '40 - 44 Years (Per 1,000 Married Resident Females)',
        '45 - 49 Years (Per 1,000 Married Resident Females)',
        '50 Years And Over (Per 1,000 Married Resident Females)',
    ],
    color_discrete_sequence=px.colors.qualitative.Alphabet_r,
)
g2_div.update_layout(
    title='Graph 3.1.4: Divorce rates of Generation X -Females',
    hovermode='x',
    barmode='group',
    width=2500,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Divorce rate', font=dict(size=16)),
        tickfont_size=14,
    ),
)
g2_div.show()

### 3.2 Male and Female Divorcees in Singapore during the period 1998 to 2022

In [89]:
import plotly.express as px
# Graph 3.2.0 - median age at divorce over the years, one line for males and
# one for females.
age_divor = px.line(
    age_grp_sex_ofDivorcee,
    x="Years",
    y=[
        'Median Age At Divorce (Males) (Years)',
        'Median Age At Divorce (Females) (Years)',
    ],
)
age_divor.update_layout(
    title='Graph 3.2.0: Median Age of Male and female divorcees at the time of divorce',
    hovermode='x',
    width=1500,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Age of the divorcees', font=dict(size=16)),
        tickfont_size=14,
    ),
)
age_divor.show()

In [90]:
import plotly.express as px
# Graph 3.2.1 - total number of male and female divorcees per year, drawn as
# side-by-side bars.
tot_divor = px.bar(
    tot_div_male_fem,
    x="Years",
    y=[
        'Total Male Divorcees (Number)',
        'Total Female Divorcees (Number)',
    ],
)
tot_divor.update_layout(
    title='Graph 3.2.1: Total Divorce  of male and female',
    hovermode='x',
    barmode='group',
    width=1500,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of divorces', font=dict(size=16)),
        tickfont_size=14,
    ),
)
tot_divor.show()

In [93]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 3.2.2 - number of male divorcees per year for the under-40 age bands,
# drawn as grouped bars.
mil_divor = px.bar(
    div_males_mil,
    x='Years',
    y=[
        'Male Divorcees aged Under 25 (Number)',
        'Male Divorcees aged 25-29 (Number)',
        'Male Divorcees aged 30-34 (Number)',
        'Male Divorcees aged 35-39 (Number)',
    ],
    color_discrete_sequence=px.colors.qualitative.Prism_r,
)
mil_divor.update_layout(
    title='Graph 3.2.2: Millennial Males Divorcees',
    hovermode='x',
    barmode='group',
    width=2200,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of divorcees', font=dict(size=16)),
        tickfont_size=14,
    ),
)
mil_divor.show()

In [96]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 3.2.3 - number of female divorcees per year for the under-40 age
# bands, drawn as grouped bars.
mil_divor2 = px.bar(
    div_females_mil,
    x='Years',
    y=[
        'Female Divorcees aged Under 25 (Number)',
        'Female Divorcees aged 25-29 (Number)',
        'Female Divorcees aged 30-34 (Number)',
        'Female Divorcees aged 35-39 (Number)',
    ],
    color_discrete_sequence=px.colors.qualitative.Bold,
)
mil_divor2.update_layout(
    title='Graph 3.2.3: Millennial Females Divorcees',
    hovermode='x',
    barmode='group',
    width=2200,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of divorcees', font=dict(size=16)),
        tickfont_size=14,
    ),
)
mil_divor2.show()

In [107]:
import pandas as pd
import plotly.express as px

# Graph 3.2.4 - number of male divorcees per year for each age-group column
# of genX_divorcees_males, drawn as grouped bars.
genX_divorcees_males = pd.DataFrame(genX_divorcees_males)

# Flatten tuple (MultiIndex) column labels into single strings. Labels that
# are already plain strings are left untouched so re-running this cell does
# not turn 'Years' into 'Y e a r s'.
genX_divorcees_males.columns = [
    ' '.join(col).strip() if isinstance(col, tuple) else col
    for col in genX_divorcees_males.columns
]

# Reshape wide -> long: one row per (Years, Age Group) pair, so the age group
# can drive the bar colour.
long_df = genX_divorcees_males.melt(
    id_vars='Years',
    var_name='Age Group',
    value_name='Number of Divorcees',
)

gx_divor = px.bar(
    long_df,
    x='Years',
    y='Number of Divorcees',
    color='Age Group',
    title='Graph 3.2.4: Gen X Males Divorcees',
    color_discrete_sequence=px.colors.qualitative.Alphabet,
    barmode='group',
)

# Title and barmode are already set by px.bar above; only the remaining
# layout tweaks are applied here.
gx_divor.update_layout(
    hovermode='x',
    width=1800,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of divorcees', font=dict(size=16)),
        tickfont_size=14,
    ),
)
gx_divor.show()


In [116]:
#print(genX_divorcees_males.columns)
import pandas as pd
import plotly.express as px

# Graph 3.2.4 (categorical x-axis variant) - same Gen X male divorcee chart,
# but with 'Years' cast to string so Plotly treats each year as a category.
# Note: this overwrites the 'Years' column of genX_divorcees_males in place.
genX_divorcees_males['Years'] = genX_divorcees_males['Years'].astype(str)

# Reshape wide -> long: one row per (Years, Age Group) pair, so the age group
# can drive the bar colour.
long_df = genX_divorcees_males.melt(
    id_vars='Years',
    var_name='Age Group',
    value_name='Number of Divorcees',
)

gx_divor = px.bar(
    long_df,
    x='Years',
    y='Number of Divorcees',
    color='Age Group',
    title='Graph 3.2.4: Gen X Males Divorcees',
    color_discrete_sequence=px.colors.qualitative.Alphabet,
    barmode='group',
)

# Title and barmode are already set by px.bar above; only the remaining
# layout tweaks are applied here.
gx_divor.update_layout(
    hovermode='x',
    width=2000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of divorcees', font=dict(size=16)),
        tickfont_size=14,
    ),
)
gx_divor.show()


In [114]:
# Flatten tuple (MultiIndex) column labels of genx_divorcees_females into
# single space-joined strings; labels that are already plain strings are kept
# as-is, so the cell is safe to re-run.
genx_divorcees_females.columns = [
    ' '.join(col).strip() if isinstance(col, tuple) else col
    for col in genx_divorcees_females.columns
]

# Show the resulting labels to confirm the flattening.
print(genx_divorcees_females.columns)

Index(['Years', 'Female Divorcees aged 40-44 (Number)', 'Female Divorcees aged 45-49 (Number)', 'Female Divorcees aged 50-54 (Number)', 'Female Divorcees aged 55-59 (Number)', 'Female Divorcees aged 60 & Over (Number)'], dtype='object')


In [115]:


import pandas as pd
import plotly.express as px

# Graph 3.2.5 - number of female divorcees per year for each age-group column
# of genx_divorcees_females, drawn as grouped bars. 'Years' is cast to string
# so Plotly treats each year as a category.
# Note: this overwrites the 'Years' column of genx_divorcees_females in place.
genx_divorcees_females['Years'] = genx_divorcees_females['Years'].astype(str)

# Reshape wide -> long: one row per (Years, Age Group) pair, so the age group
# can drive the bar colour.
long_df2 = genx_divorcees_females.melt(
    id_vars='Years',
    var_name='Age Group',
    value_name='Number of Divorcees',
)

gx_divor2 = px.bar(
    long_df2,
    x='Years',
    y='Number of Divorcees',
    color='Age Group',
    title='Graph 3.2.5: Gen X Females Divorcees',
    color_discrete_sequence=px.colors.qualitative.Alphabet_r,
    barmode='group',
)

# Title and barmode are already set by px.bar above; only the remaining
# layout tweaks are applied here.
gx_divor2.update_layout(
    hovermode='x',
    width=2000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of divorcees', font=dict(size=16)),
        tickfont_size=14,
    ),
)
gx_divor2.show()

In [117]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 3.3.0 - total number of divorces per year as a single line.
di_k = px.line(
    key_factors_divorce,
    x="Years",
    y=['Total Divorces (Number)'],
    color_discrete_sequence=px.colors.qualitative.Plotly_r,
)
di_k.update_layout(
    title='Graph 3.3.0: Total divorce during the study period',
    hovermode='x',
    width=1000,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of Divorces ', font=dict(size=16)),
        tickfont_size=14,
    ),
)
di_k.show()

In [118]:
import plotly.express as px
import plotly.graph_objs as go

# Graph 3.3.1 - median age of male and female divorcees and median duration
# of marriage, plotted per year on a shared "years" axis.
di_k2 = px.line(
    key_factors_divorce,
    x="Years",
    y=[
        'Median Age Of Male Divorcees (Years)',
        'Median Age Of Female Divorcees (Years)',
        'Median Duration Of Marriage For Divorces (Years)',
    ],
    color_discrete_sequence=px.colors.qualitative.Safe,
)
di_k2.update_layout(
    title='Graph 3.3.1: Key Factors of divorce',
    hovermode='x',
    width=1200,
    xaxis_tickfont_size=14,
    # title.font replaces the deprecated `titlefont_size` alias, which recent
    # Plotly releases no longer accept.
    yaxis=dict(
        title=dict(text='Number of Years ', font=dict(size=16)),
        tickfont_size=14,
    ),
)
di_k2.show()