In [46]:

import pandas as pd

# Graphs
import plotly.express as px
import altair as alt


In [47]:
# Load the first-marriages table (age of groom by age of bride).
# Forward slashes keep the relative path valid on Windows, Linux and macOS;
# with backslashes the path is treated as a single file name on POSIX systems.
# skiprows=2 skips the first two rows of the sheet so the third row is used
# as the header (presumably title/notes rows -- confirm against the workbook).
marriages = pd.read_excel(
    '../data/marriage-first-marriages-age-of-groom-by-age-of-bride.xlsx',
    skiprows=2,
)
marriages.head(10)

Unnamed: 0,Calendar Year,Age of Groom (Years),<15,15-19,20-24,25-29,30-34,35-39,40-44,45-49,50-54,55-59,60-64,65-69,70-74,75+,NS,Total
0,2001,<15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2001,15-19,0,93,61,4,0,1,0,0,0,0,0,0,0,0,0,159
2,2001,20-24,0,438,2227,476,50,14,1,2,0,0,0,0,0,0,0,3208
3,2001,25-29,0,89,1933,2400,518,98,24,5,1,0,0,0,0,0,0,5068
4,2001,30-34,0,21,436,1313,1113,300,87,22,3,1,0,0,0,0,0,3296
5,2001,35-39,0,5,91,458,667,517,201,61,10,0,0,0,0,0,0,2010
6,2001,40-44,0,2,27,117,240,380,362,136,35,4,1,0,0,0,0,1304
7,2001,45-49,0,1,11,32,95,174,260,262,78,18,1,2,0,0,0,934
8,2001,50-54,0,0,3,7,30,68,119,182,136,40,6,1,1,0,0,593
9,2001,55-59,0,0,3,1,5,14,49,94,106,45,16,4,0,0,0,337


In [115]:
# Yearly totals: keep the rows whose groom-age label is 'Total' and only the
# year and 'Total' columns (presumably the per-year grand total -- confirm).
# .copy() turns the selection into an independent DataFrame, so later cells can
# add columns to it without pandas raising SettingWithCopyWarning.
annual_marriages = marriages.loc[
    marriages['Age of Groom (Years)'] == 'Total',
    ['Calendar Year', 'Total'],
].copy()

# Red line with point markers: one point per calendar year (ordinal x axis),
# total count on a quantitative y axis. interactive() enables pan/zoom.
annual_marriages_chart = (
    alt.Chart(annual_marriages)
    .mark_line(point=True, color='red')
    .encode(
        x=alt.X("Calendar Year:O", title="Calendar Year"),
        y=alt.Y("Total:Q", title="Total"),
    )
    .properties(
        title="First Marriages per Year",
        width="container",
        height=400
    )
    .interactive()
)
annual_marriages_chart

In [49]:
# Rows labelled 'Total' in the groom-age column are handled separately from
# the per-age-bracket rows, so build the mask once and reuse it.
groom_age_label = marriages['Age of Groom (Years)']
total_rows = groom_age_label == 'Total'

# Women: take the 'Total' rows, drop the grand total and the label column, then
# reshape the remaining age-bracket columns into long form
# (one row per year and age bracket).
marriages_woman = marriages[total_rows].drop(columns=['Total', 'Age of Groom (Years)'])
age_groups = marriages_woman.columns[1:]  # every column after 'Calendar Year'
marriages_woman = marriages_woman.melt(
    id_vars=['Calendar Year'],
    value_vars=age_groups,
    var_name='Group Age',
    value_name='Women Number',
)

# Men: the non-'Total' rows already are one row per year and groom age bracket;
# their 'Total' column becomes the men count.
marriages_men = marriages.loc[
    ~total_rows, ['Calendar Year', 'Age of Groom (Years)', 'Total']
].rename(columns={'Total': 'Men Number', 'Age of Groom (Years)': 'Group Age'})

# One row per (year, age bracket) carrying both the men and the women counts.
marriages_gender = marriages_men.merge(marriages_woman, on=['Group Age', 'Calendar Year'])

# 15-step sequential palettes (light to dark), one colour per age bracket.
blue_15 = [
    "#e3f2fd", "#d0e7fb", "#bddcf9", "#aacff7", "#97c2f4",
    "#84b5f1", "#71a8ee", "#5e9beb", "#4b8ee7", "#3881e3",
    "#2573df", "#1265db", "#0b57c1", "#06479e", "#003377",
]
purple_15 = [
    "#f3e5f5", "#e8d4ee", "#ddc3e7", "#d2b2e0", "#c7a1d9",
    "#bc90d2", "#b17fcb", "#a66ec4", "#9b5dbd", "#8f4cb6",
    "#833baf", "#772aa8", "#6b19a1", "#5e089a", "#4a0072",
]
# Age brackets in ascending order, used to sort the colour legend.
ordered_ages = [
    "<15", "15-19", "20-24", "25-29", "30-34",
    "35-39", "40-44", "45-49", "50-54", "55-59",
    "60-64", "65-69", "70-74", "75+", "NS",
]

# Which series to plot: 'Women' or 'Men' (selects the y column and the palette).
gender = 'Women'

if gender == 'Women':
    palette = purple_15
else:
    palette = blue_15

# One line per age bracket, coloured along the chosen palette.
marriages_gender_chart = (
    alt.Chart(marriages_gender, title=f"First Marriages per Group Age for {gender}")
    .mark_line()
    .encode(
        x=alt.X("Calendar Year:O"),
        y=alt.Y(f"{gender} Number:Q"),
        color=alt.Color(
            "Group Age:N",
            scale=alt.Scale(range=palette),
            sort=ordered_ages,
        ),
        detail=alt.Detail("Group Age:N"),
    )
    .interactive()
)
marriages_gender_chart


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



Mean age at first marriage

In [113]:
# Estimate the mean age at first marriage per year for men and women.
# Each age bracket is represented by a single age (roughly its midpoint), and
# the mean is sum(age * count) / total marriages for the year.
# NOTE(review): 'NS' is mapped to 80 like '75+'; if 'NS' means "not stated",
# any non-zero NS count biases the mean upward -- confirm the intended handling.
age_group_to_years = {
    '<15': 15, '15-19': 17, '20-24': 22, '25-29': 27, '30-34': 32,
    '35-39': 37, '40-44': 42, '45-49': 47, '50-54': 52, '55-59': 57,
    '60-64': 62, '65-69': 67, '70-74': 72, '75+': 80, 'NS': 80,
}

marriages_gender_accumulated = marriages_gender.copy()
# Series.map gives a numeric column directly and avoids the deprecated silent
# downcasting that replace() performs on object columns.
marriages_gender_accumulated['Group Age'] = (
    marriages_gender_accumulated['Group Age'].map(age_group_to_years)
)
marriages_gender_accumulated['Men Accumulated Years'] = (
    marriages_gender_accumulated['Group Age'] * marriages_gender_accumulated['Men Number']
)
marriages_gender_accumulated['Women Accumulated Years'] = (
    marriages_gender_accumulated['Group Age'] * marriages_gender_accumulated['Women Number']
)
marriages_gender_accumulated = (
    marriages_gender_accumulated.groupby('Calendar Year').sum().reset_index()
)

# Align the accumulated years with the yearly totals by 'Calendar Year' instead
# of by row position, so the result does not depend on both frames sharing the
# same row order. Rebinding to a copy keeps the cell re-runnable and avoids
# writing into a slice of `marriages`.
accumulated_by_year = marriages_gender_accumulated.set_index('Calendar Year')
annual_marriages = annual_marriages.copy()
years = annual_marriages['Calendar Year']
annual_marriages['Men Mean Age'] = (
    years.map(accumulated_by_year['Men Accumulated Years']) / annual_marriages['Total']
)
annual_marriages['Women Mean Age'] = (
    years.map(accumulated_by_year['Women Accumulated Years']) / annual_marriages['Total']
)

# Long form (one row per year and gender) so colour can encode the gender.
mean_age_long = annual_marriages.melt(
    id_vars='Calendar Year',
    value_vars=["Women Mean Age", "Men Mean Age"],
    var_name="Gender",
    value_name="Mean Age",
)

marriages_gender_age_chart = (
    alt.Chart(mean_age_long, title="Average Age of First Marriage")
    .mark_line()
    .encode(
        x="Calendar Year:O",
        y="Mean Age:Q",
        color=alt.Color('Gender:N', scale=alt.Scale(range=["#1e3a8a", "#4a0072"]))
    ).interactive()
)
marriages_gender_age_chart


Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [None]:
# Load the live-births table (file 13100415.csv).
# Forward slashes keep the relative path valid on Windows, Linux and macOS;
# with backslashes the path is treated as a single file name on POSIX systems.
newborns = pd.read_csv('../data/13100415.csv')

newborns.head()

Unnamed: 0,REF_DATE,GEO,DGUID,Month of birth,Characteristics,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,1991,"Canada, place of residence of mother",2021A000011124,"Total, month of birth",Number of live births,Number,223,units,0,v21400536,1.1.1,403816.0,,,,0
1,1991,"Canada, place of residence of mother",2021A000011124,"Total, month of birth",Percentage of live births,Percentage,242,units,0,v21400537,1.1.2,100.0,,,,1
2,1991,"Canada, place of residence of mother",2021A000011124,"Month of birth, January",Number of live births,Number,223,units,0,v21400538,1.2.1,32213.0,,,,0
3,1991,"Canada, place of residence of mother",2021A000011124,"Month of birth, January",Percentage of live births,Percentage,242,units,0,v21400539,1.2.2,8.0,,,,1
4,1991,"Canada, place of residence of mother",2021A000011124,"Month of birth, February",Number of live births,Number,223,units,0,v21400540,1.3.1,30345.0,,,,0


array(['Month of birth, January', 'Month of birth, February',
       'Month of birth, March', 'Month of birth, April',
       'Month of birth, May', 'Month of birth, June',
       'Month of birth, July', 'Month of birth, August',
       'Month of birth, September', 'Month of birth, October',
       'Month of birth, November', 'Month of birth, December'],
      dtype=object)

In [122]:
# Yearly live births for Alberta, 2001-2024 inclusive: keep the rows measured
# as a count (UOM == 'Number') for the all-months total, and only the year and
# value columns.
total_newborns = newborns.loc[
    (newborns.GEO == 'Alberta, place of residence of mother')
    & (newborns.UOM == 'Number')
    & (newborns['Month of birth'] == 'Total, month of birth')
    & (newborns.REF_DATE >= 2001)
    & (newborns.REF_DATE <= 2024),
    ['REF_DATE', 'VALUE'],
]

# Line chart of births per year. The title previously read "Average Age of
# First Marriage" (copied from another chart); it now describes this data.
# The x-axis title matches annual_marriages_chart ("Calendar Year") so the two
# charts are labelled consistently.
total_newborns_chart = (
    alt.Chart(total_newborns, title="Live Births per Year in Alberta")
    .mark_line()
    .encode(
        x=alt.X("REF_DATE:O", title="Calendar Year"),
        y=alt.Y("VALUE:Q", title="Live Births"),
    ).interactive()
)

In [123]:
# Overlay births and first marriages in one chart (births layer first).
# The two series get separate y scales, so each line uses its own axis range.
births_and_marriages = total_newborns_chart + annual_marriages_chart
combined = births_and_marriages.resolve_scale(y='independent')
combined
