In [2]:
# Imports for the notebook: pandas for the tabular data, Altair for the charts.
# NOTE(review): numpy is imported but not used in the cells visible here.
import pandas as pd
import altair as alt
import numpy as np
# Work-around to let Altair handle larger data sets: switch to the 'json'
# data transformer.
alt.data_transformers.enable('json')


# Load the first-name dataset; the path is relative to the working directory.
france = pd.read_csv('./src/assets/dpt2020.csv', sep=',')

# Sanity check on the schema; the recorded output lists the columns
# sexe, preusuel, annais, dpt and nombre.
print(france.columns)

Index(['sexe', 'preusuel', 'annais', 'dpt', 'nombre'], dtype='object')


In [11]:
import csv

def optimize(csv_data, csv_file):
    """Aggregate birth counts per first name and year, then export them to CSV.

    Parameters
    ----------
    csv_data : pandas.DataFrame
        Row-level data providing the columns ``preusuel`` (first name),
        ``annais`` (year), ``sexe`` (1 = male, 2 = female) and ``nombre``
        (count).
    csv_file : str or path-like
        Destination of the aggregated CSV. The file is overwritten and written
        as UTF-8 so accented names do not depend on the platform locale.

    Returns
    -------
    dict
        ``{preusuel: {annais: {'male': n, 'female': n, 'total': n}}}``, with
        keys in order of first appearance in ``csv_data``.

    Raises
    ------
    ValueError
        If ``csv_data`` is ``None``.
    """
    if csv_data is None:
        raise ValueError("CSV was not loaded when optimize was called")
    print("Optimizing dataset")

    filteredByNameByYear = {}

    # Iterate over the four needed columns in lockstep rather than with
    # DataFrame.iterrows(): iterrows builds a Series for every row, which is
    # far slower and does not preserve per-column dtypes.
    columns = (
        csv_data['preusuel'],
        csv_data['annais'],
        csv_data['sexe'],
        csv_data['nombre'],
    )
    for preusuel, annais, sexe, nombre in zip(*columns):
        by_year = filteredByNameByYear.get(preusuel)
        if by_year is None:
            by_year = filteredByNameByYear[preusuel] = {}
        counts = by_year.get(annais)
        if counts is None:
            counts = by_year[annais] = {'male': 0, 'female': 0, 'total': 0}
        if sexe == 1:  # male
            counts['male'] += nombre
        elif sexe == 2:  # female
            counts['female'] += nombre
        # The total counts every row, whatever its sexe value.
        counts['total'] += nombre

    print("Optimization done")
    # Write the aggregate to a CSV file, one row per (name, year) pair.
    with open(csv_file, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Preusuel', 'Annais', 'Male', 'Female', 'Total'])  # header row
        writer.writerows(
            [preusuel, annais, counts['male'], counts['female'], counts['total']]
            for preusuel, by_year in filteredByNameByYear.items()
            for annais, counts in by_year.items()
        )

    return filteredByNameByYear

# Build the nested per-name / per-year counts from the raw frame; as a side
# effect optimize() also writes them to 'new_france.csv'.
opti_france = optimize(france, 'new_france.csv')


Optimizing dataset
Optimization done


In [9]:
# Inspect the aggregated yearly male/female/total counts for the name 'CLAUDE'.
opti_france['CLAUDE']

{'1900': {'male': 626, 'female': 0, 'total': 626},
 '1901': {'male': 683, 'female': 0, 'total': 683},
 '1902': {'male': 652, 'female': 3, 'total': 655},
 '1903': {'male': 654, 'female': 3, 'total': 657},
 '1904': {'male': 726, 'female': 0, 'total': 726},
 '1905': {'male': 702, 'female': 6, 'total': 708},
 '1906': {'male': 723, 'female': 3, 'total': 726},
 '1907': {'male': 662, 'female': 5, 'total': 667},
 '1908': {'male': 734, 'female': 11, 'total': 745},
 '1909': {'male': 660, 'female': 4, 'total': 664},
 '1910': {'male': 740, 'female': 14, 'total': 754},
 '1911': {'male': 742, 'female': 13, 'total': 755},
 '1912': {'male': 779, 'female': 27, 'total': 806},
 '1913': {'male': 750, 'female': 23, 'total': 773},
 '1914': {'male': 847, 'female': 30, 'total': 877},
 '1915': {'male': 474, 'female': 16, 'total': 490},
 '1916': {'male': 362, 'female': 34, 'total': 396},
 '1917': {'male': 466, 'female': 28, 'total': 494},
 '1918': {'male': 543, 'female': 45, 'total': 588},
 '1919': {'male': 693

In [13]:
# Interactive bar chart: for the first name typed in the search box
# (pre-filled with 'CLAUDE'), show the share of each sex among the births of
# every year.

selected_name = 'CLAUDE'

filtered_name = france[(france['preusuel'] == selected_name) & (france['annais'] != 'XXXX')]
# `fields` ties the search box to the `preusuel` column, so that
# transform_filter(search_input) keeps only the rows of the typed name.
# NOTE(review): confirm that the installed Altair version accepts a bare
# string as `value` for a point selection.
search_input = alt.selection_point(
    fields=["preusuel"],
    value=selected_name,
    bind=alt.binding(
        input="search",
        placeholder="Name",
        name='Search',
    )
)
# Drop the rows whose year is the 'XXXX' placeholder; they cannot be placed on
# the 1900-2020 axis.
filtered_france = france[(france['annais'] != 'XXXX')]

# The transforms below read the row-level columns `preusuel`, `annais`, `sexe`
# and `nombre`. The nested dict `opti_france` has none of them, so the chart
# is built on the filtered DataFrame.
gender_proportion_graph = alt.Chart(filtered_france).transform_filter(
    search_input
).transform_aggregate(
    # births per (year, sex) for the searched name
    sum_selection='sum(nombre)',
    groupby=['annais', 'sexe']
).transform_joinaggregate(
    # births per year, both sexes together, attached to every row
    total_sum='sum(sum_selection)',
    groupby=['annais']
).transform_calculate(
    proportion='datum.sum_selection / datum.total_sum*100',
    # any sexe value other than 1 is labelled "Female"
    sex_label='datum.sexe === 1 ? "Male" : "Female"'
).mark_bar().encode(
    x=alt.X('annais:O', title='Year', scale=alt.Scale(domain=[str(year) for year in range(1900, 2021)])),
    y=alt.Y('proportion:Q', title='Percentage by sex'),
    color=alt.Color('sex_label:N', scale=alt.Scale(domain=['Male', 'Female'], range=['red', 'blue']), legend=alt.Legend(title='Sexe')),
    tooltip=[
        alt.Tooltip('annais', title='Year'),
        alt.Tooltip('proportion:Q', title='Percentage'),
        alt.Tooltip("sum_selection:Q", title="Number of births"),
        alt.Tooltip("sex_label:N", title="Sex")
    ]
).properties(
    width=1000,
    height=400,
    title="Births for searched name"
).add_params(
    search_input
)
### this is a bar chart
gender_proportion_graph

: 

: 

In [9]:
# Split the rows of the selected name by sex, ignoring the rows whose year is
# the 'XXXX' placeholder, then print the total number of female births.
selected_name = 'CLAUDE'

# Filter on `selected_name` rather than a hard-coded literal, so changing the
# variable above actually changes the selection.
filtered_F_selected_name = france[(france['sexe'] == 2) & (
    france['preusuel'] == selected_name) & (france['annais'] != 'XXXX')]
filtered_M_selected_name = france[(france['sexe'] == 1) & (
    france['preusuel'] == selected_name) & (france['annais'] != 'XXXX')]

# Series.sum() is vectorised; the builtin sum() iterates value by value.
print(filtered_F_selected_name['nombre'].sum())

54031


In [57]:
# Scatter plot of the yearly number of births for one first name, with one
# point per year and sex.

selected_name = 'MARIE'
# Rows of the selected name, without the 'XXXX' year placeholder.
filtered_name = france[(france['preusuel'] == selected_name) & (france['annais'] != 'XXXX')]

gender_proportion_graph = alt.Chart(filtered_name).transform_aggregate(
    # births per (year, sex)
    sum_selection='sum(nombre)',
    groupby=['annais', 'sexe']
).transform_calculate(
    # any sexe value other than 1 is labelled "Female"
    sex_label='datum.sexe === 1 ? "Male" : "Female"'
).mark_circle().encode(
    x=alt.X('annais:O', title='Year', scale=alt.Scale(domain=[str(year) for year in range(1900, 2021)])),
    # The y channel shows absolute counts, so it is titled as a count rather
    # than as a percentage.
    y=alt.Y('sum_selection:Q', title='Number of births'),
    color=alt.Color('sex_label:N', scale=alt.Scale(domain=['Male', 'Female'], range=['red', 'blue']), legend=alt.Legend(title='Sexe')),
    tooltip=[
        alt.Tooltip('annais', title='Year'),
        alt.Tooltip("sum_selection:Q", title="Number of births"),
        alt.Tooltip("sex_label:N", title="Sex")
    ]

).properties(
    width=1000,
    height=500,
    title="Births for the name "+selected_name
)


###this is a scatter plot
gender_proportion_graph

Le but est de trouver la proportion de chaque prénom selon le genre, par année. J'essaie donc de faire en sorte d'avoir les nombres dans deux tables différentes pour pouvoir calculer le rapport.



In [2]:
# Interactive bar chart: for the first name entered in the search box
# (initial value 'CLAUDE'), show the percentage of births of each sex per year.

selected_name = 'CLAUDE'

# NOTE(review): filtered_name is not used by the chart below, which filters
# through the search selection instead.
filtered_name = france[(france['preusuel'] == selected_name) & (france['annais'] != 'XXXX')]
# Point selection on the `preusuel` field, bound to a search input element.
search_input = alt.selection_point(
    fields = ["preusuel"],
    value = selected_name,
    bind = alt.binding(
        input="search",
        placeholder="Name",
        name='Search',
    )
)
# All names, without the rows whose year is the 'XXXX' placeholder.
filtered_france =  france[(france['annais'] != 'XXXX')]

gender_proportion_graph = alt.Chart(filtered_france).transform_filter(
    # keep only the rows matching the search selection
    search_input
).transform_aggregate(
    # sum of `nombre` per (year, sex)
    sum_selection='sum(nombre)',
    groupby=['annais', 'sexe']
).transform_joinaggregate(
    # sum over both sexes per year, attached to every aggregated row
    total_sum='sum(sum_selection)',
    groupby=['annais']
).transform_calculate(
    # percentage of the year's total; any sexe other than 1 is labelled "Female"
    proportion='datum.sum_selection / datum.total_sum*100',
    sex_label='datum.sexe === 1 ? "Male" : "Female"'
).mark_bar().encode(
    x=alt.X('annais:O', title='Year', scale=alt.Scale(domain=[str(year) for year in range(1900, 2021)])),
    y=alt.Y('proportion:Q', title='Percentage by sex'),
    color=alt.Color('sex_label:N',scale=alt.Scale(domain=['Male', 'Female'],range=['red', 'blue']),legend=alt.Legend(title='Sexe')),
    tooltip=[
        alt.Tooltip('annais', title='Year'),
        alt.Tooltip('proportion:Q', title='Percentage'),
        alt.Tooltip("sum_selection:Q", title="Number of births"),
        alt.Tooltip("sex_label:N", title="Sex")
    ]
).properties(
    width=1000,
    height=400,
    title="Births for searched name"
).add_params(
    # register the search parameter on the chart so the input is rendered
    search_input
)
### this is a bar chart
gender_proportion_graph

In [5]:
# Scatter plot: for the name in `selected_name` ('CAMILLE' here), show the
# percentage of births of each sex per year.

selected_name = 'CAMILLE'
# Rows of the selected name, without the 'XXXX' year placeholder.
filtered_name = france[(france['preusuel'] == selected_name) & (france['annais'] != 'XXXX')]

gender_proportion_graph = alt.Chart(filtered_name).transform_aggregate(
    # despite the `_M` suffix, this is the sum of `nombre` for each
    # (year, sex) group, not for males only
    sum_selection_M='sum(nombre)',
    groupby=['annais', 'sexe']
).transform_joinaggregate(
    # sum over all sexes per year, attached to every aggregated row
    total_sum='sum(sum_selection_M)',
    groupby=['annais']
).transform_calculate(
    # percentage of the year's total; any sexe other than 1 is labelled "Female"
    proportion='datum.sum_selection_M / datum.total_sum*100',
    sex_label='datum.sexe === 1 ? "Male" : "Female"'
).transform_filter(
    # applied after the proportions are computed, so it only hides rows with
    # another sexe value; it does not change total_sum
    (alt.datum.sexe == 1) | (alt.datum.sexe == 2)
).mark_circle().encode(
    x=alt.X('annais:O', title='Year', scale=alt.Scale(domain=[str(year) for year in range(1900, 2021)])),
    y=alt.Y('proportion:Q', title='Percentage by sex'),
    color=alt.Color('sex_label:N',scale=alt.Scale(domain=['Male', 'Female'],range=['red', 'blue']),legend=alt.Legend(title='Sexe')),
    tooltip=[
        alt.Tooltip('annais', title='Year'),
        alt.Tooltip('proportion:Q', title='Percentage'),
        alt.Tooltip("sum_selection_M:Q", title="Number of births"),
        alt.Tooltip("sex_label:N", title="Sex")
    ]
).properties(
    width=1000,
    height=400,
    title="Births for the name "+selected_name
)


###this is a scatter plot
gender_proportion_graph