In [1]:
import h5py
import pandas as pd

from urllib.parse import unquote

In [2]:
quotes = pd.read_hdf("data/quotes/quotes_pol_allpol1.h5")

In [3]:
elec = pd.read_parquet("data/elec/leg_shares.parquet")

## Step 1: compute share of quotes per political nuance per month

In [4]:
all_monthly = quotes.groupby(pd.Grouper(key='date', freq='ME'))['text_id'].nunique().reset_index()
all_monthly.columns = ['Mois', 'Articles']

model_data = all_monthly.copy()

for alignment in elec['alignement_politique'].unique():
    subset = quotes[quotes['alignement_politique'] == alignment].copy()

    alignment_monthly = subset.groupby(pd.Grouper(key='date', freq='ME'))['text_id'].nunique().reset_index()
    alignment_monthly.columns = ['Mois', f'Articles_{alignment}']

    model_data = pd.merge(model_data, alignment_monthly, on='Mois', how='left')
    model_data[f'prop_{alignment}'] = model_data[f'Articles_{alignment}'] / model_data['Articles']

## Step 2: identify electoral cycles

In [5]:
legi_dates = [
    "21/06/1981",
    "16/03/1986",
    "11/06/1988",
    "28/03/1993",
    "01/06/1997",
    "16/06/2002",
    "17/06/2007",
    "17/06/2012",
    "18/06/2017",
    "19/06/2022",
    "06/07/2024"]

In [7]:
# Drop periods before the first election in the database
model_data = model_data[model_data['Mois'] >= pd.to_datetime("1981-06")]

In [8]:
model_data['Mois'] = model_data['Mois'].dt.to_period('M')

legi_periods = pd.to_datetime(legi_dates, dayfirst=True).to_period('M')
legi_map = pd.Series(legi_periods.year.values, index=legi_periods)

def get_last_election(mois):
    past_elections = legi_map[legi_map.index <= mois]
    return past_elections.iloc[-1] if not past_elections.empty else None

model_data['last_election'] = model_data['Mois'].apply(get_last_election)

## Step 3: merge with electoral data

In [14]:
elec = elec.groupby('alignement_politique', as_index=False).sum()

In [16]:
model_data

Unnamed: 0,Mois,Articles,Articles_extremedroite_droiteradicale,prop_extremedroite_droiteradicale,Articles_extremegauche_gaucheradicale,prop_extremegauche_gaucheradicale,Articles_centre,prop_centre,Articles_None,prop_None,Articles_autre,prop_autre,Articles_centredroite_droite,prop_centredroite_droite,Articles_centregauche_gauche,prop_centregauche_gauche,last_election
5,1981-06,666,12,0.018018,76,0.114114,3,0.004505,,,5.0,0.007508,206,0.309309,454,0.681682,1981
6,1981-07,485,6,0.012371,46,0.094845,7,0.014433,,,0.0,0.000000,117,0.241237,376,0.775258,1981
7,1981-08,342,6,0.017544,25,0.073099,3,0.008772,,,0.0,0.000000,85,0.248538,251,0.733918,1981
8,1981-09,624,11,0.017628,63,0.100962,11,0.017628,,,1.0,0.001603,160,0.256410,468,0.750000,1981
9,1981-10,686,6,0.008746,58,0.084548,6,0.008746,,,6.0,0.008746,225,0.327988,495,0.721574,1981
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
514,2023-11,36,19,0.527778,11,0.305556,16,0.444444,,,0.0,0.000000,17,0.472222,12,0.333333,2022
515,2023-12,47,21,0.446809,14,0.297872,26,0.553191,,,0.0,0.000000,22,0.468085,21,0.446809,2022
516,2024-01,75,21,0.280000,20,0.266667,50,0.666667,,,2.0,0.026667,17,0.226667,31,0.413333,2022
517,2024-02,36,10,0.277778,15,0.416667,21,0.583333,,,1.0,0.027778,13,0.361111,15,0.416667,2022


In [15]:
elec

Unnamed: 0,alignement_politique,candidat,1988,1993,1997,2002,2007,2012,2017,2022
0,autre,DIVREG,0.000562,0.026604,0.008174,0.006807,0.006712,0.004353,0.013944,0.007971
1,centre,CENALLIENSLREMPREP,0.0,0.0,0.0,0.007777,0.0,0.013422,0.142245,0.125813
2,centredroite_droite,DVDLRMDMNCEPRVRPRUDFUDFDUDIUMP,0.262492,0.289287,0.235983,0.277968,0.313282,0.19698,0.124269,0.074741
3,centregauche_gauche,DVGFGGECMAJNUPPRGDVGPSRDGRDGDVG,0.244777,0.156395,0.181088,0.169918,0.166383,0.238385,0.044738,0.143503
4,extremedroite_droiteradicale,AUDDLFFNMNRAUDMPFRECRN,0.064266,0.085509,0.099011,0.080466,0.035742,0.080873,0.072621,0.110953
5,extremegauche_gaucheradicale,AUGLCRLFILOPCF,0.073936,0.071291,0.080471,0.04802,0.046799,0.005726,0.073217,0.00556
