In [None]:
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

from matplotlib.lines import Line2D
from pandas.tseries.offsets import MonthEnd
from sklearn.metrics import mean_absolute_percentage_error as mean_ape
from sklearn.metrics import root_mean_squared_error as root_mse

sns.set(style="whitegrid")

In [None]:
def _unique_months(dates):
    """Convert day-first date strings into the distinct months they fall in.

    Parameters
    ----------
    dates : list of str
        Dates written as ``DD/MM/YYYY``.

    Returns
    -------
    pandas.PeriodIndex
        Monthly periods, duplicates removed, in order of first appearance.
    """
    return pd.to_datetime(dates, dayfirst=True).to_period('M').drop_duplicates()


# Presidential elections (two rounds per election).
presi_dates = [
    "26/04/1981", "10/05/1981",
    "24/04/1988", "08/05/1988",
    "23/04/1995", "07/05/1995",
    "21/04/2002", "05/05/2002",
    "22/04/2007", "06/05/2007",
    "22/04/2012", "06/05/2012",
    "23/04/2017", "07/05/2017",  # first round: 23 April (previously mistyped as 21/04)
    "10/04/2022", "24/04/2022"]
presi_months = _unique_months(presi_dates)

# Legislative elections.
# NOTE(review): the 2024 rounds were held on 30/06 and 07/07 in metropolitan France;
# the dates below fall in the same months, so the derived periods are unaffected.
legi_dates = [
    "14/06/1981", "21/06/1981",
    "16/03/1986",
    "05/06/1988", "12/06/1988",  # second round: 12 June (previously mistyped as 11/06)
    "21/03/1993", "28/03/1993",
    "25/05/1997", "01/06/1997",
    "09/06/2002", "16/06/2002",
    "10/06/2007", "17/06/2007",
    "10/06/2012", "17/06/2012",
    "11/06/2017", "18/06/2017",
    "12/06/2022", "19/06/2022",
    "29/06/2024", "06/07/2024"]
legi_months = _unique_months(legi_dates)

# Regional elections.
regio_dates = [
    "16/03/1986",
    "22/03/1992",
    "15/03/1998",
    "21/03/2004", "28/03/2004",
    "14/03/2010", "21/03/2010",
    "06/12/2015", "13/12/2015",
    "20/06/2021", "27/06/2021"]
regio_months = _unique_months(regio_dates)

# Cantonal elections.
# NOTE(review): 20/03 and 27/03 look like the 1994 rounds; the 1992 rounds were
# presumably 22/03 and 29/03 (same day as the regional vote above) — confirm the year,
# since it changes which months end up in other_elec_months / all_elec_months.
canto_dates = [
    "14/03/1982", "21/03/1982",
    "10/03/1985", "17/03/1985",
    "25/09/1988", "02/10/1988",
    "20/03/1992", "27/03/1992",
    "15/03/1998", "22/03/1998",
    "11/03/2001", "18/03/2001",
    "21/03/2004", "28/03/2004",
    "09/03/2008", "16/03/2008",
    "20/03/2011", "27/03/2011"]
canto_months = _unique_months(canto_dates)

# Departmental elections.
dept_dates = [
    "22/03/2015", "29/03/2015",
    "20/06/2021", "27/06/2021"]
dept_months = _unique_months(dept_dates)

# Municipal elections.
muni_dates = [
    "06/03/1983", "13/03/1983",
    "12/03/1989", "19/03/1989",
    "11/06/1995", "18/06/1995",
    "11/03/2001", "18/03/2001",
    "09/03/2008", "16/03/2008",
    "23/03/2014", "30/03/2014",
    "15/03/2020", "28/06/2020"]
muni_months = _unique_months(muni_dates)

# European elections.
# NOTE(review): the list starts in 1999 although the other lists go back to the 1980s —
# confirm whether the 1984, 1989 and 1994 votes should be included.
europ_dates = [
    "13/06/1999",
    "13/06/2004",
    "07/06/2009",
    "25/05/2014",
    "26/05/2019",
    "09/06/2024"]
europ_months = _unique_months(europ_dates)

# Aggregated calendars, sorted chronologically so that consecutive months are adjacent.
main_elec_dates = presi_dates + legi_dates + europ_dates
main_elec_months = _unique_months(main_elec_dates).sort_values()

other_elec_dates = regio_dates + canto_dates + dept_dates + muni_dates
other_elec_months = _unique_months(other_elec_dates).sort_values()

all_elec_dates = presi_dates + legi_dates + regio_dates + canto_dates + dept_dates + muni_dates + europ_dates
all_elec_months = _unique_months(all_elec_dates).sort_values()

def add_shaded_periods(ax_list, periods, color, alpha):
    """Shade runs of consecutive periods on one or several axes.

    Adjacent periods (e.g. April and May of the same year) are merged into a
    single band running from the start of the first period to the start of the
    period following the last one.

    Parameters
    ----------
    ax_list : Axes or array-like of Axes
        A single axes object, a list, or an array of any shape (as returned by
        ``plt.subplots``); every contained axes receives the same bands.
    periods : iterable of pandas.Period
        Periods to shade. They may be unsorted and may contain duplicates;
        an empty iterable draws nothing.
    color : color spec
        Forwarded to ``Axes.axvspan``.
    alpha : float
        Forwarded to ``Axes.axvspan``.
    """
    # Accept a lone Axes or a 2-D grid as well as the usual 1-D array.
    axes_flat = np.atleast_1d(ax_list).ravel()

    # Run detection below relies on chronological order without repeats.
    ordered = sorted(set(periods))
    if not ordered:
        return

    runs = []
    run_start = run_end = ordered[0]
    for period in ordered[1:]:
        if period == run_end + 1:
            run_end = period
        else:
            runs.append((run_start, run_end))
            run_start = run_end = period
    runs.append((run_start, run_end))

    for run_start, run_end in runs:
        start = run_start.to_timestamp()
        # Start of the following period, so the last period is covered in full.
        end = (run_end + 1).to_timestamp()
        for ax in axes_flat:
            ax.axvspan(start, end, color=color, alpha=alpha)

In [None]:
def _load_model_data(path):
    """Read a model dataset from parquet and prepare it for estimation.

    Rows whose ``political_alignment`` is ``'autre'`` are dropped, and a
    ``next_pres_votes_share`` column is added as the product of ``pres_dummy``
    and ``pres_votes_share``.

    Parameters
    ----------
    path : str
        Location of the parquet file.

    Returns
    -------
    pandas.DataFrame
        An independent frame that later cells can add columns to.
    """
    data = pd.read_parquet(path)
    # .copy(): the filtered frame is assigned to below and in later cells; without
    # it pandas may emit SettingWithCopyWarning.
    data = data[data['political_alignment'] != 'autre'].copy()
    data['next_pres_votes_share'] = data['pres_dummy'] * data['pres_votes_share']
    return data


model_data_no_journal = _load_model_data("data/model_data_no_journal.parquet")
model_data = _load_model_data("data/model_data.parquet")

In [None]:
# Column used as the dependent variable in the model fits, error metrics and plots below.
outcome = "quotes_share"

Ce notebook utilise une approche économétrique alternative étudiant l'écart entre des comportements théoriques et réels. Elle conçoit les comportements théoriques comme des normes, et leur écart avec les comportements réels comme une mesure de la déviance. Cette approche est intéressante lorsque les normes sont connues *a priori* et modélisables directement.

Prenons l'exemple du nombre d'articles consacrés aux différentes nuances politiques. Une ancienne règle éditoriale stipule que ces articles doivent se répartir au tiers entre le gouvernement, la majorité et l'opposition. Il est alors possible, plutôt que de modéliser la répartition réelle des articles, d'observer comment elle s'écarte de cette norme, en particulier selon les nuances politiques et au cours du temps. L'hypothèse d'une légitimation de l'extrême droite devrait se traduire par une application de moins en moins stricte de la règle des tiers. Ceci n'est en réalité pas vérifiable, car la règle des tiers amalgame l'ensemble des nuances politiques ne participant ni à la majorité ni au gouvernement, aboutissant à diluer l'extrême droite dans l'opposition.

Lorsque la norme n'est pas suffisamment connue pour être modélisable directement, il est possible de l'estimer à partir des données. Il existe ici un risque que le raisonnement devienne circulaire : lorsque l'on mesure l'écart des comportements réels à une norme définie à partir des comportements moyens, on peut aboutir à évaluer plutôt la performance prédictive du modèle. Pour minimiser ce risque, il faut utiliser le modèle le plus simple possible, notamment en éliminant les contrôles.

# 1. Estimation de la norme
Appliquons cette approche à la proportion des citations attribuées à chaque nuance politique. Supposons que la couverture médiatique d'une nuance est fonction de sa représentativité politique réelle, reflétée par son score aux précédentes élections législatives et son nombre de sièges à l'Assemblée Nationale, de sa représentativité politique anticipée, reflétée par son score aux prochaines élections présidentielles lorsqu'elles sont distantes de moins de 4 mois, et de sa participation au gouvernement. Cette hypothèse correspond au modèle n°3 de l'autre notebook, qui s'écrit :  

$$Y_{i} = \beta T_{i} + \gamma G_{i} + \theta P_{i} + \delta L_{i}$$

Comme nous cherchons à isoler des évolutions au cours du temps, il est préférable d'estimer ces paramètres sur une période de référence, afin de vérifier si les résidus augmentent lors des périodes suivantes. Nous avons retenu la charnière de juin 2017, comme dans l'autre notebook.

In [None]:
# Baseline fit on every available month: OLS on the four regressors as given
# (no constant column is added), with HC3 heteroskedasticity-robust covariance.
regressors = ["na_share", "government", "next_pres_votes_share", "leg_votes_share"]
X = model_data_no_journal[regressors]
y = model_data_no_journal[outcome]
model = sm.OLS(y, X).fit(cov_type='HC3')

# In-sample fit quality; the second metric is the RMSE scaled by the mean outcome.
y_pred = model.predict(X)
mape = mean_ape(y, y_pred)
rmspe = root_mse(y, y_pred) / y.mean()

# Coefficients and p-values side by side, followed by the fit metrics.
params = model.params.to_frame(name="coef")
pvalues = model.pvalues.to_frame(name="pval")
print(params.join(pvalues))
print(
    "",
    f"R2: {100*model.rsquared:.2f}%",
    f"MAPE: {100*mape:.2f}%",
    f"RMSPE: {rmspe:.5f}",
    sep="\n")

In [None]:
# Re-estimate on the reference period only (months up to and including the cutoff).
# The resulting `model` is the one used for every prediction further down.
cutoff = pd.Period('2017-06', freq='M')
in_reference_period = model_data_no_journal["month"] <= cutoff
reference_sample = model_data_no_journal[in_reference_period]

X = reference_sample[["na_share", "government", "next_pres_votes_share", "leg_votes_share"]]
y = reference_sample[outcome]
model = sm.OLS(y, X).fit(cov_type='HC3')

# In-sample fit quality on the reference period.
y_pred = model.predict(X)
mape = mean_ape(y, y_pred)
rmspe = root_mse(y, y_pred) / y.mean()

# Coefficients and p-values side by side, followed by the fit metrics.
params = model.params.to_frame(name="coef")
pvalues = model.pvalues.to_frame(name="pval")
print(params.join(pvalues))
print(
    "",
    f"R2: {100*model.rsquared:.2f}%",
    f"MAPE: {100*mape:.2f}%",
    f"RMSPE: {rmspe:.5f}",
    sep="\n")

Réduire les données à la période antérieure à juin 2017 exerce un effet ambigu sur les performances du modèle : les écarts absolus augmentent mais les écarts quadratiques se réduisent, ce qui signifie que les valeurs courantes sont moins bien prédites, tandis que les outliers sont mieux prédits.

Le score aux prochaines élections présidentielles n'est pas significatif. Nous conservons néanmoins cette variable dont nous avons vu qu'elle est intéressante seulement pour les nuances extrêmes.

# 2. Mesure des écarts
## Selon les partis politiques

In [None]:
# Predict with exactly the regressors the model was fitted on, in the same order.
# For a non-formula OLS, statsmodels matches exog columns by position, not by name,
# so passing "pres_votes_share" instead of "next_pres_votes_share" would silently
# apply the fitted coefficient to the wrong variable.
model_data_no_journal['y_pred'] = model.predict(
    model_data_no_journal[["na_share", "government", "next_pres_votes_share", "leg_votes_share"]])

# Relative residual in % of the observed value. A zero observation has no defined
# relative error, so it yields NaN rather than +/-inf (which would corrupt the
# rolling means computed for the figures).
observed = model_data_no_journal[outcome]
model_data_no_journal['rel_residuals'] = (
    100 * (observed - model_data_no_journal['y_pred']) / observed.replace(0, np.nan))

In [None]:
# Prediction error per political alignment, one table row per alignment
# in order of first appearance in the data.
alignment_col = model_data_no_journal['political_alignment']
summary = []

for alignment in alignment_col.unique():
    rows = model_data_no_journal.loc[alignment_col == alignment, [outcome, 'y_pred']]
    observed, fitted = rows[outcome], rows['y_pred']
    summary.append({
        'Political alignment': alignment,
        'MAPE': mean_ape(observed, fitted),
        # RMSE scaled by the alignment's mean observed value
        'RMSPE': root_mse(observed, fitted) / observed.mean()})

pd.DataFrame(summary).style.hide(axis=0)

In [None]:
# Plotting copy of the data with months converted from periods to timestamps.
plot_data = model_data_no_journal.assign(
    month=model_data_no_journal['month'].dt.to_timestamp())

# One palette per figure panel; the keys also fix the drawing order in each panel.
extreme_palette = {'Far left': 'crimson', 'Far right': 'royalblue'}
moderate_palette = {'Right': 'cornflowerblue', 'Left': 'orchid', 'Center': 'goldenrod'}

# Each entry is (alignments drawn in the panel, alignment -> colour).
alignment_groups = [
    (list(palette), palette)
    for palette in (extreme_palette, moderate_palette)]

In [None]:
# Two stacked panels for the far right: observed vs. predicted values on top,
# relative residuals (in %) below.
alignment = 'Far right'
# Order chronologically so that the rolling windows span consecutive months.
subset_data = plot_data[plot_data['political_alignment'] == alignment].sort_values('month')
subset_data['MA_observed'] = subset_data[outcome].rolling(window=6).mean()
subset_data['MA_rel'] = subset_data['rel_residuals'].rolling(window=6).mean()

fig, axes = plt.subplots(2, 1, figsize=(16, 10), sharex=True)

sns.lineplot(data=subset_data, x='month', y=outcome, ax=axes[0], label='Observed values', alpha=0.2, color='crimson', linestyle='-')
sns.lineplot(data=subset_data, x='month', y='MA_observed', ax=axes[0], label='6 months moving average for OV', alpha=0.65, color='crimson', linestyle='dashdot')
sns.lineplot(data=subset_data, x='month', y='y_pred', ax=axes[0], label='Predicted values', alpha=1, color='teal', linestyle='dotted')
axes[0].set_title("Observed and Predicted Values")
axes[0].set_ylabel('')

sns.lineplot(data=subset_data, x='month', y='rel_residuals', ax=axes[1], label='Relative residuals', alpha=0.3, color='crimson', linestyle='-')
sns.lineplot(data=subset_data, x='month', y='MA_rel', ax=axes[1], label='6 months moving average for RR', color='crimson', linestyle='dashdot')
# This panel is in % of the observed value, whereas y_pred is on the outcome's own
# scale: drawing it here would mix units. On a residual axis the prediction is the
# zero line, so that is what is shown as the reference.
axes[1].axhline(0, color='teal', alpha=1, linestyle='dotted', label='Zero residual (observed = predicted)')
axes[1].legend()  # refresh the legend so that it includes the reference line
axes[1].set_title("Relative Residuals (%)")
axes[1].set_xlabel('')
axes[1].set_ylabel('')

add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

plt.suptitle("Proportion of Quotes Attributed to Far-Right Politicians")
plt.tight_layout()
plt.show()

In [None]:
# Observed values, their 6-month moving average and the model predictions,
# one panel per alignment group and one colour per alignment.
fig, axes = plt.subplots(2, 1, figsize=(16, 12), sharex=True)

# (column, alpha, linestyle) of each series, in drawing order.
series_specs = [
    (outcome, 0.2, '-'),
    ('MA', 0.65, 'dashdot'),
    ('y_pred', 1, 'dotted')]

# (linestyle, legend text) for the line-type legend.
line_type_specs = [
    ('-', "Monthly average"),
    ('dashdot', "6 months moving average"),
    ('dotted', "Predictions")]

for ax, (political_alignments, colors) in zip(axes, alignment_groups):
    alignment_handles = []

    for alignment in political_alignments:
        tone = colors[alignment]
        subset_data = plot_data[plot_data['political_alignment'] == alignment].copy()
        subset_data['MA'] = subset_data[outcome].rolling(window=6).mean()

        for column, opacity, dash in series_specs:
            ax.plot(subset_data['month'], subset_data[column], label=None,
                    alpha=opacity, color=tone, linestyle=dash)

        alignment_handles.append(Line2D([0], [0], color=tone, lw=2, label=alignment))

    # First legend (colours); add_artist keeps it alive when the second one is created.
    ax.add_artist(
        ax.legend(handles=alignment_handles, title="Political alignment", loc="upper left"))

    # Second legend (line types), drawn with neutral black proxies.
    line_type_handles = [
        Line2D([0], [0], color='black', lw=2, linestyle=dash, label=text)
        for dash, text in line_type_specs]
    ax.legend(handles=line_type_handles, title="Values", loc="upper right")

axes[-1].set_xlabel("")

add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

plt.suptitle("Quote Distribution by Political Affiliation\nObserved vs. Predicted Values")
plt.tight_layout()
plt.savefig("output/nuances_val_graph.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Relative residuals and their 6-month moving average,
# one panel per alignment group and one colour per alignment.
fig, axes = plt.subplots(2, 1, figsize=(16, 12), sharex=True)

# (column, alpha, linestyle) of each series, in drawing order.
series_specs = [
    ('rel_residuals', 0.2, '-'),
    ('MA', 0.65, 'dashdot')]

# (linestyle, legend text) for the line-type legend.
line_type_specs = [
    ('-', "Monthly average"),
    ('dashdot', "6 months moving average")]

for ax, (political_alignments, colors) in zip(axes, alignment_groups):
    alignment_handles = []

    for alignment in political_alignments:
        tone = colors[alignment]
        subset_data = plot_data[plot_data['political_alignment'] == alignment].copy()
        subset_data['MA'] = subset_data['rel_residuals'].rolling(window=6).mean()

        for column, opacity, dash in series_specs:
            ax.plot(subset_data['month'], subset_data[column], label=None,
                    alpha=opacity, color=tone, linestyle=dash)

        alignment_handles.append(Line2D([0], [0], color=tone, lw=2, label=alignment))

    # First legend (colours); add_artist keeps it alive when the second one is created.
    ax.add_artist(
        ax.legend(handles=alignment_handles, title="Political alignment", loc="lower left"))

    # Second legend (line types), drawn with neutral black proxies.
    line_type_handles = [
        Line2D([0], [0], color='black', lw=2, linestyle=dash, label=text)
        for dash, text in line_type_specs]
    ax.legend(handles=line_type_handles, title="Values", loc="lower right")

axes[-1].set_xlabel("")

add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

plt.suptitle("Quote Distribution by Political Affiliation\nRelative Residuals (%)")
plt.tight_layout()
plt.savefig("output/nuances_rel_graph.png", dpi=300, bbox_inches='tight')
plt.show()

On ne voit pas apparaître de tendance manifestement favorable à l'extrême droite.

## Selon les journaux

In [None]:
# Predict with exactly the regressors the model was fitted on, in the same order.
# For a non-formula OLS, statsmodels matches exog columns by position, not by name,
# so passing "pres_votes_share" instead of "next_pres_votes_share" would silently
# apply the fitted coefficient to the wrong variable.
model_data['y_pred'] = model.predict(
    model_data[["na_share", "government", "next_pres_votes_share", "leg_votes_share"]])

# Relative residual in % of the observed value. A zero observation has no defined
# relative error, so it yields NaN rather than +/-inf (which would corrupt the
# rolling means computed for the figures).
observed = model_data[outcome]
model_data['rel_residuals'] = (
    100 * (observed - model_data['y_pred']) / observed.replace(0, np.nan))

In [None]:
# Prediction error per journal, one table row per journal
# in order of first appearance in the data.
journal_col = model_data['journal']
summary = []

for journal in journal_col.unique():
    rows = model_data.loc[journal_col == journal, [outcome, 'y_pred']]
    observed, fitted = rows[outcome], rows['y_pred']
    summary.append({
        'Journal': journal,
        'MAPE': mean_ape(observed, fitted),
        # RMSE scaled by the journal's mean observed value
        'RMSPE': root_mse(observed, fitted) / observed.mean()})

pd.DataFrame(summary).style.hide(axis=0)

In [None]:
# Plotting copy of the per-journal data with months converted from periods to timestamps.
plot_data = model_data.assign(month=model_data['month'].dt.to_timestamp())

# Line colour of each journal.
journal_names = ['Le Figaro', 'Libération', 'Le Monde', 'La Croix', 'Médiapart']
journal_hues = ['goldenrod', 'limegreen', 'orchid', 'skyblue', 'crimson']
colors = dict(zip(journal_names, journal_hues))

# One figure panel per alignment, top to bottom in this order.
alignments = ["Far right", "Right", "Center", "Left", "Far left"]
n_alignments = len(alignments)

In [None]:
# One panel per alignment: observed values of each journal against the model prediction.
fig, axes = plt.subplots(n_alignments, 1, figsize=(16, 4 * n_alignments), sharex=True)

for i, alignment in enumerate(alignments):
    ax = axes[i]
    subset_data = plot_data[plot_data['political_alignment'] == alignment]

    for journal in subset_data['journal'].unique():
        # NOTE(review): Médiapart is left out here but not in the residuals figure — confirm
        # that the asymmetry is intended.
        if journal == 'Médiapart': continue
        journal_data = subset_data[subset_data['journal'] == journal].sort_values('month')
        ax.plot(journal_data['month'], journal_data[outcome], label=journal,
                alpha=0.7, color=colors[journal], linestyle='-')

    # subset_data holds one row per journal and month, so plotting its y_pred column
    # directly would run through every journal's rows in turn and jump back in time
    # between them. Collapse to a single chronologically ordered value per month.
    # NOTE(review): assumes y_pred does not differ across journals for a given month
    # (the mean is then that common value) — confirm.
    predicted = subset_data.groupby('month')['y_pred'].mean()
    ax.plot(predicted.index, predicted.values, label='Predicted values',
            color='black', alpha=0.8, linestyle='dotted')
    ax.set_title(f"{alignment}")
    ax.legend()

add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

plt.suptitle("""
Quote Distribution by Political Affiliation and Journal\n
Observed vs. Predicted Values
""")
plt.tight_layout()
plt.savefig("output/journals_val_graph.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# One panel per alignment: 12-month moving average of each journal's relative residuals.
fig, axes = plt.subplots(n_alignments, 1, figsize=(16, 4 * n_alignments), sharex=True)

for i, alignment in enumerate(alignments):
    ax = axes[i]
    subset_data = plot_data[plot_data['political_alignment'] == alignment]

    for journal in subset_data['journal'].unique():
        # Order chronologically so that the rolling window spans consecutive months.
        journal_data = subset_data[subset_data['journal'] == journal].sort_values('month')
        moving_average = journal_data['rel_residuals'].rolling(window=12).mean()
        ax.plot(journal_data['month'], moving_average, label=journal,
                alpha=0.7, color=colors[journal], linestyle='-')

    # The axis is in % of the observed value, whereas y_pred is on the outcome's own
    # scale: drawing it here would mix units. On a residual axis the prediction is the
    # zero line, so that is what is shown as the reference.
    ax.axhline(0, color='black', alpha=0.8, linestyle='dotted',
               label='Zero residual (observed = predicted)')
    ax.set_title(f"{alignment}")
    ax.legend()

add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

plt.suptitle("""
Quote Distribution by Political Affiliation and Journal\n
Relative Residuals (%) - 12 months moving averages
""")
plt.tight_layout()
plt.savefig("output/journals_rel_graph.png", dpi=300, bbox_inches='tight')
plt.show()

Les comportements des différents journaux relativement à l'extrême droite s'avèrent très proches.