In [None]:
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

from joblib import Parallel, delayed
from matplotlib.lines import Line2D
from pandas.tseries.offsets import MonthEnd
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import root_mean_squared_error as root_mse
from tqdm import tqdm

# Folder that receives every figure saved below; no error if it already exists.
os.makedirs("output", exist_ok=True)
# Use seaborn's "white" style for all the figures of the notebook.
sns.set(style="white")

# Données

In [None]:
# Inputs of the quotes/mentions comparison made in the correlation section.
mentions = pd.read_parquet("data/model_data_mentions_njl.parquet")
quotes = pd.read_parquet("data/model_data_njl.parquet")

# Tables used to fit and evaluate the model.
# NOTE(review): `model_data_no_journal` reads the same file as `mentions` above;
# the "_njl" suffix presumably stands for "no journal level" -- confirm.
model_data = pd.read_parquet("data/model_data_mentions.parquet")
model_data_no_journal = pd.read_parquet("data/model_data_mentions_njl.parquet")
# Row order of the political alignments in the summary tables.
nuances_order = ["Far right", "Right", "Center", "Left", "Far left", "Other"]

In [None]:
# Election calendars (dd/mm/yyyy). Only the *month* of each date is used downstream
# (to shade election months on the figures), but the dates are kept exact so the
# lists can be trusted as a reference. Every date is a Sunday.

# Presidential elections: first and second round.
presi_dates = [
    "26/04/1981", "10/05/1981",
    "24/04/1988", "08/05/1988",
    "23/04/1995", "07/05/1995",
    "21/04/2002", "05/05/2002",
    "22/04/2007", "06/05/2007",
    "22/04/2012", "06/05/2012",
    "23/04/2017", "07/05/2017",  # first round was on the 23rd (previously typed 21/04)
    "10/04/2022", "24/04/2022"]
presi_months = pd.to_datetime(presi_dates, dayfirst=True).to_period('M').drop_duplicates()

# Legislative elections: first and second round (1986 has a single date listed).
legi_dates = [
    "14/06/1981", "21/06/1981",
    "16/03/1986",
    "05/06/1988", "12/06/1988",  # second round was on the 12th (previously typed 11/06)
    "21/03/1993", "28/03/1993",
    "25/05/1997", "01/06/1997",
    "09/06/2002", "16/06/2002",
    "10/06/2007", "17/06/2007",
    "10/06/2012", "17/06/2012",
    "11/06/2017", "18/06/2017",
    "12/06/2022", "19/06/2022",
    "30/06/2024", "07/07/2024"]  # Sunday polling days (previously the Saturdays 29/06 and 06/07)
legi_months = pd.to_datetime(legi_dates, dayfirst=True).to_period('M').drop_duplicates()

# European elections: single round.
europ_dates = [
    "17/06/1984",
    "18/06/1989",
    "12/06/1994",
    "13/06/1999",
    "13/06/2004",
    "07/06/2009",
    "25/05/2014",
    "26/05/2019",
    "09/06/2024"]
europ_months = pd.to_datetime(europ_dates, dayfirst=True).to_period('M').drop_duplicates()

# All election months, de-duplicated and in chronological order (add_shaded_periods
# relies on this ordering to merge consecutive months into one span).
main_elec_dates = presi_dates + legi_dates + europ_dates
main_elec_months = pd.to_datetime(main_elec_dates, dayfirst=True).to_period('M').drop_duplicates().sort_values()

def add_shaded_periods(ax_list, periods, color, alpha):
    """Shade runs of consecutive monthly periods on every axis of ``ax_list``.

    ``periods`` is walked in order; neighbouring entries that are exactly one
    period apart are merged into a single run. Each run is drawn with
    ``ax.axvspan`` from the start of its first month to the start of the month
    following its last month.

    Parameters
    ----------
    ax_list : iterable of objects exposing ``axvspan`` (e.g. matplotlib axes)
    periods : indexable sequence of monthly ``pd.Period`` values, expected to
        be sorted (the function does not sort them)
    color, alpha : forwarded unchanged to ``axvspan``
    """
    last_index = len(periods) - 1
    run_start = None
    for idx, current in enumerate(periods):
        if run_start is None:
            run_start = current

        # The run goes on only if a next entry exists and is the following month.
        run_continues = idx < last_index and periods[idx + 1] == current + 1
        if run_continues:
            continue

        span_start = run_start.to_timestamp()
        # Adding one month end moves to the next period, whose timestamp is its first day.
        span_end = (current + MonthEnd(1)).to_timestamp()
        for ax in ax_list:
            ax.axvspan(span_start, span_end, color=color, alpha=alpha)
        run_start = None

# Corrélation avec les quotes

In [None]:
# Keep only the merge keys and the share column of each table.
mentions = mentions[['month', 'political_alignment', 'quotes_share']]
quotes = quotes[['month', 'political_alignment', 'quotes_share']]
# Inner join (pandas default) on month x alignment. Both tables carry a `quotes_share`
# column, so pandas applies its default suffixes: `quotes_share_x` comes from
# `mentions` (left) and `quotes_share_y` from `quotes` (right).
all_data = pd.merge(mentions, quotes, on=['month', 'political_alignment'])

In [None]:
# Pearson correlation (the pandas default) between the share taken from `mentions`
# (`_x`) and the share taken from `quotes` (`_y`).
correlation = all_data['quotes_share_x'].corr(all_data['quotes_share_y'])
print(f"{correlation:.4f}")

In [None]:
# Simple linear regression of the `quotes` share (`_y`) on the `mentions` share (`_x`).
# X is kept two-dimensional (single-column frame) as scikit-learn expects.
X = all_data[['quotes_share_x']]
y = all_data['quotes_share_y']

model = LinearRegression().fit(X, y)
# `score` is the in-sample coefficient of determination.
print(f"R²: {model.score(X, y)}")
print(f"Intercept: {model.intercept_}, Slope: {model.coef_[0]}")

# Modèle

In [None]:
# Name of the dependent-variable column used by every fit, metric and figure below.
outcome = "quotes_share"

In [None]:
def compute_y_pred(
    df,
    n, alpha, beta, gamma, theta,
    delta_pre_5, delta_pre_4, delta_pre_3, delta_pre_2, delta_pre_1, delta_pre_0,
    lambda_1, lambda_2):
    """Return the model's predicted share for every row of ``df``.

    The prediction is ``alpha`` plus one term per regime column, each term being
    multiplied by that column (the columns are presumably 0/1 dummies -- not checked):

    * ``inter_dum``: the baseline, a mix of ``na_share``, ``pres_votes_share``
      (both scaled by ``1 - n * alpha - theta``) and ``government`` (weight ``theta``);
    * ``pre_5`` ... ``pre_0``: ``delta * baseline + (1 - delta) * pres_poll_result``
      with the matching ``delta_pre_k``;
    * ``post_dum``: ``pres_votes_share`` weighted by ``1 - n * alpha - lambda_1 - lambda_2``
      plus ``lambda_1 * r2_rank_1`` and ``lambda_2 * r2_rank_2``.

    Parameters
    ----------
    df : DataFrame holding the columns named above.
    n : multiplier of ``alpha`` in the residual weights.
    alpha, beta, gamma, theta, delta_pre_*, lambda_1, lambda_2 : model coefficients.

    Returns
    -------
    Series aligned on ``df``'s index.
    """
    # Weight left for the baseline components once the intercepts and theta are removed.
    residual_weight = 1 - n * alpha - theta
    baseline = (
        beta * residual_weight * df["na_share"] +
        gamma * residual_weight * df["pres_votes_share"] +
        theta * df["government"])

    polls = df["pres_poll_result"]
    campaign_terms = (
        ("pre_5", delta_pre_5),
        ("pre_4", delta_pre_4),
        ("pre_3", delta_pre_3),
        ("pre_2", delta_pre_2),
        ("pre_1", delta_pre_1),
        ("pre_0", delta_pre_0))

    # Terms are accumulated in the same left-to-right order as a single long sum.
    y_pred = alpha + df["inter_dum"] * baseline
    for dummy, delta in campaign_terms:
        y_pred = y_pred + df[dummy] * (delta * baseline + (1 - delta) * polls)

    post_election = (
        (1 - n * alpha - lambda_1 - lambda_2) * df["pres_votes_share"] +
        lambda_1 * df["r2_rank_1"] +
        lambda_2 * df["r2_rank_2"])

    return y_pred + df["post_dum"] * post_election

In [None]:
def objective(params, data_opt, y_opt):
    """Loss minimised by the optimiser: RMSE between ``y_opt`` and the model prediction.

    ``params`` holds, in order: alpha, beta, theta, delta_pre_5 ... delta_pre_0,
    lambda_1, lambda_2. ``gamma`` is not a free parameter: it is set to ``1 - beta``.
    The multiplier ``n`` is read from module scope at call time.
    """
    (alpha, beta, theta,
     delta_pre_5, delta_pre_4, delta_pre_3, delta_pre_2, delta_pre_1, delta_pre_0,
     lambda_1, lambda_2) = params[:11]

    fitted = compute_y_pred(
        data_opt,
        n, alpha, beta, 1 - beta, theta,
        delta_pre_5, delta_pre_4, delta_pre_3, delta_pre_2, delta_pre_1, delta_pre_0,
        lambda_1, lambda_2)

    return root_mse(y_opt, fitted)

# Initial guess for the coefficients, in the order `objective` unpacks them:
# alpha, beta, theta, delta_pre_5 to delta_pre_0, lambda_1 and lambda_2
initial_guess = [0.01, 0.7, 0.2, 0.9, 0.85, 0.8, 0.7, 0.5, 0.2, 0.15, 0.1]

# Bounds: all coefficients between 0 and 1
bounds = [(0, 1)] * 11

# Constraints: increasing poll weights during electoral campaigns, i.e. the baseline
# weights delta_pre_k are non-increasing as the election gets closer (an SLSQP "ineq"
# constraint requires fun(x) >= 0). The indices refer to positions in `initial_guess`.
# NOTE: the `minimize` calls visible in this notebook pass `constraints=[]`, so this
# list is currently not applied.
constraints = [
    {"type": "ineq", "fun": lambda x: x[3] - x[4]},  # delta_pre_5 >= delta_pre_4
    {"type": "ineq", "fun": lambda x: x[4] - x[5]},  # delta_pre_4 >= delta_pre_3
    {"type": "ineq", "fun": lambda x: x[5] - x[6]},  # delta_pre_3 >= delta_pre_2
    {"type": "ineq", "fun": lambda x: x[6] - x[7]},  # delta_pre_2 >= delta_pre_1
    {"type": "ineq", "fun": lambda x: x[7] - x[8]},  # delta_pre_1 >= delta_pre_0
]

# Number of political nuances (read as a global by `objective`)
n = len(model_data_no_journal['political_alignment'].unique())

### Sur l'ensemble des données

In [None]:
# Regressors read by compute_y_pred, taken from the table without the journal dimension.
data_opt = model_data_no_journal[
    ["inter_dum", "pre_5", "pre_4", "pre_3", "pre_2", "pre_1", "pre_0", "post_dum",
     "na_share", "pres_votes_share", "government", "pres_poll_result", "r2_rank_1", "r2_rank_2"]]

y_opt = model_data_no_journal[outcome]

# Only the [0, 1] bounds are enforced here; pass `constraints=constraints` instead of
# the empty list to also impose the ordering of the delta_pre_k coefficients.
result = minimize(objective, initial_guess, args=(data_opt, y_opt), bounds=bounds, constraints=[], method='SLSQP')

# Fail fast: every later cell reads the coefficients unpacked below. Carrying on after
# a failed fit would either raise a NameError far from its cause (fresh kernel) or
# silently reuse values left over from a previous run (dirty kernel).
if not result.success:
    raise RuntimeError(f"Optimization failed: {result.message}")

alpha, beta, theta, delta_pre_5, delta_pre_4, delta_pre_3, delta_pre_2, delta_pre_1, delta_pre_0, lambda_1, lambda_2 = result.x
# gamma is not estimated: it is the complement of beta.
gamma = 1 - beta

for coef_name, coef_value in [
        ("alpha", alpha), ("beta", beta), ("gamma", gamma), ("theta", theta),
        ("delta_pre_5", delta_pre_5), ("delta_pre_4", delta_pre_4), ("delta_pre_3", delta_pre_3),
        ("delta_pre_2", delta_pre_2), ("delta_pre_1", delta_pre_1), ("delta_pre_0", delta_pre_0),
        ("lambda_1", lambda_1), ("lambda_2", lambda_2)]:
    print(f"Optimal {coef_name}: {coef_value:.5f}")

In [None]:
# In-sample predictions of the estimates obtained on `data_opt` / `y_opt`.
y_pred = compute_y_pred(
    data_opt,
    n, alpha, beta, gamma, theta,
    delta_pre_5, delta_pre_4, delta_pre_3, delta_pre_2, delta_pre_1, delta_pre_0,
    lambda_1, lambda_2)

r2 = r2_score(y_opt, y_pred)
# Mean residual (observed minus predicted): signed average bias of the fit.
mr = (y_opt - y_pred).mean()
# Root mean squared error, in the units of the outcome (printed as-is, not as a percentage).
rmspe = root_mse(y_opt, y_pred)

print(f"R2: {100*r2:.3f}%")
print(f"MR: {100*mr:.5f}%")
print(f"RMSPE: {rmspe:.5f}")

In [None]:
# Block bootstrap: whole months are resampled (instead of single observations) so that
# every draw keeps all the rows of a month together and respects compositionality.
n_bootstraps = 5000

# Row positions of each month, months in chronological order. Building the blocks from
# the `month` column (rather than assuming 6 contiguous rows per month) keeps the
# resampling correct even if the table is not month-sorted or a month has fewer rows.
# With month-sorted data and 6 rows per month the blocks are the same as before.
month_codes = model_data_no_journal.groupby("month", sort=True).ngroup().to_numpy()
n_blocks = int(month_codes.max()) + 1
month_row_positions = [np.flatnonzero(month_codes == code) for code in range(n_blocks)]

def run_bootstrap_iteration(seed):
    """Refit the model on one month-block bootstrap sample.

    Parameters
    ----------
    seed : int
        Seed of the draw; the same seed always gives the same sample.

    Returns
    -------
    dict mapping coefficient names to their estimates on the resampled data,
    or ``None`` when the optimiser reports a failure.
    """
    # Local generator: same stream as np.random.seed(seed) followed by np.random.choice,
    # without touching NumPy's global random state.
    rng = np.random.RandomState(seed)
    sampled_block_ids = rng.choice(n_blocks, size=n_blocks, replace=True)
    sampled_row_indices = np.concatenate([
        month_row_positions[block_id] for block_id in sampled_block_ids
    ])
    boot_data = model_data_no_journal.iloc[sampled_row_indices].reset_index(drop=True)
    boot_data = boot_data[[
        outcome,
        "political_alignment", "inter_dum", "pre_5", "pre_4", "pre_3", "pre_2", "pre_1", "pre_0", "post_dum",
        "na_share", "pres_votes_share", "government", "pres_poll_result", "r2_rank_1", "r2_rank_2"]]
    boot_y = boot_data[outcome]

    result = minimize(objective, initial_guess, args=(boot_data, boot_y), bounds=bounds, constraints=[], method='SLSQP')

    if not result.success:
        return None

    (alpha_bst, beta_bst, theta_bst,
     delta_pre_5_bst, delta_pre_4_bst, delta_pre_3_bst, delta_pre_2_bst, delta_pre_1_bst, delta_pre_0_bst,
     lambda_1_bst, lambda_2_bst) = result.x
    # Key order defines the row order of the summary table built from these dicts.
    return {
        "alpha": alpha_bst,
        "beta": beta_bst,
        "gamma": 1 - beta_bst,
        "theta": theta_bst,
        "delta_pre_5": delta_pre_5_bst,
        "delta_pre_4": delta_pre_4_bst,
        "delta_pre_3": delta_pre_3_bst,
        "delta_pre_2": delta_pre_2_bst,
        "delta_pre_1": delta_pre_1_bst,
        "delta_pre_0": delta_pre_0_bst,
        "lambda_1": lambda_1_bst,
        "lambda_2": lambda_2_bst
    }

# One task per seed, spread over all available cores. The progress bar follows task
# dispatch, so it may finish slightly before the last fits complete.
results = Parallel(n_jobs=-1)(
    delayed(run_bootstrap_iteration)(seed)
    for seed in tqdm(range(n_bootstraps), desc="Bootstrapping")
)

# Filter out failed runs (None)
bootstrap_results = [r for r in results if r is not None]

In [None]:
# Using a one-tail approach for pvalues as coefficients are bounded by zero:
# the p-value of a coefficient is the share of bootstrap draws that are <= 0.
bootstrap_df = pd.DataFrame(bootstrap_results)
pval = (bootstrap_df <= 0).sum() / len(bootstrap_df)
# Point estimates from the fit on the full data, indexed like the bootstrap columns.
original_coeffs = pd.Series({
    "alpha": alpha,
    "beta": beta,
    "gamma": gamma,
    "theta": theta,
    "delta_pre_5": delta_pre_5,
    "delta_pre_4": delta_pre_4,
    "delta_pre_3": delta_pre_3,
    "delta_pre_2": delta_pre_2,
    "delta_pre_1": delta_pre_1,
    "delta_pre_0": delta_pre_0,
    "lambda_1": lambda_1,
    "lambda_2": lambda_2})


# From here on `bootstrap_df` is the summary table (one row per coefficient), no longer
# the raw draws. After the drop it keeps mean, std and the 2.5% / 97.5% percentiles.
bootstrap_df = bootstrap_df.describe(percentiles=[0.025, 0.5, 0.975]).T
bootstrap_df["pval"] = pval
bootstrap_df["coeff"] = original_coeffs
bootstrap_df = bootstrap_df.drop(columns = ['count', 'min', '50%', 'max'])
# Show the point estimate first.
cols = ["coeff"] + [col for col in bootstrap_df.columns if col != "coeff"]
bootstrap_df = bootstrap_df[cols]
bootstrap_df.style

### Pour la droite et la gauche dans *Le Monde* avant juin 2012

In [None]:
# Keeping n=6 for consistent results: the global `n` is reused as-is although this
# sub-sample only contains two alignments.
cutoff = pd.Period('2012-06', freq='M')

# Rows of the restricted sample: Le Monde, Right and Left only, up to the cutoff month
# (included). Computed once and shared by the regressors and the outcome.
restricted_rows = (
    (model_data["month"] <= cutoff) &
    (model_data["journal"] == "Le Monde") &
    (model_data["political_alignment"].isin(['Right', 'Left'])))

data_opt = model_data.loc[
    restricted_rows,
    ["inter_dum", "pre_5", "pre_4", "pre_3", "pre_2", "pre_1", "pre_0", "post_dum",
     "na_share", "pres_votes_share", "government", "pres_poll_result", "r2_rank_1", "r2_rank_2"]]

y_opt = model_data.loc[restricted_rows, outcome]

# Only the [0, 1] bounds are enforced here; pass `constraints=constraints` instead of
# the empty list to also impose the ordering of the delta_pre_k coefficients.
result = minimize(objective, initial_guess, args=(data_opt, y_opt), bounds=bounds, constraints=[], method='SLSQP')

# Fail fast: the following cells read the `_loc` coefficients unpacked below, so a
# failed fit must not be allowed to leave them undefined or stale.
if not result.success:
    raise RuntimeError(f"Optimization failed: {result.message}")

alpha_loc, beta_loc, theta_loc, delta_pre_5_loc, delta_pre_4_loc, delta_pre_3_loc, delta_pre_2_loc, delta_pre_1_loc, delta_pre_0_loc, lambda_1_loc, lambda_2_loc = result.x
# gamma is not estimated: it is the complement of beta.
gamma_loc = 1 - beta_loc

for coef_name, coef_value in [
        ("alpha", alpha_loc), ("beta", beta_loc), ("gamma", gamma_loc), ("theta", theta_loc),
        ("delta_pre_5", delta_pre_5_loc), ("delta_pre_4", delta_pre_4_loc), ("delta_pre_3", delta_pre_3_loc),
        ("delta_pre_2", delta_pre_2_loc), ("delta_pre_1", delta_pre_1_loc), ("delta_pre_0", delta_pre_0_loc),
        ("lambda_1", lambda_1_loc), ("lambda_2", lambda_2_loc)]:
    print(f"Optimal {coef_name}: {coef_value:.5f}")

In [None]:
# Performance on restricted dataset: the `_loc` coefficients are evaluated on the
# same rows (`data_opt` / `y_opt`) they were estimated on.
y_pred = compute_y_pred(
    data_opt,
    n, alpha_loc, beta_loc, gamma_loc, theta_loc,
    delta_pre_5_loc, delta_pre_4_loc, delta_pre_3_loc, delta_pre_2_loc, delta_pre_1_loc, delta_pre_0_loc,
    lambda_1_loc, lambda_2_loc)

r2 = r2_score(y_opt, y_pred)
# Mean residual (observed minus predicted).
mr = (y_opt - y_pred).mean()
# Root mean squared error, in the units of the outcome.
rmspe = root_mse(y_opt, y_pred)

print(f"R2: {100*r2:.3f}%")
print(f"MR: {100*mr:.5f}%")
print(f"RMSPE: {rmspe:.5f}")

In [None]:
# Performance on full dataset: the coefficients estimated on the restricted sample
# (`_loc`) are applied to the whole table without the journal dimension.
y_pred = compute_y_pred(
    model_data_no_journal,
    n, alpha_loc, beta_loc, gamma_loc, theta_loc,
    delta_pre_5_loc, delta_pre_4_loc, delta_pre_3_loc, delta_pre_2_loc, delta_pre_1_loc, delta_pre_0_loc,
    lambda_1_loc, lambda_2_loc)

# `y_opt` is rebound to the full-sample outcome here; `data_opt` still holds the
# restricted regressors, so the two no longer describe the same rows after this cell.
y_opt = model_data_no_journal[outcome]

r2 = r2_score(y_opt, y_pred)
# Mean residual (observed minus predicted).
mr = (y_opt - y_pred).mean()
# Root mean squared error, in the units of the outcome.
rmspe = root_mse(y_opt, y_pred)

print(f"R2: {100*r2:.3f}%")
print(f"MR: {100*mr:.5f}%")
print(f"RMSPE: {rmspe:.5f}")

# 2. Ecarts à la norme de représentativité selon les nuances politiques

In [None]:
# Representativeness norm: prediction of the full-data coefficients (not the `_loc`
# ones), stored in place as a new `y_norm` column of `model_data_no_journal`.
model_data_no_journal['y_norm'] = compute_y_pred(
    model_data_no_journal,
    n, alpha, beta, gamma, theta,
    delta_pre_5, delta_pre_4, delta_pre_3, delta_pre_2, delta_pre_1, delta_pre_0,
    lambda_1, lambda_2)
# Plotting copy, so that the conversions below do not alter the modelling table.
plot_data = model_data_no_journal.copy()
# Signed gap between observed share and norm, multiplied by 100.
plot_data['abs_residuals'] = 100 * (plot_data[outcome] - plot_data['y_norm'])
# Periods are converted to timestamps for plotting on a date axis.
plot_data['month'] = plot_data['month'].dt.to_timestamp()

# Two panels of three alignments each, with the line colour of every alignment.
alignment_groups = [
    (['Far left', 'Far right', 'Other'],
     {'Far left': 'crimson',
      'Far right': 'royalblue',
      'Other': 'forestgreen'}),
    (['Right', 'Left', 'Center'],
     {'Right': 'cornflowerblue',
      'Left': 'orchid',
      'Center': 'goldenrod'})]

In [None]:
# Window of the moving average, in rows (one row per month for a given alignment).
# The legend label is built from it so the text always matches what is drawn
# (the label used to announce 12 months while 3 were computed).
ma_window = 3

# One figure per alignment: raw observed share, its moving average and the model norm.
for alignment in ['Far right', 'Far left']:
    color = 'royalblue' if alignment == 'Far right' else 'crimson'
    subset_data = plot_data[plot_data['political_alignment'] == alignment].copy()
    # Rolling mean over consecutive rows: assumes the rows are in chronological order.
    subset_data['MA_observed'] = subset_data[outcome].rolling(window=ma_window).mean()

    fig, ax = plt.subplots(figsize=(24, 4))

    sns.lineplot(data=subset_data, x='month', y=outcome, ax=ax, label='Observed values', alpha=0.15, color=color, linestyle='-')
    sns.lineplot(data=subset_data, x='month', y='MA_observed', ax=ax, label=f'{ma_window} months moving average for OV', color=color, linestyle='dashdot')
    sns.lineplot(data=subset_data, x='month', y='y_norm', ax=ax, label='Predicted values', color='teal', linestyle='dotted')
    ax.set_title("Observed and Predicted Values")
    ax.set_ylabel('')
    ax.set_xlabel('')

    # Grey bands mark the months of the main elections.
    add_shaded_periods([ax], main_elec_months, color='black', alpha=0.1)

    plt.suptitle(f"Proportion of Mentions Attributed to {alignment} Politicians")
    plt.tight_layout()
    plt.savefig(f"output/{alignment}_mention_graph.png", dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Window of the moving average, in rows (one row per month for a given alignment).
# The legend label is built from it so the text always matches what is drawn
# (the label used to announce 6 months while 4 were computed).
ma_window = 4

fig, axes = plt.subplots(2, 1, figsize=(24, 16), sharex=True)

# One panel per alignment group; each alignment gets three lines in its own colour.
for ax, (political_alignments, colors) in zip(axes, alignment_groups):
    alignment_handles = []

    for alignment in political_alignments:
        subset_data = plot_data[plot_data['political_alignment'] == alignment].copy()
        # Rolling mean over consecutive rows: assumes the rows are in chronological order.
        subset_data['MA'] = subset_data[outcome].rolling(window=ma_window).mean()

        ax.plot(subset_data['month'], subset_data[outcome], label=None,
                alpha=0.3, color=colors[alignment], linestyle='-')
        ax.plot(subset_data['month'], subset_data['MA'], label=None,
                alpha=0.65, color=colors[alignment], linestyle='dashdot')
        ax.plot(subset_data['month'], subset_data['y_norm'], label=None,
                alpha=1, color=colors[alignment], linestyle='dotted')

        alignment_handles.append(Line2D([0], [0], color=colors[alignment], lw=2, label=alignment))

    # First legend (colours). It must be re-added as an artist, otherwise the second
    # call to ax.legend below would replace it.
    alignment_legend = ax.legend(handles=alignment_handles, title="Political alignment", loc="upper left")
    ax.add_artist(alignment_legend)

    # Second legend (line styles), drawn with neutral black proxies.
    line_type_handles = [
        Line2D([0], [0], color='black', lw=2, linestyle='-', label="Monthly average"),
        Line2D([0], [0], color='black', lw=2, linestyle='dashdot', label=f"{ma_window} months moving average"),
        Line2D([0], [0], color='black', lw=2, linestyle='dotted', label="Predictions")]
    ax.legend(handles=line_type_handles, title="Values", loc="upper right")

axes[-1].set_xlabel("")

# Grey bands mark the months of the main elections.
add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

plt.suptitle("Mentions Distribution by Political Affiliation\nObserved vs. Predicted Values")
plt.tight_layout()
plt.savefig("output/nuances_mentions_val_graph.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
fig, axes = plt.subplots(2, 1, figsize=(24, 12), sharex=True)

# One panel per alignment group: monthly residuals, their moving average and a zero line.
for ax, (political_alignments, colors) in zip(axes, alignment_groups):
    alignment_handles = []

    for alignment in political_alignments:
        subset_data = plot_data[plot_data['political_alignment'] == alignment].copy()
        # 6-row rolling mean of the signed residuals (rows assumed to be in chronological order).
        subset_data['MA'] = subset_data['abs_residuals'].rolling(window=6).mean()
        
        ax.plot(subset_data['month'], subset_data['abs_residuals'], label=None,
                alpha=0.2, color=colors[alignment], linestyle='-')
        ax.plot(subset_data['month'], subset_data['MA'], label=None,
                alpha=0.65, color=colors[alignment], linestyle='-.')
        # Zero line: where the observed share equals the norm. It is redrawn for every
        # alignment of the panel and shown as "Predictions" in the line-style legend.
        ax.plot(subset_data['month'], [0] * len(subset_data), label=None,
                alpha=1, color='darkgray', linestyle=':')
        
        alignment_handles.append(Line2D([0], [0], color=colors[alignment], lw=2, label=alignment))

    # First legend (colours). It must be re-added as an artist, otherwise the second
    # call to ax.legend below would replace it.
    alignment_legend = ax.legend(handles=alignment_handles, title="Political alignment", loc="upper left")
    ax.add_artist(alignment_legend)

    # Second legend (line styles), drawn with neutral black proxies.
    line_type_handles = [
        Line2D([0], [0], color='black', lw=2, linestyle='-', label="Monthly average"),
        Line2D([0], [0], color='black', lw=2, linestyle='-.', label="6 months moving average"),
        Line2D([0], [0], color='black', lw=2, linestyle=':', label="Predictions")
    ]
    ax.legend(handles=line_type_handles, title="Values", loc="upper right")

axes[-1].set_xlabel("")

# Grey bands mark the months of the main elections.
add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

# NOTE(review): the plotted residuals are signed (observed minus norm, times 100);
# "Absolute" in the title presumably means "not relative" -- confirm the wording.
plt.suptitle("Mentions Distribution by Political Affiliation\nAbsolute Residuals (%)")
plt.tight_layout()
plt.savefig("output/nuances_mentions_abs_graph.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
def compute_proportional_metrics(y_norm, y_true):
    """Average proportional agreement between a norm and the observed values.

    Each observation contributes three ratios, depending on the sign of
    ``y_norm - y_true``:

    * equal: TP = 1, FP = 0, FN = 0;
    * norm above observed: TP = y_true / y_norm, FP = (y_norm - y_true) / y_norm, FN = 0;
    * norm below observed: TP = 1, FP = 0, FN = (y_true - y_norm) / y_true.

    Observations whose difference is NaN match none of the cases and count as
    zero in all three averages.

    Parameters
    ----------
    y_norm, y_true : array-likes of the same length (lists, arrays or Series).

    Returns
    -------
    dict with keys ``'TP'``, ``'FP'`` and ``'FN'``: the mean of each ratio over
    all observations (NaN for empty input).
    """
    # Force a float dtype: with integer inputs the result arrays would inherit the
    # integer dtype and every ratio written into them would be truncated.
    y_norm = np.asarray(y_norm, dtype=float)
    y_true = np.asarray(y_true, dtype=float)

    # Start from zeros so each case only has to set its non-zero ratios.
    TP = np.zeros_like(y_norm)
    FP = np.zeros_like(y_norm)
    FN = np.zeros_like(y_norm)

    diff = y_norm - y_true

    # Case 1: norm equals observed value
    perfect_mask = (diff == 0)
    TP[perfect_mask] = 1

    # Case 2: norm above observed value
    over_mask = (diff > 0)
    TP[over_mask] = y_true[over_mask] / y_norm[over_mask]
    FP[over_mask] = diff[over_mask] / y_norm[over_mask]

    # Case 3: norm below observed value
    under_mask = (diff < 0)
    TP[under_mask] = 1
    FN[under_mask] = -diff[under_mask] / y_true[under_mask]

    return {
        'TP': TP.mean(),
        'FP': FP.mean(),
        'FN': FN.mean()}

In [None]:
# Boundaries of the three sub-periods (each cutoff month belongs to the later period).
cutoff1 = pd.Period('2002-06', freq='M')
cutoff2 = pd.Period('2017-06', freq='M')

# NOTE: `results` is rebound here; it no longer refers to the raw bootstrap output.
results = []

# Requires the `y_norm` column added to `model_data_no_journal` in the section above.
for period_label, period_filter in {
    '1981-2002': model_data_no_journal["month"] < cutoff1,
    '2002-2017': (model_data_no_journal["month"] >= cutoff1) & (model_data_no_journal["month"] < cutoff2),
    '2017-2024': model_data_no_journal["month"] >= cutoff2
}.items():
    period_data = model_data_no_journal[period_filter]
    
    # One record per period x alignment, built from the TP / FP / FN averages.
    for alignment in period_data["political_alignment"].unique():

        subset_data = period_data[period_data["political_alignment"] == alignment]
        y = subset_data[outcome]
        y_norm = subset_data['y_norm']
        metrics = compute_proportional_metrics(y_norm, y)
        results.append({
            'period': period_label,
            'alignment': alignment,
            '% correct predictions': 100 * metrics['TP'],
            '% excess predictions': 100 * metrics['FP'],
            '% missing predictions': 100 * metrics['FN']})

summary = pd.DataFrame(results)
# Ordered categorical so that the table rows follow `nuances_order`.
summary["alignment"] = pd.Categorical(summary["alignment"], categories=nuances_order, ordered=True)
# Wide layout: one row per alignment, one column per (metric, period). Each cell holds
# a single record, so the default mean aggregation leaves the values unchanged.
summary = summary.pivot_table(index="alignment",
                                    columns="period",
                                    values=["% correct predictions",
                                            "% excess predictions",
                                            "% missing predictions"],
                                    observed=False)
summary = summary.reset_index()
# Remove the level names of the column MultiIndex from the rendered header.
summary.columns.names = [None, None]
summary.style.hide(axis="index").format({col: "{:.3f}" for col in summary.columns[1:]})

# 3. Ecarts à la norme de représentativité selon les journaux

In [None]:
# Same norm as before (full-data coefficients), now evaluated on the journal-level
# table and stored in place as a new `y_norm` column of `model_data`.
model_data['y_norm'] = compute_y_pred(
    model_data,
    n, alpha, beta, gamma, theta,
    delta_pre_5, delta_pre_4, delta_pre_3, delta_pre_2, delta_pre_1, delta_pre_0,
    lambda_1, lambda_2)
# NOTE: `plot_data` is rebound to the journal-level data from here on.
plot_data = model_data.copy()
# Signed gap between observed share and norm, multiplied by 100.
plot_data['abs_residuals'] = 100 * (plot_data[outcome] - plot_data['y_norm'])
# Periods are converted to timestamps for plotting on a date axis.
plot_data['month'] = plot_data['month'].dt.to_timestamp()

# Line colour of each journal (this rebinds `colors`, previously a per-alignment mapping).
colors = {
    'Le Figaro': 'goldenrod',
    'Libération': 'limegreen',
    'Le Monde': 'orchid',
    'La Croix': 'skyblue',
    'Médiapart': 'crimson'}

# Alignments plotted, one panel each ("Other" is not included).
alignments = [
    "Far right",
    "Right",
    "Center",
    "Left",
    "Far left"]

n_alignments = len(alignments)

In [None]:
fig, axes = plt.subplots(n_alignments, 1, figsize=(24, 4 * n_alignments), sharex=True)

# One panel per alignment: observed share of every journal plus the common norm.
for i, alignment in enumerate(alignments):
    ax = axes[i]
    subset_data = plot_data[plot_data['political_alignment'] == alignment]

    for journal in subset_data['journal'].unique():
        # Médiapart is left out of the observed lines.
        if journal == 'Médiapart':
            continue
        journal_data = subset_data[subset_data['journal'] == journal]
        ax.plot(journal_data['month'], journal_data[outcome], label=journal,
                alpha=0.7, color=colors[journal], linestyle='-')

    # `subset_data` holds one row per journal and month. Plotting it directly would
    # draw the norm once per journal and, depending on the row order, join the last
    # month of one journal to the first month of the next. Keep one point per distinct
    # (month, norm) pair and draw them in chronological order instead.
    # NOTE(review): this presumes the norm does not depend on the journal -- confirm.
    norm_line = subset_data.drop_duplicates(subset=['month', 'y_norm']).sort_values('month')
    ax.plot(norm_line['month'], norm_line['y_norm'], color='black', alpha=0.8, linestyle='dotted')
    ax.set_title(f"{alignment}")
    ax.legend()

# Grey bands mark the months of the main elections.
add_shaded_periods(axes, main_elec_months, color='black', alpha=0.1)

# Two-line title, written like the other figures (the former triple-quoted string
# added blank lines before, between and after the two lines).
plt.suptitle("Mentions Distribution by Political Affiliation and Journal\nObserved vs. Predicted Values")
plt.tight_layout()
plt.savefig("output/journals_mentions_val_graph.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Boundaries of the three sub-periods (each cutoff month belongs to the later period).
cutoff1 = pd.Period('2002-06', freq='M')
cutoff2 = pd.Period('2017-06', freq='M')

results = []

# Requires the `y_norm` column added to `model_data` at the start of this section.
for period_label, period_filter in {
    '1981-2002': model_data["month"] < cutoff1,
    '2002-2017': (model_data["month"] >= cutoff1) & (model_data["month"] < cutoff2),
    '2017-2024': model_data["month"] >= cutoff2
}.items():
    period_data = model_data[period_filter]
    
    for alignment in period_data["political_alignment"].unique():
        subset_data = period_data[period_data["political_alignment"] == alignment]
        
        # One record per period x alignment x journal, built from the TP / FP / FN averages.
        # NOTE(review): the metric labels say "prescriptions" here and "predictions" in
        # the alignment-level table -- confirm which wording is intended.
        for journal in subset_data["journal"].unique():
            sub_subset_data = subset_data[subset_data["journal"] == journal]
            y = sub_subset_data[outcome]
            y_norm = sub_subset_data['y_norm']
            metrics = compute_proportional_metrics(y_norm, y)
            results.append({
                'period': period_label,
                'alignment': alignment,
                'journal': journal,
                '% correct prescriptions': 100 * metrics['TP'],
                '% excess prescriptions': 100 * metrics['FP'],
                '% missing prescriptions': 100 * metrics['FN']})

summary = pd.DataFrame(results)
# Ordered categorical so that the table rows follow `nuances_order`.
summary["alignment"] = pd.Categorical(summary["alignment"], categories=nuances_order, ordered=True)
# Wide layout: one row per (alignment, journal), one column per (metric, period). Each
# cell holds a single record, so the default mean aggregation leaves the values unchanged.
summary = summary.pivot_table(index=(["alignment", "journal"]),
                                    columns="period",
                                    values=["% correct prescriptions",
                                            "% excess prescriptions",
                                            "% missing prescriptions"],
                                    observed=False)
summary = summary.reset_index()
# The first two columns are the keys; only the metric columns get a numeric format.
summary.style.hide(axis="index").format({col: "{:.3f}" for col in summary.columns[2:]})