In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

from sklearn.metrics import r2_score

In [None]:
def _prepare_model_data(path):
    """Load one modelling table from parquet and add the derived columns.

    Steps, applied identically to every input file:
      1. read the parquet file at ``path``;
      2. drop the rows whose ``political_alignment`` is ``'autre'``;
      3. add ``next_pres_votes_share`` = ``pres_dummy * pres_votes_share``;
      4. add ``election_year`` = calendar year of ``last_election``;
      5. add ``decade`` = calendar year of ``month`` floored to a multiple of 10.

    Parameters
    ----------
    path : str
        Location of the parquet file to read.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame holding the filtered rows and derived columns.
    """
    raw = pd.read_parquet(path)
    # Explicit .copy(): the boolean filter returns a slice of `raw`, and
    # assigning new columns onto that slice triggers SettingWithCopyWarning
    # (chained assignment). Working on an independent copy is unambiguous.
    prepared = raw[raw["political_alignment"] != "autre"].copy()
    prepared["next_pres_votes_share"] = prepared["pres_dummy"] * prepared["pres_votes_share"]
    prepared["election_year"] = prepared["last_election"].dt.year
    prepared["decade"] = (prepared["month"].dt.year // 10) * 10
    return prepared


model_data_no_journal = _prepare_model_data("data/model_data_no_journal.parquet")
model_data = _prepare_model_data("data/model_data.parquet")

In [None]:
# Name of the model_data column used as the dependent variable: it is
# interpolated as the left-hand side of the regression formulas and used to
# select the observed values when computing R2.
outcome = "quotes_nb"

Tentative de modélisation directe du nombre de citations.

In [None]:
# Additive specification: categorical election-year term plus the other
# regressors, estimated by OLS with an HC3 robust covariance matrix.
formula = f"{outcome} ~ C(election_year) + journal + political_alignment + na_share + leg_votes_share + pres_votes_share + government"
model = smf.ols(formula, data=model_data).fit(cov_type="HC3")

# In-sample fit: predictions are computed on the same frame used for estimation.
y_pred = model.predict(model_data)
y = model_data[outcome]
r2 = r2_score(y, y_pred)
print(f"R2: {100*r2:.2f}%", end="\n\n")

# Coefficient table restricted to the terms with a p-value of at most 5%.
params = model.params.to_frame(name="coef")
pvalues = model.pvalues.to_frame(name="pval")
results = params.join(pvalues, how="inner")
results = results.loc[results["pval"].le(0.05)]
results.style

On se heurte à la difficulté prévue : la tendance étant forte et indépendante des régresseurs, les effets fixes d'année d'élection capturent l'essentiel de la dynamique. Les coefficients des autres régresseurs sont à la limite de la significativité et ne sont pas forcément interprétables.

In [None]:
# Interaction specification: election year crossed with political alignment
# (main effects and their interaction), no explicit intercept (`- 1`),
# estimated by OLS with an HC3 robust covariance matrix.
formula = f"{outcome} ~ C(election_year) * political_alignment + na_share + journal + leg_votes_share + pres_votes_share + government - 1"
model = smf.ols(formula, data=model_data).fit(cov_type="HC3")

# In-sample fit: predictions are computed on the same frame used for estimation.
y_pred = model.predict(model_data)
y = model_data[outcome]
r2 = r2_score(y, y_pred)
print(f"R2: {100*r2:.2f}%", end="\n\n")

# Coefficient table restricted to the terms with a p-value of at most 5%.
params = model.params.to_frame(name="coef")
pvalues = model.pvalues.to_frame(name="pval")
results = params.join(pvalues, how="inner")
results = results.loc[results["pval"].le(0.05)]
results.style

L'ajout de termes d'interaction entre l'année d'élection et l'orientation politique, qui permettraient de refléter une éventuelle normalisation de l'extrême droite, ne modifie pas les résultats.