In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

In [2]:
# Load the dataset from the CSV file sitting next to the notebook.
df = pd.read_csv(filepath_or_buffer="Biomedic_clean3.csv")

In [3]:
# Relabel every column positionally: the concatenated list must contain exactly
# one name per column of `df`, in the same order as the columns appear in the CSV.
df.columns = (
    # Leading index/identifier columns, strain and patient-history columns.
    ['Unnamed: 0.1', 'ID', 'ID Name', 'Souches', 'Diabetes', 'Hypertension',
     'Hospital_before', 'Infection_Freq']
    # One column per antibiotic (the same names are listed in `atb` below).
    + ['AMX', 'AMC', 'CZ', 'FOX', 'CTX', 'IPM', 'GEN', 'AN',
       'Acide_nalidixique', 'ofx', 'CIP', 'C', 'Co_trimoxazole', 'Furanes',
       'colistine']
    # Sample date, location and patient demographics.
    + ['Collection_Date', 'Etat_Complet', 'Pays', 'Ville', 'age', 'gender',
       'Age_Group', 'Unnamed: 0', 'Population']
    # Remaining descriptors; the French labels are kept verbatim.
    + ['Temp. Min Moy. Annuelle (°C)', 'Temp. Moy. Annuelle (°C)',
       'Temp. Max Moy. Annuelle (°C)', 'Humidité Moyenne (%)',
       'Ensoleillement Moyen (heures/an)', 'Pluviométrie Moyenne (mm/an)',
       'age_cat', 'Mobility', 'Multi_res']
)

In [4]:
# Names of the antibiotic columns of `df`, in the order they are analysed.
atb = [
    "AMX",
    "AMC",
    "CZ",
    "FOX",
    "CTX",
    "IPM",
    "GEN",
    "AN",
    "Acide_nalidixique",
    "ofx",
    "CIP",
    "C",
    "Co_trimoxazole",
    "Furanes",
    "colistine",
]


On va réunir les intermédiaires et les sensibles pour simplifier les calculs.

In [5]:
# Collapse the three-level coding to binary: 1 stays 1, while 0.5 and 0 both
# become 0 (presumably 1 = resistant, 0.5 = intermediate, 0 = susceptible —
# confirm against the data dictionary). Any other value becomes NaN via `map`.
_atb_recode = {1: 1, 0.5: 0, 0: 0}
df[atb] = df[atb].apply(lambda column: column.map(_atb_recode))

In [6]:
# Fit one logistic regression per (strain, antibiotic) pair,
#     <antibiotic> ~ Diabetes * Hypertension,
# and collect the p-values of both main effects and of their interaction.
# `atb` (the antibiotic column names) is defined in an earlier cell and is
# reused here instead of being redefined with the same literal.
souches = df['Souches'].unique()
predictors = ["Diabetes", "Hypertension"]

pvals = []

for s in souches:
    df_s = df[df['Souches'] == s]

    for ab in atb:
        # The formula interface drops rows with missing values before fitting,
        # so the variability checks must be made on those complete cases only;
        # checking the unfiltered strain subset can let a constant outcome or
        # a constant predictor through to the model.
        df_fit = df_s[[ab] + predictors].dropna()

        # A model is only identifiable if the outcome and both predictors
        # each take at least two distinct values in the rows actually used.
        if any(df_fit[col].nunique() < 2 for col in [ab] + predictors):
            continue

        try:
            # Originally chosen "to avoid singularities".
            # NOTE(review): per the statsmodels documentation `alpha` defaults
            # to 0 in fit_regularized, so no penalty is applied here — confirm
            # whether a non-zero alpha was intended.
            model = smf.logit(
                f"{ab} ~ Diabetes * Hypertension",
                data=df_fit
            ).fit_regularized(disp=False)

            p = model.pvalues

            # The "[T.True]" term names assume Diabetes and Hypertension hold
            # True/False values; a different coding raises a KeyError that is
            # reported by the handler below.
            pvals.append({
                "Souche": s,
                "Antibiotic": ab,
                "Diabetes": p["Diabetes[T.True]"],
                "Hypertension": p["Hypertension[T.True]"],
                "Interaction": p["Diabetes[T.True]:Hypertension[T.True]"]
            })

        except Exception as e:
            # Best effort: report the failing pair and move on to the next one.
            print(f"Erreur pour {s}, {ab} : {e}")
            continue

# Explicit columns keep the frame usable (empty but with the expected schema)
# even when no model could be fitted.
pvals_df = pd.DataFrame(
    pvals,
    columns=["Souche", "Antibiotic", "Diabetes", "Hypertension", "Interaction"],
)
pvals_df

Unnamed: 0,Souche,Antibiotic,Diabetes,Hypertension,Interaction
0,ESCHERICHIA COLI,AMX,0.406021,0.169816,0.159228
1,ESCHERICHIA COLI,AMC,0.162813,0.192777,0.634718
2,ESCHERICHIA COLI,CZ,0.813358,0.109797,0.456994
3,ESCHERICHIA COLI,FOX,0.302678,0.411003,0.891400
4,ESCHERICHIA COLI,CTX,0.726578,0.928480,0.522193
...,...,...,...,...,...
130,ACINETOBACTER BAUMANNII,CIP,0.171913,0.999740,0.999712
131,ACINETOBACTER BAUMANNII,C,0.761001,0.230101,0.917879
132,ACINETOBACTER BAUMANNII,Co_trimoxazole,0.519165,0.667772,0.999746
133,ACINETOBACTER BAUMANNII,Furanes,0.212455,0.556735,0.628278


In [8]:
# Reshape the long p-value table into one strain x antibiotic grid per model
# term (rows: 'Souche', columns: 'Antibiotic', cells: that term's p-value).
impact_diabetes, impact_hypertension, impact_interaction = (
    pvals_df[['Souche', 'Antibiotic', term]].pivot_table(
        index='Souche',
        columns='Antibiotic',
        values=term,
    )
    for term in ('Diabetes', 'Hypertension', 'Interaction')
)

In [None]:
# Readable view of the diabetes p-values: every cell below 0.05 is replaced by
# the label 'Significant'; other cells (including missing ones) are left as is.
# The comparison is done on the whole table at once, because testing a Series
# inside an `if` raises "truth value of a Series is ambiguous".
# The result is stored in a separate variable so that `impact_diabetes` itself
# stays numeric for the threshold comparison performed in the next cell.
impact_diabetes_labeled = impact_diabetes.astype(object).mask(
    impact_diabetes < 0.05, 'Significant'
)
impact_diabetes_labeled

In [28]:
# Significance level applied to every p-value.
ALPHA = 0.05


def _significance_table(term):
    """Return a strain x antibiotic table flagging p-values of `term` below ALPHA.

    The table is rebuilt from `pvals_df` on every call, so re-running this cell
    always gives the same result (overwriting the p-value tables in place and
    comparing them again would flip the flags, since ``False < 0.05`` is True).

    Cells are True/False where a p-value exists and NaN where no model was
    fitted for that strain/antibiotic pair, so "not tested" is not reported as
    "not significant".
    """
    p_table = pvals_df[['Souche', 'Antibiotic', term]].pivot_table(
        index='Souche', columns='Antibiotic', values=term
    )
    # Cast to object first so the boolean flags can coexist with NaN markers.
    return p_table.lt(ALPHA).astype(object).where(p_table.notna())


impact_diabetes = _significance_table('Diabetes')
impact_hypertension = _significance_table('Hypertension')
impact_interaction = _significance_table('Interaction')

In [29]:
# Write the raw p-values, then each impact table, to CSV files in the working
# directory. The impact tables are indexed by strain, so their index is turned
# back into a regular column before writing.
pvals_df.to_csv("pvals_regression_logistique.csv", index=False)

for csv_name, impact_table in (
    ("impact_diabetes.csv", impact_diabetes),
    ("impact_hypertension.csv", impact_hypertension),
    ("impact_interaction.csv", impact_interaction),
):
    impact_table.reset_index().to_csv(csv_name, index=False)