In [1]:
import numpy as np
from scipy.stats import entropy
import pandas as pd
import math
import matplotlib.pyplot as plt
import wooldridge as woo
import statsmodels.formula.api as smf

# Step 0 : We import the necessary files

In [2]:
dataFF_red_norm01 = pd.read_csv('dataFF_red_norm01.csv')
dataFF_red_norm_gauss = pd.read_csv('dataFF_red_norm_gauss.csv')

# Step 2 : We do regressions

In [3]:
reg = smf.ols(formula='Nb_carac_présentation~ Nb_doct_2km+Nb_doct_8km+Nb_doct_30km', data=dataFF_red_norm01)
results = reg.fit()
b = results.params
print(f'b: \n{b}\n')

"""# scatter plot and fitted values:
plt.plot('APL', 'Nb_carac_présentation', data=dataFF_red_norm, color='grey', marker='o', linestyle='')
plt.plot(dataFF_red_norm['APL'], results.fittedvalues, color='black', linestyle='-')
plt.ylabel('Nb_caracteres_presentation')
plt.xlabel('APL')"""

b: 
Intercept       0.053322
Nb_doct_2km    -0.044071
Nb_doct_8km     0.083637
Nb_doct_30km   -0.002200
dtype: float64



"# scatter plot and fitted values:\nplt.plot('APL', 'Nb_carac_présentation', data=dataFF_red_norm, color='grey', marker='o', linestyle='')\nplt.plot(dataFF_red_norm['APL'], results.fittedvalues, color='black', linestyle='-')\nplt.ylabel('Nb_caracteres_presentation')\nplt.xlabel('APL')"

In [13]:
# print results using summary:
print(f'results.summary(): \n{results.summary()}\n')

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

results.summary(): 
                            OLS Regression Results                            
Dep. Variable:          Nb_doct_D_8km   R-squared:                       0.070
Model:                            OLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     155.0
Date:                Mon, 21 Oct 2024   Prob (F-statistic):           6.89e-97
Time:                        16:34:39   Log-Likelihood:                 20887.
No. Observations:                6204   AIC:                        -4.177e+04
Df Residuals:                    6200   BIC:                        -4.174e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                                                             coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------

In [16]:
reg = smf.ols(formula='sum~ Nb_doct_2km+Nb_doct_8km+Nb_doct_30km+Nb_doct_f_2km+Nb_doct_f_8km+Nb_doct_f_30km', data=dataFF_red_norm_gauss)
results = reg.fit()
b = results.params
print(f'b: \n{b}\n')

# Extraire les résultats pour les mettre en LaTeX
params = results.params
bse = results.bse
tvalues = results.tvalues
pvalues = results.pvalues

# Générer un tableau LaTeX
latex_table = f"""
\\begin{{table}}[htbp]
    \\centering
    \\begin{{tabular}}{{lcccc}}
        \\hline
        \\textbf{{Variable}} & \\textbf{{Coefficient}} & \\textbf{{Erreur standard}} & \\textbf{{t-stat}} & \\textbf{{p-valeur}} \\\\
        \\hline
        Constante & {params.iloc[0]:.4f} & {bse.iloc[0]:.4f} & {tvalues.iloc[0]:.4f} & {pvalues.iloc[0]:.4f} \\\\
        X1 & {params.iloc[1]:.4f} & {bse.iloc[1]:.4f} & {tvalues.iloc[1]:.4f} & {pvalues.iloc[1]:.4f} \\\\
        X2 & {params.iloc[2]:.4f} & {bse.iloc[2]:.4f} & {tvalues.iloc[2]:.4f} & {pvalues.iloc[2]:.4f} \\\\
        \\hline
    \\end{{tabular}}
    \\caption{{Résultats de la régression linéaire}}
    \\label{{tab:regression_results}}
\\end{{table}}
"""

# Afficher le tableau LaTeX
print(latex_table)


b: 
Intercept          -0.275568
Nb_doct_2km      -382.532551
Nb_doct_8km       -69.634074
Nb_doct_30km       -1.580851
Nb_doct_f_2km     693.034708
Nb_doct_f_8km     161.105423
Nb_doct_f_30km      0.886223
dtype: float64


\begin{table}[htbp]
    \centering
    \begin{tabular}{lcccc}
        \hline
        \textbf{Variable} & \textbf{Coefficient} & \textbf{Erreur standard} & \textbf{t-stat} & \textbf{p-valeur} \\
        \hline
        Constante & -0.2756 & 0.1477 & -1.8663 & 0.0620 \\
        X1 & -382.5326 & 126.7427 & -3.0182 & 0.0026 \\
        X2 & -69.6341 & 34.2959 & -2.0304 & 0.0424 \\
        \hline
    \end{tabular}
    \caption{Résultats de la régression linéaire}
    \label{tab:regression_results}
\end{table}



In [6]:
# print results using summary:
print(f'results.summary(): \n{results.summary()}\n')

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')
#vote1.describe()

results.summary(): 
                            OLS Regression Results                            
Dep. Variable:                    sum   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.011
Method:                 Least Squares   F-statistic:                     12.28
Date:                Mon, 21 Oct 2024   Prob (F-statistic):           8.70e-14
Time:                        16:32:42   Log-Likelihood:                -22753.
No. Observations:                6204   AIC:                         4.552e+04
Df Residuals:                    6197   BIC:                         4.557e+04
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept         -0.275

## Regression sur l'instrument

In [7]:
dataFF_red_norm01.columns

Index(['Unnamed: 0', 'Nb_Skills', 'moyens_paiement', 'carte_vitale', 'site',
       'Nb_carac_présentation', 'Nb_langues', 'Carac_card_1', 'Transport',
       'Parking', 'Coordonnées', 'Sans_RDV', 'nb_terms_card_0',
       'nb_terms_card_3', 'nb_terms_card_4', 'nb_carac_card_0',
       'nb_carac_card_3', 'nb_carac_card_4',
       'Diplômes_nationaux_et_universitaires_b', 'Autres_formations_b',
       'Expériences_b', 'Formations_b', 'Travaux_et_publications_b', 'RPPS',
       'share_F',
       'Population standardisée par la consommation de soins par tranche d'âge',
       'secteur', 'Nb_doct_2km', 'Nb_doct_8km', 'Nb_doct_30km',
       'Nb_doct_f_2km', 'Nb_doct_f_8km', 'Nb_doct_f_30km', 'Nb_doct_D_2km',
       'Nb_doct_D_8km', 'Nb_doct_D_30km', 'Nb_doct_f_D_2km', 'Nb_doct_f_D_8km',
       'Nb_doct_f_D_30km', 'APL', 'Fibre', 'share_Doctolib'],
      dtype='object')

In [8]:
dataFF_red_norm01=dataFF_red_norm01.rename(columns={"Population standardisée par la consommation de soins par tranche d'âge": "Population_standardisee_par_la_consommation_de_soins_par_tranche_d_age"})

In [9]:
reg = smf.ols(formula='share_Doctolib~Fibre+share_F+Population_standardisee_par_la_consommation_de_soins_par_tranche_d_age', data=dataFF_red_norm01)
results = reg.fit()
b = results.params
print(f'b: \n{b}\n')

b: 
Intercept                                                                 4.843796e-01
Fibre                                                                    -2.923922e-01
share_F                                                                  -7.325937e-03
Population_standardisee_par_la_consommation_de_soins_par_tranche_d_age   -8.683935e-07
dtype: float64



In [10]:
# print results using summary:
print(f'results.summary(): \n{results.summary()}\n')

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')
#vote1.describe()

results.summary(): 
                            OLS Regression Results                            
Dep. Variable:         share_Doctolib   R-squared:                       0.153
Model:                            OLS   Adj. R-squared:                  0.153
Method:                 Least Squares   F-statistic:                     373.5
Date:                Mon, 21 Oct 2024   Prob (F-statistic):          5.67e-223
Time:                        16:32:42   Log-Likelihood:                 1460.7
No. Observations:                6204   AIC:                            -2913.
Df Residuals:                    6200   BIC:                            -2886.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                                                             coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------

the instrument is sufficiently strong/relevant to identify the endogenous variable if the first-stage f-statistic is larger than 10 and here F-statistic = 0.153, the instrumebt is then relevant

In [11]:
reg = smf.ols(formula='Nb_doct_D_2km~Fibre+share_F+Population_standardisee_par_la_consommation_de_soins_par_tranche_d_age', data=dataFF_red_norm01)
results = reg.fit()
b = results.params
#print(f'b: \n{b}\n')

# print results using summary:
print(f'results.summary(): \n{results.summary()}\n')

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')
#vote1.describe()

results.summary(): 
                            OLS Regression Results                            
Dep. Variable:          Nb_doct_D_2km   R-squared:                       0.052
Model:                            OLS   Adj. R-squared:                  0.051
Method:                 Least Squares   F-statistic:                     113.2
Date:                Mon, 21 Oct 2024   Prob (F-statistic):           2.31e-71
Time:                        16:32:42   Log-Likelihood:                 20836.
No. Observations:                6204   AIC:                        -4.166e+04
Df Residuals:                    6200   BIC:                        -4.164e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                                                             coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------

In [12]:
reg = smf.ols(formula='Nb_doct_D_8km~Fibre+share_F+Population_standardisee_par_la_consommation_de_soins_par_tranche_d_age', data=dataFF_red_norm01)
results = reg.fit()
b = results.params
#print(f'b: \n{b}\n')

# print results using summary:
print(f'results.summary(): \n{results.summary()}\n')

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')
#vote1.describe()

results.summary(): 
                            OLS Regression Results                            
Dep. Variable:          Nb_doct_D_8km   R-squared:                       0.070
Model:                            OLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     155.0
Date:                Mon, 21 Oct 2024   Prob (F-statistic):           6.89e-97
Time:                        16:32:42   Log-Likelihood:                 20887.
No. Observations:                6204   AIC:                        -4.177e+04
Df Residuals:                    6200   BIC:                        -4.174e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                                                             coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------