In [1]:
# Import librairies

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression

import plotly.express as px

In [2]:
# URL of the CSV file of INSEE data
insee_url = 'https://medical-deserts-project.s3.eu-north-1.amazonaws.com/insee_clean.csv'

# Read the CSV file from the URL into a DataFrame
insee_df_original = pd.read_csv(insee_url, sep = ',', encoding='utf-8')

In [37]:
insee_df = insee_df_original.copy()

In [38]:
pd.set_option("display.max_columns", None)

In [39]:
# Remove useless columns
insee_df = insee_df.drop(["APL aux médecins généralistes de 65 ans et moins", "APL aux médecins généralistes de 62 ans et moins"], axis=1)

# APL column at the end of dataset
APL_column = insee_df.pop("APL aux médecins généralistes (sans borne d'âge)")
insee_df["APL aux médecins généralistes (sans borne d'âge)"] = APL_column

# Rename APL column
insee_df.rename(columns={"APL aux médecins généralistes (sans borne d'âge)": "APL"}, inplace=True)


In [40]:
print('Number of rows :', insee_df.shape[0])
print('Number of columns :', insee_df.shape[1])
print()

# Show first rows of the dataset
print('First rows of the dataset :')
display(insee_df.head())
print()

# Dataset statistics
print('Basics statistics :')
summary_stats_all = insee_df.describe(include='all')
display(summary_stats_all)
print()

# Missing values percentage
missing_percentages = (insee_df.isna().mean() * 100).round(2)
print('Percentage of missing values: ')
print(missing_percentages)

Number of rows : 38590
Number of columns : 88

First rows of the dataset :


Unnamed: 0,Dynamique Entrepreneuriale,Dynamique Entrepreneuriale Service et Commerce,Synergie Médicale COMMUNE,SEG Croissance POP,Nb Omnipraticiens BV,Nb Infirmiers Libéraux BV,Nb dentistes Libéraux BV,Nb pharmaciens Libéraux BV,Densité Médicale BV,Score équipement de santé BV,Indice Démographique,Nb Ménages,Nb Résidences Principales,Nb propriétaire,Nb Logement,Nb Résidences Secondaires,Nb Log Vacants,Nb Occupants Résidence Principale,Nb Entreprises Secteur Services,Nb Entreprises Secteur Commerce,Nb Entreprises Secteur Construction,Nb Entreprises Secteur Industrie,Nb Création Enteprises,Nb Création Industrielles,Nb Création Construction,Nb Création Commerces,Nb Création Services,Moyenne Revenus Fiscaux Départementaux,Moyenne Revenus Fiscaux Régionaux,Dep Moyenne Salaires Horaires,Dep Moyenne Salaires Cadre Horaires,Dep Moyenne Salaires Prof Intermédiaire Horaires,Dep Moyenne Salaires Employé Horaires,Dep Moyenne Salaires Ouvrié Horaires,Reg Moyenne Salaires Horaires,Reg Moyenne Salaires Cadre Horaires,Reg Moyenne Salaires Prof Intermédiaire Horaires,Reg Moyenne Salaires Employé Horaires,Reg Moyenne Salaires Ouvrié Horaires,Valeur ajoutée régionale,Urbanité Ruralité,Nb Logement Secondaire et Occasionnel,Nb Hotel,Capacité Hotel,Nb Camping,Capacité Camping,Dynamique Démographique BV,Taux Propriété,Dynamique Démographique INSEE,Capacité Fisc,Moyenne Revnus fiscaux,"Nb Education, santé, action sociale",Nb Services personnels et domestiques,"Nb Santé, action sociale",Nb Industries des biens intermédiaires,Nb de Commerce,Nb de Services aux particuliers,"Nb institution de Education, santé, action sociale, administration",PIB Régionnal,Score Croissance Entrepreneuriale,Environnement Démographique,Fidélité,Seg Cap Fiscale,Seg Dyn Entre,DYN SetC,latitude,longitude,Catégorie commune dans aire d'attraction des villes 2020,Tranche détaillée d'aire d'attraction des villes 2020,Libellé degré de densité,Population en 2014 (princ),Pop 0-14 ans en 2014 (princ),Pop 15-29 ans en 2014 (princ),Pop 30-44 ans en 2014 (princ),Pop 45-59 ans en 2014 (princ),Pop 60-74 ans en 2014 (princ),Pop 75-89 ans en 2014 (princ),Pop 15 ans ou plus en 2014 (compl),Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl),"Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)",Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl),Pop 15 ans ou plus Employés en 2014 (compl),Pop 15 ans ou plus Ouvriers en 2014 (compl),Pop 15 ans ou plus Retraités en 2014 (compl),Pop 15 ans ou plus Autres en 2014 (compl),taux chômage(15-64 ans),APL
0,57.0,23.0,114,en croissance démographique,9,14,7,7,0.09286,4,44.19769,247,248,196,289,32,9,728,7.0,11.0,2.0,2.0,4.0,0.0,2.0,1.0,1.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,32.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,67,-1,117,11483.5,3.0,1.0,0.0,9364,9350,3372,15105,173681,0.01585,Bassin Industriel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,46.153721,4.92585,Commune de la couronne,Aire de moins de 10 000 habitants,Rural à habitat dispersé,767.0,161.0,102.0,132.0,189.0,125.0,53.0,605.0,15.0,20.0,75.0,95.0,100.0,125.0,145.0,30.0,8.776596,2.293
1,45.0,4.0,143,en croissance démographique,31,36,18,18,0.099229,4,10.18071,67,67,61,142,71,4,168,4.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,71.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,42,0,110,11483.5,0.0,0.0,0.0,9364,9350,3372,15105,173681,0.00173,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,46.009606,5.428088,Commune hors attraction des pôles,Commune hors attraction des villes,Rural à habitat dispersé,239.0,54.0,28.0,69.0,36.0,33.0,17.0,192.0,0.0,20.0,15.0,25.0,39.0,10.0,64.0,20.0,8.130081,2.6
2,634.0,828.0,366,en croissance démographique,31,36,18,18,0.099229,4,696.92134,4640,4635,1968,5184,135,414,11015,342.0,301.0,58.0,108.0,83.0,4.0,14.0,27.0,38.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com < 50 m habts,135.0,2.0,52.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,37,-55,250,11483.5,113.0,41.0,118.0,9364,9350,3372,15105,173681,0.38471,Bassin Résidentiel en croissance démographique,Pop Mobile,Fiscalité moyenne,Dynamique Economique,Bonne Dynamique Entreprise Serv et Com,45.961049,5.372275,Commune-centre,Aire de 30 000 à moins de 50 000 habitants,Centres urbains intermédiaires,14022.0,2778.0,2958.0,2642.0,2603.0,1853.0,1045.0,11228.0,3.0,334.0,743.0,1777.0,1918.0,1818.0,2878.0,1757.0,15.859777,4.079
3,113.0,62.0,132,en croissance démographique,12,12,6,6,1.0,1,85.774,473,473,344,505,14,18,1406,22.0,26.0,17.0,10.0,6.0,0.0,4.0,2.0,0.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,14.0,2.0,17.0,1.0,72.0,Grande Ville,68,-3,127,11483.5,5.0,2.0,7.0,9364,9350,3372,15105,173681,0.02824,Bassin Urbain en croissance démographique,Pop Mobile,Fiscalité moyenne,Moyenne dynamique,Faible Dynamique Serv et Com,45.996164,4.911967,Commune de la couronne,Aire de 1 000 000 d’habitants ou plus (hors Pa...,Bourgs ruraux,1627.0,336.0,251.0,323.0,376.0,232.0,99.0,1337.0,5.0,60.0,106.0,231.0,241.0,231.0,312.0,151.0,7.875895,4.378
4,42.0,1.0,121,en croissance démographique,26,21,10,10,0.100905,3,5.24276,41,41,28,57,13,3,86,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,13.0,0.0,0.0,0.0,0.0,3.Accroissement par excédent migratoire,49,0,109,11483.5,0.0,0.0,0.0,9364,9350,3372,15105,173681,0.0,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,45.749886,5.594585,Commune de la couronne,Aire de 20 000 à moins de 30 000 habitants,Rural à habitat dispersé,109.0,12.0,16.0,15.0,29.0,27.0,10.0,89.0,0.0,0.0,5.0,25.0,15.0,15.0,25.0,5.0,13.793103,1.069



Basics statistics :


Unnamed: 0,Dynamique Entrepreneuriale,Dynamique Entrepreneuriale Service et Commerce,Synergie Médicale COMMUNE,SEG Croissance POP,Nb Omnipraticiens BV,Nb Infirmiers Libéraux BV,Nb dentistes Libéraux BV,Nb pharmaciens Libéraux BV,Densité Médicale BV,Score équipement de santé BV,Indice Démographique,Nb Ménages,Nb Résidences Principales,Nb propriétaire,Nb Logement,Nb Résidences Secondaires,Nb Log Vacants,Nb Occupants Résidence Principale,Nb Entreprises Secteur Services,Nb Entreprises Secteur Commerce,Nb Entreprises Secteur Construction,Nb Entreprises Secteur Industrie,Nb Création Enteprises,Nb Création Industrielles,Nb Création Construction,Nb Création Commerces,Nb Création Services,Moyenne Revenus Fiscaux Départementaux,Moyenne Revenus Fiscaux Régionaux,Dep Moyenne Salaires Horaires,Dep Moyenne Salaires Cadre Horaires,Dep Moyenne Salaires Prof Intermédiaire Horaires,Dep Moyenne Salaires Employé Horaires,Dep Moyenne Salaires Ouvrié Horaires,Reg Moyenne Salaires Horaires,Reg Moyenne Salaires Cadre Horaires,Reg Moyenne Salaires Prof Intermédiaire Horaires,Reg Moyenne Salaires Employé Horaires,Reg Moyenne Salaires Ouvrié Horaires,Valeur ajoutée régionale,Urbanité Ruralité,Nb Logement Secondaire et Occasionnel,Nb Hotel,Capacité Hotel,Nb Camping,Capacité Camping,Dynamique Démographique BV,Taux Propriété,Dynamique Démographique INSEE,Capacité Fisc,Moyenne Revnus fiscaux,"Nb Education, santé, action sociale",Nb Services personnels et domestiques,"Nb Santé, action sociale",Nb Industries des biens intermédiaires,Nb de Commerce,Nb de Services aux particuliers,"Nb institution de Education, santé, action sociale, administration",PIB Régionnal,Score Croissance Entrepreneuriale,Environnement Démographique,Fidélité,Seg Cap Fiscale,Seg Dyn Entre,DYN SetC,latitude,longitude,Catégorie commune dans aire d'attraction des villes 2020,Tranche détaillée d'aire d'attraction des villes 2020,Libellé degré de densité,Population en 2014 (princ),Pop 0-14 ans en 2014 (princ),Pop 15-29 ans en 2014 (princ),Pop 30-44 ans en 2014 (princ),Pop 45-59 ans en 2014 (princ),Pop 60-74 ans en 2014 (princ),Pop 75-89 ans en 2014 (princ),Pop 15 ans ou plus en 2014 (compl),Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl),"Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)",Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl),Pop 15 ans ou plus Employés en 2014 (compl),Pop 15 ans ou plus Ouvriers en 2014 (compl),Pop 15 ans ou plus Retraités en 2014 (compl),Pop 15 ans ou plus Autres en 2014 (compl),taux chômage(15-64 ans),APL
count,38590.0,38590.0,38590.0,38590,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590,38590.0,38590.0,38590.0,38590.0,38590.0,38590,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590,38590,38590,38590,38590,38590.0,38590.0,38590,38590,38590,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0,38590.0
unique,,,,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6,,,,,,7,,,,,,,,,,,,,,7,2,3,3,3,,,5,17,7,,,,,,,,,,,,,,,,,,
top,,,,en croissance démographique,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Com rurale < 2 000 m habts,,,,,,Grande Ville,,,,,,,,,,,,,,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,,,Commune de la couronne,Commune hors attraction des villes,Rural à habitat dispersé,,,,,,,,,,,,,,,,,,
freq,,,,38579,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,30651,,,,,,9137,,,,,,,,,,,,,,17697,30448,30182,30481,32296,,,26427,9904,19922,,,,,,,,,,,,,,,,,,
mean,122.059031,131.970614,107.723218,,16.670951,16.542187,8.038248,8.038248,0.313389,2.527961,130.105343,887.09575,886.984789,457.078751,1074.334672,110.603265,76.74623,2080.99816,53.855895,48.923944,12.886033,17.892459,11.400985,0.693807,1.876315,2.931718,5.899145,10349.030319,10253.58681,10.996303,21.204149,12.464435,8.662656,9.053691,11.167877,21.281051,12.519946,8.686405,9.115599,41731.992599,,110.553537,0.672687,23.236927,0.278984,34.060767,,57.411791,-7.4383,8.614615,10301.308564,14.450998,4.66398,15.330707,3259.704172,4630.877611,1881.694195,8076.407126,81328.806841,0.045904,,,,,,46.974702,2.774471,,,,2403.482197,430.765924,453.174216,458.560041,467.76893,363.083519,202.945996,1972.782716,15.344312,67.002047,170.923555,275.980254,324.198575,253.273128,533.623063,332.432547,10.820448,3.292126
std,632.82766,948.912967,230.556624,,12.968793,14.425287,6.929272,6.929272,0.383459,1.320803,706.006127,5452.654165,5442.792045,2018.634015,6353.290844,683.544659,632.693386,11205.841451,388.455819,351.468468,62.453891,100.035842,82.514154,3.719735,13.865073,19.891236,46.36077,1564.368493,1086.882824,0.949628,0.807518,0.377483,0.244586,0.399395,0.750011,0.592066,0.298912,0.204727,0.295514,46193.354235,,683.459612,4.141992,209.61803,1.204422,246.912501,,15.61373,65.947977,2267.481665,1052.613147,112.080711,30.743394,113.227901,2586.215602,5041.672809,3397.38884,6993.429302,89322.777725,0.319206,,,,,,2.206645,2.69044,,,,12563.813825,2028.827534,3402.185602,2486.489904,2120.364937,1621.59859,983.293809,10559.841653,29.968274,310.565795,1410.3288,1566.104206,1708.012664,1065.921715,2372.100805,2401.530829,4.840921,1.279848
min,1.0,0.0,13.0,,1.0,0.0,0.0,0.0,0.032518,0.0,0.06096,1.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6282.0,7748.0,9.8625,19.75,11.74,8.261905,8.3,10.57451,20.509549,12.17538,8.453394,8.725,3021.411002,,0.0,0.0,0.0,0.0,0.0,,0.0,-1929.0,-140594.0,7664.0,0.0,0.0,0.0,30.0,342.0,187.0,828.0,6358.0,0.0,,,,,,41.435023,-5.086014,,,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,20.0,6.0,52.0,,9.0,8.0,4.0,4.0,0.083808,1.0,11.58284,74.0,74.0,58.0,103.0,9.0,5.0,189.0,3.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,9523.0,9716.0,10.4,20.686207,12.220833,8.51875,8.790698,10.679167,21.017098,12.339819,8.569608,8.915652,20389.52153,,9.0,0.0,0.0,0.0,0.0,,48.0,-1.0,72.0,9643.0,0.0,0.0,0.0,1935.0,2425.0,776.0,4406.0,40484.0,0.0,,,,,,45.182952,0.669468,,,,214.0,37.0,27.0,39.0,47.0,37.0,17.0,175.0,0.0,5.0,5.0,20.0,25.0,25.0,52.0,20.0,7.636364,2.507
50%,33.0,13.0,72.0,,12.0,12.0,6.0,6.0,0.104593,3.0,26.39669,166.0,166.0,127.0,219.0,23.0,13.0,428.0,6.0,5.0,4.0,3.0,2.0,0.0,0.0,0.0,1.0,10159.0,10458.0,10.708276,21.0525,12.437931,8.622581,9.038462,10.992609,21.101282,12.435217,8.634906,9.082927,30848.56372,,23.0,0.0,0.0,0.0,0.0,,59.0,0.0,81.0,10304.5,0.0,0.0,0.0,2576.0,3333.0,925.0,6903.0,55218.0,0.00346,,,,,,47.400393,2.683102,,,,507.5,94.0,68.0,97.0,110.0,84.0,39.0,410.0,8.0,17.0,20.0,53.0,65.0,63.0,119.0,48.0,10.090842,3.229
75%,75.0,46.0,105.0,,19.0,19.0,9.0,9.0,0.180691,4.0,68.03396,427.0,428.0,310.0,533.0,56.0,32.0,1094.0,20.0,16.0,9.0,10.0,5.0,0.0,1.0,1.0,2.0,11072.0,11150.0,11.321212,21.695833,12.640404,8.767647,9.2925,11.295855,21.640496,12.673103,8.783164,9.239535,38930.86531,,56.0,0.0,0.0,0.0,0.0,,69.0,0.0,96.0,10942.5,4.0,2.0,5.0,3268.0,4858.0,1552.0,8683.0,79920.0,0.01931,,,,,,48.82634,4.958665,,,,1339.0,253.0,187.0,255.0,286.0,222.0,111.0,1084.0,18.0,46.0,61.0,149.75,176.0,161.0,324.0,133.0,13.303515,4.009



Percentage of missing values: 
Dynamique Entrepreneuriale                        0.0
Dynamique Entrepreneuriale Service et Commerce    0.0
Synergie Médicale COMMUNE                         0.0
SEG Croissance POP                                0.0
Nb Omnipraticiens BV                              0.0
                                                 ... 
Pop 15 ans ou plus Ouvriers en 2014 (compl)       0.0
Pop 15 ans ou plus Retraités  en 2014 (compl)     0.0
Pop 15 ans ou plus Autres en 2014 (compl)         0.0
taux chômage(15-64 ans)                           0.0
APL                                               0.0
Length: 88, dtype: float64


In [41]:
insee_df.drop_duplicates(inplace = True)

In [42]:
insee_df.shape

(34760, 88)

In [43]:
# Separate target variable Y from features X
print("Separating labels from features...")
target_variable = "APL"

X = insee_df.drop(target_variable, axis = 1)
Y = insee_df.loc[:,target_variable]

print("...Done.")
print()

print('Y : ')
print(Y.head())
print()
print('X :')
X.head()

Separating labels from features...
...Done.

Y : 
0    2.293
1    2.600
2    4.079
3    4.378
4    1.069
Name: APL, dtype: float64

X :


Unnamed: 0,Dynamique Entrepreneuriale,Dynamique Entrepreneuriale Service et Commerce,Synergie Médicale COMMUNE,SEG Croissance POP,Nb Omnipraticiens BV,Nb Infirmiers Libéraux BV,Nb dentistes Libéraux BV,Nb pharmaciens Libéraux BV,Densité Médicale BV,Score équipement de santé BV,Indice Démographique,Nb Ménages,Nb Résidences Principales,Nb propriétaire,Nb Logement,Nb Résidences Secondaires,Nb Log Vacants,Nb Occupants Résidence Principale,Nb Entreprises Secteur Services,Nb Entreprises Secteur Commerce,Nb Entreprises Secteur Construction,Nb Entreprises Secteur Industrie,Nb Création Enteprises,Nb Création Industrielles,Nb Création Construction,Nb Création Commerces,Nb Création Services,Moyenne Revenus Fiscaux Départementaux,Moyenne Revenus Fiscaux Régionaux,Dep Moyenne Salaires Horaires,Dep Moyenne Salaires Cadre Horaires,Dep Moyenne Salaires Prof Intermédiaire Horaires,Dep Moyenne Salaires Employé Horaires,Dep Moyenne Salaires Ouvrié Horaires,Reg Moyenne Salaires Horaires,Reg Moyenne Salaires Cadre Horaires,Reg Moyenne Salaires Prof Intermédiaire Horaires,Reg Moyenne Salaires Employé Horaires,Reg Moyenne Salaires Ouvrié Horaires,Valeur ajoutée régionale,Urbanité Ruralité,Nb Logement Secondaire et Occasionnel,Nb Hotel,Capacité Hotel,Nb Camping,Capacité Camping,Dynamique Démographique BV,Taux Propriété,Dynamique Démographique INSEE,Capacité Fisc,Moyenne Revnus fiscaux,"Nb Education, santé, action sociale",Nb Services personnels et domestiques,"Nb Santé, action sociale",Nb Industries des biens intermédiaires,Nb de Commerce,Nb de Services aux particuliers,"Nb institution de Education, santé, action sociale, administration",PIB Régionnal,Score Croissance Entrepreneuriale,Environnement Démographique,Fidélité,Seg Cap Fiscale,Seg Dyn Entre,DYN SetC,latitude,longitude,Catégorie commune dans aire d'attraction des villes 2020,Tranche détaillée d'aire d'attraction des villes 2020,Libellé degré de densité,Population en 2014 (princ),Pop 0-14 ans en 2014 (princ),Pop 15-29 ans en 2014 (princ),Pop 30-44 ans en 2014 (princ),Pop 45-59 ans en 2014 (princ),Pop 60-74 ans en 2014 (princ),Pop 75-89 ans en 2014 (princ),Pop 15 ans ou plus en 2014 (compl),Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl),"Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)",Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl),Pop 15 ans ou plus Employés en 2014 (compl),Pop 15 ans ou plus Ouvriers en 2014 (compl),Pop 15 ans ou plus Retraités en 2014 (compl),Pop 15 ans ou plus Autres en 2014 (compl),taux chômage(15-64 ans)
0,57.0,23.0,114,en croissance démographique,9,14,7,7,0.09286,4,44.19769,247,248,196,289,32,9,728,7.0,11.0,2.0,2.0,4.0,0.0,2.0,1.0,1.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,32.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,67,-1,117,11483.5,3.0,1.0,0.0,9364,9350,3372,15105,173681,0.01585,Bassin Industriel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,46.153721,4.92585,Commune de la couronne,Aire de moins de 10 000 habitants,Rural à habitat dispersé,767.0,161.0,102.0,132.0,189.0,125.0,53.0,605.0,15.0,20.0,75.0,95.0,100.0,125.0,145.0,30.0,8.776596
1,45.0,4.0,143,en croissance démographique,31,36,18,18,0.099229,4,10.18071,67,67,61,142,71,4,168,4.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,71.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,42,0,110,11483.5,0.0,0.0,0.0,9364,9350,3372,15105,173681,0.00173,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,46.009606,5.428088,Commune hors attraction des pôles,Commune hors attraction des villes,Rural à habitat dispersé,239.0,54.0,28.0,69.0,36.0,33.0,17.0,192.0,0.0,20.0,15.0,25.0,39.0,10.0,64.0,20.0,8.130081
2,634.0,828.0,366,en croissance démographique,31,36,18,18,0.099229,4,696.92134,4640,4635,1968,5184,135,414,11015,342.0,301.0,58.0,108.0,83.0,4.0,14.0,27.0,38.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com < 50 m habts,135.0,2.0,52.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,37,-55,250,11483.5,113.0,41.0,118.0,9364,9350,3372,15105,173681,0.38471,Bassin Résidentiel en croissance démographique,Pop Mobile,Fiscalité moyenne,Dynamique Economique,Bonne Dynamique Entreprise Serv et Com,45.961049,5.372275,Commune-centre,Aire de 30 000 à moins de 50 000 habitants,Centres urbains intermédiaires,14022.0,2778.0,2958.0,2642.0,2603.0,1853.0,1045.0,11228.0,3.0,334.0,743.0,1777.0,1918.0,1818.0,2878.0,1757.0,15.859777
3,113.0,62.0,132,en croissance démographique,12,12,6,6,1.0,1,85.774,473,473,344,505,14,18,1406,22.0,26.0,17.0,10.0,6.0,0.0,4.0,2.0,0.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,14.0,2.0,17.0,1.0,72.0,Grande Ville,68,-3,127,11483.5,5.0,2.0,7.0,9364,9350,3372,15105,173681,0.02824,Bassin Urbain en croissance démographique,Pop Mobile,Fiscalité moyenne,Moyenne dynamique,Faible Dynamique Serv et Com,45.996164,4.911967,Commune de la couronne,Aire de 1 000 000 d’habitants ou plus (hors Pa...,Bourgs ruraux,1627.0,336.0,251.0,323.0,376.0,232.0,99.0,1337.0,5.0,60.0,106.0,231.0,241.0,231.0,312.0,151.0,7.875895
4,42.0,1.0,121,en croissance démographique,26,21,10,10,0.100905,3,5.24276,41,41,28,57,13,3,86,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,13.0,0.0,0.0,0.0,0.0,3.Accroissement par excédent migratoire,49,0,109,11483.5,0.0,0.0,0.0,9364,9350,3372,15105,173681,0.0,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,45.749886,5.594585,Commune de la couronne,Aire de 20 000 à moins de 30 000 habitants,Rural à habitat dispersé,109.0,12.0,16.0,15.0,29.0,27.0,10.0,89.0,0.0,0.0,5.0,25.0,15.0,15.0,25.0,5.0,13.793103


Dividing into train and test sets...
...Done.



In [44]:
# Automatically detect names of numeric/categorical columns
numeric_features = []
categorical_features = []
for i, t in X.dtypes.items():
    if ('float' in str(t)) or ('int' in str(t)):
        numeric_features.append(i)
    else:
        categorical_features.append(i)
print('Found numeric features ', numeric_features)
print('Found categorical features ', categorical_features)

Found numeric features  ['Dynamique Entrepreneuriale', 'Dynamique Entrepreneuriale Service et Commerce', 'Synergie Médicale COMMUNE', 'Nb Omnipraticiens BV', 'Nb Infirmiers Libéraux BV', 'Nb dentistes Libéraux BV', 'Nb pharmaciens Libéraux BV', 'Densité Médicale BV', 'Score équipement de santé BV', 'Indice Démographique', 'Nb Ménages', 'Nb Résidences Principales', 'Nb propriétaire', 'Nb Logement', 'Nb Résidences Secondaires', 'Nb Log Vacants', 'Nb Occupants Résidence Principale', 'Nb Entreprises Secteur Services', 'Nb Entreprises Secteur Commerce', 'Nb Entreprises Secteur Construction', 'Nb Entreprises Secteur Industrie', 'Nb Création Enteprises', 'Nb Création Industrielles', 'Nb Création Construction', 'Nb Création Commerces', 'Nb Création Services', 'Moyenne Revenus Fiscaux Départementaux', 'Moyenne Revenus Fiscaux Régionaux', 'Dep Moyenne Salaires Horaires', 'Dep Moyenne Salaires Cadre Horaires', 'Dep Moyenne Salaires Prof Intermédiaire Horaires', 'Dep Moyenne Salaires Employé Horai

In [45]:
for c in numeric_features:
    X[f"{c}^2"] = X[c]**2
    X[f"{c}^3"] = X[c]**3
    X[f"{c}^4"] = X[c]**4
    X[f"{c}^-1"] = 1/X[c]
    X[f"{c}^-2"] = 1/(X[c]**2)
X.head()


  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4
  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4
  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4
  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4
  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4
  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4
  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4
  X[f"{c}^-1"] = 1/X[c]
  X[f"{c}^-2"] = 1/(X[c]**2)
  X[f"{c}^2"] = X[c]**2
  X[f"{c}^3"] = X[c]**3
  X[f"{c}^4"] = X[c]**4


Unnamed: 0,Dynamique Entrepreneuriale,Dynamique Entrepreneuriale Service et Commerce,Synergie Médicale COMMUNE,SEG Croissance POP,Nb Omnipraticiens BV,Nb Infirmiers Libéraux BV,Nb dentistes Libéraux BV,Nb pharmaciens Libéraux BV,Densité Médicale BV,Score équipement de santé BV,Indice Démographique,Nb Ménages,Nb Résidences Principales,Nb propriétaire,Nb Logement,Nb Résidences Secondaires,Nb Log Vacants,Nb Occupants Résidence Principale,Nb Entreprises Secteur Services,Nb Entreprises Secteur Commerce,Nb Entreprises Secteur Construction,Nb Entreprises Secteur Industrie,Nb Création Enteprises,Nb Création Industrielles,Nb Création Construction,Nb Création Commerces,Nb Création Services,Moyenne Revenus Fiscaux Départementaux,Moyenne Revenus Fiscaux Régionaux,Dep Moyenne Salaires Horaires,Dep Moyenne Salaires Cadre Horaires,Dep Moyenne Salaires Prof Intermédiaire Horaires,Dep Moyenne Salaires Employé Horaires,Dep Moyenne Salaires Ouvrié Horaires,Reg Moyenne Salaires Horaires,Reg Moyenne Salaires Cadre Horaires,Reg Moyenne Salaires Prof Intermédiaire Horaires,Reg Moyenne Salaires Employé Horaires,Reg Moyenne Salaires Ouvrié Horaires,Valeur ajoutée régionale,Urbanité Ruralité,Nb Logement Secondaire et Occasionnel,Nb Hotel,Capacité Hotel,Nb Camping,Capacité Camping,Dynamique Démographique BV,Taux Propriété,Dynamique Démographique INSEE,Capacité Fisc,Moyenne Revnus fiscaux,"Nb Education, santé, action sociale",Nb Services personnels et domestiques,"Nb Santé, action sociale",Nb Industries des biens intermédiaires,Nb de Commerce,Nb de Services aux particuliers,"Nb institution de Education, santé, action sociale, administration",PIB Régionnal,Score Croissance Entrepreneuriale,Environnement Démographique,Fidélité,Seg Cap Fiscale,Seg Dyn Entre,DYN SetC,latitude,longitude,Catégorie commune dans aire d'attraction des villes 2020,Tranche détaillée d'aire d'attraction des villes 2020,Libellé degré de densité,Population en 2014 (princ),Pop 0-14 ans en 2014 (princ),Pop 15-29 ans en 2014 (princ),Pop 30-44 ans en 2014 (princ),Pop 45-59 ans en 2014 (princ),Pop 60-74 ans en 2014 (princ),Pop 75-89 ans en 2014 (princ),Pop 15 ans ou plus en 2014 (compl),Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl),"Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)",Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl),Pop 15 ans ou plus Employés en 2014 (compl),Pop 15 ans ou plus Ouvriers en 2014 (compl),Pop 15 ans ou plus Retraités en 2014 (compl),Pop 15 ans ou plus Autres en 2014 (compl),taux chômage(15-64 ans),Dynamique Entrepreneuriale^2,Dynamique Entrepreneuriale^3,Dynamique Entrepreneuriale^4,Dynamique Entrepreneuriale^-1,Dynamique Entrepreneuriale^-2,Dynamique Entrepreneuriale Service et Commerce^2,Dynamique Entrepreneuriale Service et Commerce^3,Dynamique Entrepreneuriale Service et Commerce^4,Dynamique Entrepreneuriale Service et Commerce^-1,Dynamique Entrepreneuriale Service et Commerce^-2,Synergie Médicale COMMUNE^2,Synergie Médicale COMMUNE^3,Synergie Médicale COMMUNE^4,Synergie Médicale COMMUNE^-1,Synergie Médicale COMMUNE^-2,Nb Omnipraticiens BV^2,Nb Omnipraticiens BV^3,Nb Omnipraticiens BV^4,Nb Omnipraticiens BV^-1,Nb Omnipraticiens BV^-2,Nb Infirmiers Libéraux BV^2,Nb Infirmiers Libéraux BV^3,Nb Infirmiers Libéraux BV^4,Nb Infirmiers Libéraux BV^-1,Nb Infirmiers Libéraux BV^-2,Nb dentistes Libéraux BV^2,Nb dentistes Libéraux BV^3,Nb dentistes Libéraux BV^4,Nb dentistes Libéraux BV^-1,Nb dentistes Libéraux BV^-2,Nb pharmaciens Libéraux BV^2,Nb pharmaciens Libéraux BV^3,Nb pharmaciens Libéraux BV^4,Nb pharmaciens Libéraux BV^-1,Nb pharmaciens Libéraux BV^-2,Densité Médicale BV^2,Densité Médicale BV^3,Densité Médicale BV^4,Densité Médicale BV^-1,Densité Médicale BV^-2,Score équipement de santé BV^2,Score équipement de santé BV^3,Score équipement de santé BV^4,Score équipement de santé BV^-1,Score équipement de santé BV^-2,Indice Démographique^2,Indice Démographique^3,Indice Démographique^4,Indice Démographique^-1,Indice Démographique^-2,Nb Ménages^2,Nb Ménages^3,Nb Ménages^4,Nb Ménages^-1,Nb Ménages^-2,Nb Résidences Principales^2,Nb Résidences Principales^3,Nb Résidences Principales^4,Nb Résidences Principales^-1,Nb Résidences Principales^-2,Nb propriétaire^2,Nb propriétaire^3,Nb propriétaire^4,Nb propriétaire^-1,Nb propriétaire^-2,Nb Logement^2,Nb Logement^3,Nb Logement^4,Nb Logement^-1,Nb Logement^-2,Nb Résidences Secondaires^2,Nb Résidences Secondaires^3,Nb Résidences Secondaires^4,Nb Résidences Secondaires^-1,Nb Résidences Secondaires^-2,Nb Log Vacants^2,Nb Log Vacants^3,Nb Log Vacants^4,Nb Log Vacants^-1,Nb Log Vacants^-2,Nb Occupants Résidence Principale^2,Nb Occupants Résidence Principale^3,Nb Occupants Résidence Principale^4,Nb Occupants Résidence Principale^-1,Nb Occupants Résidence Principale^-2,Nb Entreprises Secteur Services^2,Nb Entreprises Secteur Services^3,Nb Entreprises Secteur Services^4,Nb Entreprises Secteur Services^-1,Nb Entreprises Secteur Services^-2,Nb Entreprises Secteur Commerce^2,Nb Entreprises Secteur Commerce^3,Nb Entreprises Secteur Commerce^4,Nb Entreprises Secteur Commerce^-1,Nb Entreprises Secteur Commerce^-2,Nb Entreprises Secteur Construction^2,Nb Entreprises Secteur Construction^3,Nb Entreprises Secteur Construction^4,Nb Entreprises Secteur Construction^-1,Nb Entreprises Secteur Construction^-2,Nb Entreprises Secteur Industrie^2,Nb Entreprises Secteur Industrie^3,Nb Entreprises Secteur Industrie^4,Nb Entreprises Secteur Industrie^-1,Nb Entreprises Secteur Industrie^-2,Nb Création Enteprises^2,Nb Création Enteprises^3,Nb Création Enteprises^4,Nb Création Enteprises^-1,Nb Création Enteprises^-2,Nb Création Industrielles^2,Nb Création Industrielles^3,Nb Création Industrielles^4,Nb Création Industrielles^-1,Nb Création Industrielles^-2,Nb Création Construction^2,Nb Création Construction^3,Nb Création Construction^4,Nb Création Construction^-1,Nb Création Construction^-2,Nb Création Commerces^2,Nb Création Commerces^3,Nb Création Commerces^4,Nb Création Commerces^-1,Nb Création Commerces^-2,Nb Création Services^2,Nb Création Services^3,Nb Création Services^4,Nb Création Services^-1,Nb Création Services^-2,Moyenne Revenus Fiscaux Départementaux^2,Moyenne Revenus Fiscaux Départementaux^3,Moyenne Revenus Fiscaux Départementaux^4,Moyenne Revenus Fiscaux Départementaux^-1,Moyenne Revenus Fiscaux Départementaux^-2,Moyenne Revenus Fiscaux Régionaux^2,Moyenne Revenus Fiscaux Régionaux^3,Moyenne Revenus Fiscaux Régionaux^4,Moyenne Revenus Fiscaux Régionaux^-1,Moyenne Revenus Fiscaux Régionaux^-2,Dep Moyenne Salaires Horaires^2,Dep Moyenne Salaires Horaires^3,Dep Moyenne Salaires Horaires^4,Dep Moyenne Salaires Horaires^-1,Dep Moyenne Salaires Horaires^-2,Dep Moyenne Salaires Cadre Horaires^2,Dep Moyenne Salaires Cadre Horaires^3,Dep Moyenne Salaires Cadre Horaires^4,Dep Moyenne Salaires Cadre Horaires^-1,Dep Moyenne Salaires Cadre Horaires^-2,Dep Moyenne Salaires Prof Intermédiaire Horaires^2,Dep Moyenne Salaires Prof Intermédiaire Horaires^3,Dep Moyenne Salaires Prof Intermédiaire Horaires^4,Dep Moyenne Salaires Prof Intermédiaire Horaires^-1,Dep Moyenne Salaires Prof Intermédiaire Horaires^-2,Dep Moyenne Salaires Employé Horaires^2,Dep Moyenne Salaires Employé Horaires^3,Dep Moyenne Salaires Employé Horaires^4,Dep Moyenne Salaires Employé Horaires^-1,Dep Moyenne Salaires Employé Horaires^-2,Dep Moyenne Salaires Ouvrié Horaires^2,Dep Moyenne Salaires Ouvrié Horaires^3,Dep Moyenne Salaires Ouvrié Horaires^4,Dep Moyenne Salaires Ouvrié Horaires^-1,Dep Moyenne Salaires Ouvrié Horaires^-2,Reg Moyenne Salaires Horaires^2,Reg Moyenne Salaires Horaires^3,Reg Moyenne Salaires Horaires^4,Reg Moyenne Salaires Horaires^-1,Reg Moyenne Salaires Horaires^-2,Reg Moyenne Salaires Cadre Horaires^2,Reg Moyenne Salaires Cadre Horaires^3,Reg Moyenne Salaires Cadre Horaires^4,Reg Moyenne Salaires Cadre Horaires^-1,Reg Moyenne Salaires Cadre Horaires^-2,Reg Moyenne Salaires Prof Intermédiaire Horaires^2,Reg Moyenne Salaires Prof Intermédiaire Horaires^3,Reg Moyenne Salaires Prof Intermédiaire Horaires^4,Reg Moyenne Salaires Prof Intermédiaire Horaires^-1,Reg Moyenne Salaires Prof Intermédiaire Horaires^-2,Reg Moyenne Salaires Employé Horaires^2,Reg Moyenne Salaires Employé Horaires^3,Reg Moyenne Salaires Employé Horaires^4,Reg Moyenne Salaires Employé Horaires^-1,Reg Moyenne Salaires Employé Horaires^-2,Reg Moyenne Salaires Ouvrié Horaires^2,Reg Moyenne Salaires Ouvrié Horaires^3,Reg Moyenne Salaires Ouvrié Horaires^4,Reg Moyenne Salaires Ouvrié Horaires^-1,Reg Moyenne Salaires Ouvrié Horaires^-2,Valeur ajoutée régionale^2,Valeur ajoutée régionale^3,Valeur ajoutée régionale^4,Valeur ajoutée régionale^-1,Valeur ajoutée régionale^-2,Nb Logement Secondaire et Occasionnel^2,Nb Logement Secondaire et Occasionnel^3,Nb Logement Secondaire et Occasionnel^4,Nb Logement Secondaire et Occasionnel^-1,Nb Logement Secondaire et Occasionnel^-2,Nb Hotel^2,Nb Hotel^3,Nb Hotel^4,Nb Hotel^-1,Nb Hotel^-2,Capacité Hotel^2,Capacité Hotel^3,Capacité Hotel^4,Capacité Hotel^-1,Capacité Hotel^-2,Nb Camping^2,Nb Camping^3,Nb Camping^4,Nb Camping^-1,Nb Camping^-2,Capacité Camping^2,Capacité Camping^3,Capacité Camping^4,Capacité Camping^-1,Capacité Camping^-2,Taux Propriété^2,Taux Propriété^3,Taux Propriété^4,Taux Propriété^-1,Taux Propriété^-2,Dynamique Démographique INSEE^2,Dynamique Démographique INSEE^3,Dynamique Démographique INSEE^4,Dynamique Démographique INSEE^-1,Dynamique Démographique INSEE^-2,Capacité Fisc^2,Capacité Fisc^3,Capacité Fisc^4,Capacité Fisc^-1,Capacité Fisc^-2,Moyenne Revnus fiscaux^2,Moyenne Revnus fiscaux^3,Moyenne Revnus fiscaux^4,Moyenne Revnus fiscaux^-1,Moyenne Revnus fiscaux^-2,"Nb Education, santé, action sociale^2","Nb Education, santé, action sociale^3","Nb Education, santé, action sociale^4","Nb Education, santé, action sociale^-1","Nb Education, santé, action sociale^-2",Nb Services personnels et domestiques^2,Nb Services personnels et domestiques^3,Nb Services personnels et domestiques^4,Nb Services personnels et domestiques^-1,Nb Services personnels et domestiques^-2,"Nb Santé, action sociale^2","Nb Santé, action sociale^3","Nb Santé, action sociale^4","Nb Santé, action sociale^-1","Nb Santé, action sociale^-2",Nb Industries des biens intermédiaires^2,Nb Industries des biens intermédiaires^3,Nb Industries des biens intermédiaires^4,Nb Industries des biens intermédiaires^-1,Nb Industries des biens intermédiaires^-2,Nb de Commerce^2,Nb de Commerce^3,Nb de Commerce^4,Nb de Commerce^-1,Nb de Commerce^-2,Nb de Services aux particuliers^2,Nb de Services aux particuliers^3,Nb de Services aux particuliers^4,Nb de Services aux particuliers^-1,Nb de Services aux particuliers^-2,"Nb institution de Education, santé, action sociale, administration^2","Nb institution de Education, santé, action sociale, administration^3","Nb institution de Education, santé, action sociale, administration^4","Nb institution de Education, santé, action sociale, administration^-1","Nb institution de Education, santé, action sociale, administration^-2",PIB Régionnal^2,PIB Régionnal^3,PIB Régionnal^4,PIB Régionnal^-1,PIB Régionnal^-2,Score Croissance Entrepreneuriale^2,Score Croissance Entrepreneuriale^3,Score Croissance Entrepreneuriale^4,Score Croissance Entrepreneuriale^-1,Score Croissance Entrepreneuriale^-2,latitude^2,latitude^3,latitude^4,latitude^-1,latitude^-2,longitude^2,longitude^3,longitude^4,longitude^-1,longitude^-2,Population en 2014 (princ)^2,Population en 2014 (princ)^3,Population en 2014 (princ)^4,Population en 2014 (princ)^-1,Population en 2014 (princ)^-2,Pop 0-14 ans en 2014 (princ)^2,Pop 0-14 ans en 2014 (princ)^3,Pop 0-14 ans en 2014 (princ)^4,Pop 0-14 ans en 2014 (princ)^-1,Pop 0-14 ans en 2014 (princ)^-2,Pop 15-29 ans en 2014 (princ)^2,Pop 15-29 ans en 2014 (princ)^3,Pop 15-29 ans en 2014 (princ)^4,Pop 15-29 ans en 2014 (princ)^-1,Pop 15-29 ans en 2014 (princ)^-2,Pop 30-44 ans en 2014 (princ)^2,Pop 30-44 ans en 2014 (princ)^3,Pop 30-44 ans en 2014 (princ)^4,Pop 30-44 ans en 2014 (princ)^-1,Pop 30-44 ans en 2014 (princ)^-2,Pop 45-59 ans en 2014 (princ)^2,Pop 45-59 ans en 2014 (princ)^3,Pop 45-59 ans en 2014 (princ)^4,Pop 45-59 ans en 2014 (princ)^-1,Pop 45-59 ans en 2014 (princ)^-2,Pop 60-74 ans en 2014 (princ)^2,Pop 60-74 ans en 2014 (princ)^3,Pop 60-74 ans en 2014 (princ)^4,Pop 60-74 ans en 2014 (princ)^-1,Pop 60-74 ans en 2014 (princ)^-2,Pop 75-89 ans en 2014 (princ)^2,Pop 75-89 ans en 2014 (princ)^3,Pop 75-89 ans en 2014 (princ)^4,Pop 75-89 ans en 2014 (princ)^-1,Pop 75-89 ans en 2014 (princ)^-2,Pop 15 ans ou plus en 2014 (compl)^2,Pop 15 ans ou plus en 2014 (compl)^3,Pop 15 ans ou plus en 2014 (compl)^4,Pop 15 ans ou plus en 2014 (compl)^-1,Pop 15 ans ou plus en 2014 (compl)^-2,Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl)^2,Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl)^3,Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl)^4,Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl)^-1,Pop 15 ans ou plus Agriculteurs exploitants en 2014 (compl)^-2,"Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)^2","Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)^3","Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)^4","Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)^-1","Pop 15 ans ou plus Artisans, Comm., Chefs entr. en 2014 (compl)^-2","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)^2","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)^3","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)^4","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)^-1","Pop 15 ans ou plus Cadres, Prof. intel. sup. en 2014 (compl)^-2",Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl)^2,Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl)^3,Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl)^4,Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl)^-1,Pop 15 ans ou plus Prof. intermédiaires en 2014 (compl)^-2,Pop 15 ans ou plus Employés en 2014 (compl)^2,Pop 15 ans ou plus Employés en 2014 (compl)^3,Pop 15 ans ou plus Employés en 2014 (compl)^4,Pop 15 ans ou plus Employés en 2014 (compl)^-1,Pop 15 ans ou plus Employés en 2014 (compl)^-2,Pop 15 ans ou plus Ouvriers en 2014 (compl)^2,Pop 15 ans ou plus Ouvriers en 2014 (compl)^3,Pop 15 ans ou plus Ouvriers en 2014 (compl)^4,Pop 15 ans ou plus Ouvriers en 2014 (compl)^-1,Pop 15 ans ou plus Ouvriers en 2014 (compl)^-2,Pop 15 ans ou plus Retraités en 2014 (compl)^2,Pop 15 ans ou plus Retraités en 2014 (compl)^3,Pop 15 ans ou plus Retraités en 2014 (compl)^4,Pop 15 ans ou plus Retraités en 2014 (compl)^-1,Pop 15 ans ou plus Retraités en 2014 (compl)^-2,Pop 15 ans ou plus Autres en 2014 (compl)^2,Pop 15 ans ou plus Autres en 2014 (compl)^3,Pop 15 ans ou plus Autres en 2014 (compl)^4,Pop 15 ans ou plus Autres en 2014 (compl)^-1,Pop 15 ans ou plus Autres en 2014 (compl)^-2,taux chômage(15-64 ans)^2,taux chômage(15-64 ans)^3,taux chômage(15-64 ans)^4,taux chômage(15-64 ans)^-1,taux chômage(15-64 ans)^-2
0,57.0,23.0,114,en croissance démographique,9,14,7,7,0.09286,4,44.19769,247,248,196,289,32,9,728,7.0,11.0,2.0,2.0,4.0,0.0,2.0,1.0,1.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,32.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,67,-1,117,11483.5,3.0,1.0,0.0,9364,9350,3372,15105,173681,0.01585,Bassin Industriel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,46.153721,4.92585,Commune de la couronne,Aire de moins de 10 000 habitants,Rural à habitat dispersé,767.0,161.0,102.0,132.0,189.0,125.0,53.0,605.0,15.0,20.0,75.0,95.0,100.0,125.0,145.0,30.0,8.776596,3249.0,185193.0,10556000.0,0.017544,0.000308,529.0,12167.0,279841.0,0.043478,0.00189,12996,1481544,168896016,0.008772,7.7e-05,81,729,6561,0.111111,0.012346,196,2744,38416,0.071429,0.005102,49,343,2401,0.142857,0.020408,49,343,2401,0.142857,0.020408,0.008623,0.000801,7.4e-05,10.768953,115.970351,16,64,256,0.25,0.0625,1953.435801,86337.35,3815911.0,0.022626,0.000512,61009,15069223,3722098081,0.004049,1.639102e-05,61504,15252992,3782742016,0.004032,1.625911e-05,38416,7529536,1475789056,0.005102,2.603082e-05,83521,24137569,6975757441,0.00346,1.197304e-05,1024,32768,1048576,0.03125,0.000977,81,729,6561,0.111111,0.012346,529984,385828352,280883040256,0.001374,1.886849e-06,49.0,343.0,2401.0,0.142857,0.020408,121.0,1331.0,14641.0,0.090909,0.008264,4.0,8.0,16.0,0.5,0.25,4.0,8.0,16.0,0.5,0.25,16.0,64.0,256.0,0.25,0.0625,0.0,0.0,0.0,inf,inf,4.0,8.0,16.0,0.5,0.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,156475081,1957346788229,24484450973956561,8e-05,6.390794e-09,109369764,1143788991912,11961745277415696,9.6e-05,9.143295e-09,130.195969,1485.580903,16950.99038,0.08764,0.007681,482.408207,10595.514056,232717.678509,0.045529,0.002073,157.718954,1980.732514,24875.268336,0.079627,0.00634,76.441858,668.339071,5843.35764,0.114376,0.013082,85.913722,796.331324,7381.167586,0.107887,0.01164,140.968659,1673.724031,19872.162756,0.084225,0.007094,474.709519,10342.890117,225349.127538,0.045897,0.002107,161.393059,2050.346591,26047.719551,0.078715,0.006196,77.143975,677.568208,5951.192897,0.113854,0.012963,86.516412,804.725472,7485.089502,0.10751,0.011559,7561600000.0,657537500000000.0,5.717779e+19,1.1e-05,1.322472e-10,1024.0,32768.0,1048576.0,0.03125,0.000977,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,4489,300763,20151121,0.014925,0.000223,1,-1,1,-1.0,1.0,13689,1601613,187388721,0.008547,7.3e-05,131870800.0,1514338000000.0,1.73899e+16,8.7e-05,7.583181e-09,9.0,27.0,81.0,0.333333,0.111111,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,inf,inf,87684496,821077620544,7688570838774016,0.000107,1.140452e-08,87422500,817400375000,7642693506250000,0.000107,1.14387e-08,11370384,38340934848,129285632307456,0.000297,8.794778e-08,228161025,3446372282625,52057453329050625,6.6e-05,4.38287e-09,30165089761,5239102954780241,6042180677419007937,6e-06,3.31509e-11,0.000251,3.981877e-06,6.311274e-08,63.091483,3980.535,2130.165964,98315.085654,4537607.0,0.021667,0.000469,24.264,119.520826,588.74168,0.203011,0.041213,588289.0,451217700.0,346083900000.0,0.001304,1.699845e-06,25921.0,4173281.0,671898200.0,0.006211,3.857876e-05,10404.0,1061208.0,108243200.0,0.009804,9.611688e-05,17424.0,2299968.0,303595800.0,0.007576,5.73921e-05,35721.0,6751269.0,1275990000.0,0.005291,2.799474e-05,15625.0,1953125.0,244140600.0,0.008,6.4e-05,2809.0,148877.0,7890481.0,0.018868,0.0003559986,366025.0,221445100.0,133974300000.0,0.001653,2.732054e-06,225.0,3375.0,50625.0,0.066667,0.004444,400.0,8000.0,160000.0,0.05,0.0025,5625.0,421875.0,31640620.0,0.013333,0.000178,9025.0,857375.0,81450620.0,0.010526,0.0001108033,10000.0,1000000.0,100000000.0,0.01,0.0001,15625.0,1953125.0,244140600.0,0.008,6.4e-05,21025.0,3048625.0,442050600.0,0.006897,4.756243e-05,900.0,27000.0,810000.0,0.033333,0.001111111,77.028633,676.049171,5933.410281,0.113939,0.012982
1,45.0,4.0,143,en croissance démographique,31,36,18,18,0.099229,4,10.18071,67,67,61,142,71,4,168,4.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,71.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,42,0,110,11483.5,0.0,0.0,0.0,9364,9350,3372,15105,173681,0.00173,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,46.009606,5.428088,Commune hors attraction des pôles,Commune hors attraction des villes,Rural à habitat dispersé,239.0,54.0,28.0,69.0,36.0,33.0,17.0,192.0,0.0,20.0,15.0,25.0,39.0,10.0,64.0,20.0,8.130081,2025.0,91125.0,4100625.0,0.022222,0.000494,16.0,64.0,256.0,0.25,0.0625,20449,2924207,418161601,0.006993,4.9e-05,961,29791,923521,0.032258,0.001041,1296,46656,1679616,0.027778,0.000772,324,5832,104976,0.055556,0.003086,324,5832,104976,0.055556,0.003086,0.009846,0.000977,9.7e-05,10.077703,101.560092,16,64,256,0.25,0.0625,103.646856,1055.199,10742.67,0.098225,0.009648,4489,300763,20151121,0.014925,0.0002227668,4489,300763,20151121,0.014925,0.0002227668,3721,226981,13845841,0.016393,0.000268745,20164,2863288,406586896,0.007042,4.959333e-05,5041,357911,25411681,0.014085,0.000198,16,64,256,0.25,0.0625,28224,4741632,796594176,0.005952,3.543084e-05,16.0,64.0,256.0,0.25,0.0625,0.0,0.0,0.0,inf,inf,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,inf,inf,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,1.0,1.0,1.0,1.0,1.0,156475081,1957346788229,24484450973956561,8e-05,6.390794e-09,109369764,1143788991912,11961745277415696,9.6e-05,9.143295e-09,130.195969,1485.580903,16950.99038,0.08764,0.007681,482.408207,10595.514056,232717.678509,0.045529,0.002073,157.718954,1980.732514,24875.268336,0.079627,0.00634,76.441858,668.339071,5843.35764,0.114376,0.013082,85.913722,796.331324,7381.167586,0.107887,0.01164,140.968659,1673.724031,19872.162756,0.084225,0.007094,474.709519,10342.890117,225349.127538,0.045897,0.002107,161.393059,2050.346591,26047.719551,0.078715,0.006196,77.143975,677.568208,5951.192897,0.113854,0.012963,86.516412,804.725472,7485.089502,0.10751,0.011559,7561600000.0,657537500000000.0,5.717779e+19,1.1e-05,1.322472e-10,5041.0,357911.0,25411681.0,0.014085,0.000198,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,1764,74088,3111696,0.02381,0.000567,0,0,0,inf,inf,12100,1331000,146410000,0.009091,8.3e-05,131870800.0,1514338000000.0,1.73899e+16,8.7e-05,7.583181e-09,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,87684496,821077620544,7688570838774016,0.000107,1.140452e-08,87422500,817400375000,7642693506250000,0.000107,1.14387e-08,11370384,38340934848,129285632307456,0.000297,8.794778e-08,228161025,3446372282625,52057453329050625,6.6e-05,4.38287e-09,30165089761,5239102954780241,6042180677419007937,6e-06,3.31509e-11,3e-06,5.177717e-09,8.95745e-12,578.034682,334124.1,2116.883815,97396.989584,4481197.0,0.021735,0.000472,29.464137,159.933923,868.135376,0.184227,0.03394,57121.0,13651920.0,3262809000.0,0.004184,1.75067e-05,2916.0,157464.0,8503056.0,0.018519,0.0003429355,784.0,21952.0,614656.0,0.035714,0.00127551,4761.0,328509.0,22667120.0,0.014493,0.0002100399,1296.0,46656.0,1679616.0,0.027778,0.0007716049,1089.0,35937.0,1185921.0,0.030303,0.0009182736,289.0,4913.0,83521.0,0.058824,0.003460208,36864.0,7077888.0,1358954000.0,0.005208,2.712674e-05,0.0,0.0,0.0,inf,inf,400.0,8000.0,160000.0,0.05,0.0025,225.0,3375.0,50625.0,0.066667,0.004444,625.0,15625.0,390625.0,0.04,0.0016,1521.0,59319.0,2313441.0,0.025641,0.0006574622,100.0,1000.0,10000.0,0.1,0.01,4096.0,262144.0,16777220.0,0.015625,0.0002441406,400.0,8000.0,160000.0,0.05,0.0025,66.098222,537.383918,4368.974946,0.123,0.015129
2,634.0,828.0,366,en croissance démographique,31,36,18,18,0.099229,4,696.92134,4640,4635,1968,5184,135,414,11015,342.0,301.0,58.0,108.0,83.0,4.0,14.0,27.0,38.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com < 50 m habts,135.0,2.0,52.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,37,-55,250,11483.5,113.0,41.0,118.0,9364,9350,3372,15105,173681,0.38471,Bassin Résidentiel en croissance démographique,Pop Mobile,Fiscalité moyenne,Dynamique Economique,Bonne Dynamique Entreprise Serv et Com,45.961049,5.372275,Commune-centre,Aire de 30 000 à moins de 50 000 habitants,Centres urbains intermédiaires,14022.0,2778.0,2958.0,2642.0,2603.0,1853.0,1045.0,11228.0,3.0,334.0,743.0,1777.0,1918.0,1818.0,2878.0,1757.0,15.859777,401956.0,254840104.0,161568600000.0,0.001577,2e-06,685584.0,567663552.0,470025400000.0,0.001208,1e-06,133956,49027896,17944209936,0.002732,7e-06,961,29791,923521,0.032258,0.001041,1296,46656,1679616,0.027778,0.000772,324,5832,104976,0.055556,0.003086,324,5832,104976,0.055556,0.003086,0.009846,0.000977,9.7e-05,10.077703,101.560092,16,64,256,0.25,0.0625,485699.354147,338494200.0,235903900000.0,0.001435,2e-06,21529600,99897344000,463523676160000,0.000216,4.644768e-08,21483225,99574747875,461528956400625,0.000216,4.654795e-08,3873024,7622111232,15000314904576,0.000508,2.581962e-07,26873856,139314069504,722204136308736,0.000193,3.721089e-08,18225,2460375,332150625,0.007407,5.5e-05,171396,70957944,29376588816,0.002415,6e-06,121330225,1336452428375,14721023498550625,9.1e-05,8.241969e-09,116964.0,40001688.0,13680580000.0,0.002924,9e-06,90601.0,27270901.0,8208541000.0,0.003322,1.1e-05,3364.0,195112.0,11316496.0,0.017241,0.000297,11664.0,1259712.0,136048896.0,0.009259,8.6e-05,6889.0,571787.0,47458321.0,0.012048,0.000145,16.0,64.0,256.0,0.25,0.0625,196.0,2744.0,38416.0,0.071429,0.005102,729.0,19683.0,531441.0,0.037037,0.001372,1444.0,54872.0,2085136.0,0.026316,0.000693,156475081,1957346788229,24484450973956561,8e-05,6.390794e-09,109369764,1143788991912,11961745277415696,9.6e-05,9.143295e-09,130.195969,1485.580903,16950.99038,0.08764,0.007681,482.408207,10595.514056,232717.678509,0.045529,0.002073,157.718954,1980.732514,24875.268336,0.079627,0.00634,76.441858,668.339071,5843.35764,0.114376,0.013082,85.913722,796.331324,7381.167586,0.107887,0.01164,140.968659,1673.724031,19872.162756,0.084225,0.007094,474.709519,10342.890117,225349.127538,0.045897,0.002107,161.393059,2050.346591,26047.719551,0.078715,0.006196,77.143975,677.568208,5951.192897,0.113854,0.012963,86.516412,804.725472,7485.089502,0.10751,0.011559,7561600000.0,657537500000000.0,5.717779e+19,1.1e-05,1.322472e-10,18225.0,2460375.0,332150625.0,0.007407,5.5e-05,4.0,8.0,16.0,0.5,0.25,2704.0,140608.0,7311616.0,0.019231,0.00037,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,1369,50653,1874161,0.027027,0.00073,3025,-166375,9150625,-0.018182,0.000331,62500,15625000,3906250000,0.004,1.6e-05,131870800.0,1514338000000.0,1.73899e+16,8.7e-05,7.583181e-09,12769.0,1442897.0,163047361.0,0.00885,7.8e-05,1681.0,68921.0,2825761.0,0.02439,0.000595,13924.0,1643032.0,193877776.0,0.008475,7.2e-05,87684496,821077620544,7688570838774016,0.000107,1.140452e-08,87422500,817400375000,7642693506250000,0.000107,1.14387e-08,11370384,38340934848,129285632307456,0.000297,8.794778e-08,228161025,3446372282625,52057453329050625,6.6e-05,4.38287e-09,30165089761,5239102954780241,6042180677419007937,6e-06,3.31509e-11,0.148002,0.05693777,0.02190453,2.599361,6.756675,2112.418012,97088.947426,4462310.0,0.021758,0.000473,28.861343,155.051085,832.977135,0.186141,0.034648,196616484.0,2756956000000.0,3.865804e+16,7.1e-05,5.086044e-09,7717284.0,21438610000.0,59556470000000.0,0.00036,1.295793e-07,8749764.0,25881800000.0,76558370000000.0,0.000338,1.142888e-07,6980164.0,18441590000.0,48722690000000.0,0.000379,1.432631e-07,6775609.0,17636910000.0,45908880000000.0,0.000384,1.475882e-07,3433609.0,6362477000.0,11789670000000.0,0.00054,2.912388e-07,1092025.0,1141166000.0,1192519000000.0,0.000957,9.1573e-07,126067984.0,1415491000000.0,1.589314e+16,8.9e-05,7.932228e-09,9.0,27.0,81.0,0.333333,0.111111,111556.0,37259704.0,12444740000.0,0.002994,9e-06,552049.0,410172407.0,304758100000.0,0.001346,2e-06,3157729.0,5611284000.0,9971252000000.0,0.000563,3.166833e-07,3678724.0,7055793000.0,13533010000000.0,0.000521,2.718334e-07,3305124.0,6008715000.0,10923840000000.0,0.00055,3.025605e-07,8282884.0,23838140000.0,68606170000000.0,0.000347,1.207309e-07,3087049.0,5423945000.0,9529872000000.0,0.000569,3.23934e-07,251.532526,3989.249776,63268.611824,0.063053,0.003976
3,113.0,62.0,132,en croissance démographique,12,12,6,6,1.0,1,85.774,473,473,344,505,14,18,1406,22.0,26.0,17.0,10.0,6.0,0.0,4.0,2.0,0.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,14.0,2.0,17.0,1.0,72.0,Grande Ville,68,-3,127,11483.5,5.0,2.0,7.0,9364,9350,3372,15105,173681,0.02824,Bassin Urbain en croissance démographique,Pop Mobile,Fiscalité moyenne,Moyenne dynamique,Faible Dynamique Serv et Com,45.996164,4.911967,Commune de la couronne,Aire de 1 000 000 d’habitants ou plus (hors Pa...,Bourgs ruraux,1627.0,336.0,251.0,323.0,376.0,232.0,99.0,1337.0,5.0,60.0,106.0,231.0,241.0,231.0,312.0,151.0,7.875895,12769.0,1442897.0,163047400.0,0.00885,7.8e-05,3844.0,238328.0,14776340.0,0.016129,0.00026,17424,2299968,303595776,0.007576,5.7e-05,144,1728,20736,0.083333,0.006944,144,1728,20736,0.083333,0.006944,36,216,1296,0.166667,0.027778,36,216,1296,0.166667,0.027778,1.0,1.0,1.0,1.0,1.0,1,1,1,1.0,1.0,7357.179076,631054.7,54128080.0,0.011659,0.000136,223729,105823817,50054665441,0.002114,4.469693e-06,223729,105823817,50054665441,0.002114,4.469693e-06,118336,40707584,14003408896,0.002907,8.450514e-06,255025,128787625,65037750625,0.00198,3.921184e-06,196,2744,38416,0.071429,0.005102,324,5832,104976,0.055556,0.003086,1976836,2779431416,3907880570896,0.000711,5.058589e-07,484.0,10648.0,234256.0,0.045455,0.002066,676.0,17576.0,456976.0,0.038462,0.001479,289.0,4913.0,83521.0,0.058824,0.00346,100.0,1000.0,10000.0,0.1,0.01,36.0,216.0,1296.0,0.166667,0.027778,0.0,0.0,0.0,inf,inf,16.0,64.0,256.0,0.25,0.0625,4.0,8.0,16.0,0.5,0.25,0.0,0.0,0.0,inf,inf,156475081,1957346788229,24484450973956561,8e-05,6.390794e-09,109369764,1143788991912,11961745277415696,9.6e-05,9.143295e-09,130.195969,1485.580903,16950.99038,0.08764,0.007681,482.408207,10595.514056,232717.678509,0.045529,0.002073,157.718954,1980.732514,24875.268336,0.079627,0.00634,76.441858,668.339071,5843.35764,0.114376,0.013082,85.913722,796.331324,7381.167586,0.107887,0.01164,140.968659,1673.724031,19872.162756,0.084225,0.007094,474.709519,10342.890117,225349.127538,0.045897,0.002107,161.393059,2050.346591,26047.719551,0.078715,0.006196,77.143975,677.568208,5951.192897,0.113854,0.012963,86.516412,804.725472,7485.089502,0.10751,0.011559,7561600000.0,657537500000000.0,5.717779e+19,1.1e-05,1.322472e-10,196.0,2744.0,38416.0,0.071429,0.005102,4.0,8.0,16.0,0.5,0.25,289.0,4913.0,83521.0,0.058824,0.00346,1.0,1.0,1.0,1.0,1.0,5184.0,373248.0,26873856.0,0.013889,0.000193,4624,314432,21381376,0.014706,0.000216,9,-27,81,-0.333333,0.111111,16129,2048383,260144641,0.007874,6.2e-05,131870800.0,1514338000000.0,1.73899e+16,8.7e-05,7.583181e-09,25.0,125.0,625.0,0.2,0.04,4.0,8.0,16.0,0.5,0.25,49.0,343.0,2401.0,0.142857,0.020408,87684496,821077620544,7688570838774016,0.000107,1.140452e-08,87422500,817400375000,7642693506250000,0.000107,1.14387e-08,11370384,38340934848,129285632307456,0.000297,8.794778e-08,228161025,3446372282625,52057453329050625,6.6e-05,4.38287e-09,30165089761,5239102954780241,6042180677419007937,6e-06,3.31509e-11,0.000797,2.252133e-05,6.360024e-07,35.410765,1253.922,2115.647063,97311.648373,4475962.0,0.021741,0.000473,24.127421,118.5131,582.132452,0.203584,0.041447,2647129.0,4306879000.0,7007292000000.0,0.000615,3.777678e-07,112896.0,37933060.0,12745510000.0,0.002976,8.85771e-06,63001.0,15813250.0,3969126000.0,0.003984,1.587276e-05,104329.0,33698270.0,10884540000.0,0.003096,9.585063e-06,141376.0,53157380.0,19987170000.0,0.00266,7.073336e-06,53824.0,12487170.0,2897023000.0,0.00431,1.857907e-05,9801.0,970299.0,96059600.0,0.010101,0.0001020304,1787569.0,2389980000.0,3195403000000.0,0.000748,5.59419e-07,25.0,125.0,625.0,0.2,0.04,3600.0,216000.0,12960000.0,0.016667,0.000278,11236.0,1191016.0,126247700.0,0.009434,8.9e-05,53361.0,12326390.0,2847396000.0,0.004329,1.874028e-05,58081.0,13997520.0,3373403000.0,0.004149,1.721733e-05,53361.0,12326390.0,2847396000.0,0.004329,1.874028e-05,97344.0,30371330.0,9475854000.0,0.003205,1.027285e-05,22801.0,3442951.0,519885600.0,0.006623,4.385773e-05,62.029722,488.539576,3847.686394,0.12697,0.016121
4,42.0,1.0,121,en croissance démographique,26,21,10,10,0.100905,3,5.24276,41,41,28,57,13,3,86,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12509,10458,11.410345,21.963793,12.558621,8.743103,9.268966,11.873022,21.78783,12.704057,8.783164,9.30142,86957.45836,Com rurale < 2 000 m habts,13.0,0.0,0.0,0.0,0.0,3.Accroissement par excédent migratoire,49,0,109,11483.5,0.0,0.0,0.0,9364,9350,3372,15105,173681,0.0,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,45.749886,5.594585,Commune de la couronne,Aire de 20 000 à moins de 30 000 habitants,Rural à habitat dispersé,109.0,12.0,16.0,15.0,29.0,27.0,10.0,89.0,0.0,0.0,5.0,25.0,15.0,15.0,25.0,5.0,13.793103,1764.0,74088.0,3111696.0,0.02381,0.000567,1.0,1.0,1.0,1.0,1.0,14641,1771561,214358881,0.008264,6.8e-05,676,17576,456976,0.038462,0.001479,441,9261,194481,0.047619,0.002268,100,1000,10000,0.1,0.01,100,1000,10000,0.1,0.01,0.010182,0.001027,0.000104,9.910299,98.214026,9,27,81,0.333333,0.111111,27.486532,144.1053,755.5095,0.190739,0.036381,1681,68921,2825761,0.02439,0.000594884,1681,68921,2825761,0.02439,0.000594884,784,21952,614656,0.035714,0.00127551,3249,185193,10556001,0.017544,0.000307787,169,2197,28561,0.076923,0.005917,9,27,81,0.333333,0.111111,7396,636056,54700816,0.011628,0.0001352082,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,156475081,1957346788229,24484450973956561,8e-05,6.390794e-09,109369764,1143788991912,11961745277415696,9.6e-05,9.143295e-09,130.195969,1485.580903,16950.99038,0.08764,0.007681,482.408207,10595.514056,232717.678509,0.045529,0.002073,157.718954,1980.732514,24875.268336,0.079627,0.00634,76.441858,668.339071,5843.35764,0.114376,0.013082,85.913722,796.331324,7381.167586,0.107887,0.01164,140.968659,1673.724031,19872.162756,0.084225,0.007094,474.709519,10342.890117,225349.127538,0.045897,0.002107,161.393059,2050.346591,26047.719551,0.078715,0.006196,77.143975,677.568208,5951.192897,0.113854,0.012963,86.516412,804.725472,7485.089502,0.10751,0.011559,7561600000.0,657537500000000.0,5.717779e+19,1.1e-05,1.322472e-10,169.0,2197.0,28561.0,0.076923,0.005917,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,2401,117649,5764801,0.020408,0.000416,0,0,0,inf,inf,11881,1295029,141158161,0.009174,8.4e-05,131870800.0,1514338000000.0,1.73899e+16,8.7e-05,7.583181e-09,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,87684496,821077620544,7688570838774016,0.000107,1.140452e-08,87422500,817400375000,7642693506250000,0.000107,1.14387e-08,11370384,38340934848,129285632307456,0.000297,8.794778e-08,228161025,3446372282625,52057453329050625,6.6e-05,4.38287e-09,30165089761,5239102954780241,6042180677419007937,6e-06,3.31509e-11,0.0,0.0,0.0,inf,inf,2093.052097,95756.895458,4380867.0,0.021858,0.000478,31.299377,175.107012,979.65099,0.178744,0.03195,11881.0,1295029.0,141158200.0,0.009174,8.4168e-05,144.0,1728.0,20736.0,0.083333,0.006944444,256.0,4096.0,65536.0,0.0625,0.00390625,225.0,3375.0,50625.0,0.066667,0.004444444,841.0,24389.0,707281.0,0.034483,0.001189061,729.0,19683.0,531441.0,0.037037,0.001371742,100.0,1000.0,10000.0,0.1,0.01,7921.0,704969.0,62742240.0,0.011236,0.0001262467,0.0,0.0,0.0,inf,inf,0.0,0.0,0.0,inf,inf,25.0,125.0,625.0,0.2,0.04,625.0,15625.0,390625.0,0.04,0.0016,225.0,3375.0,50625.0,0.066667,0.004444444,225.0,3375.0,50625.0,0.066667,0.004444444,625.0,15625.0,390625.0,0.04,0.0016,25.0,125.0,625.0,0.2,0.04,190.249703,2624.133831,36194.949391,0.0725,0.005256


In [46]:
#Train-test splitting

print("Dividing into train and test sets...")
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
print("...Done.")
print()

Dividing into train and test sets...
...Done.



In [47]:
#Multiple Transformations preprocessing

# Create pipeline for numeric features
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')), # missing values will be replaced by columns' mean
    ('scaler', StandardScaler())
])

# Create pipeline for categorical features
categorical_transformer = Pipeline(
    steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')), # missing values will be replaced by most frequent value
    ('encoder', OneHotEncoder(drop='first', handle_unknown='ignore')) # first column will be dropped to avoid creating correlations between features
    ])

# Use ColumnTransformer to make a preprocessor object that describes all the treatments to be done
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [48]:
# Preprocessings on train set
print("Performing preprocessings on train set...")
print(X_train.head())
X_train = preprocessor.fit_transform(X_train)
print('...Done.')
print(X_train[0:5,:])
print()

# Preprocessings on test set
print("Performing preprocessings on test set...")
print(X_test.head())
X_test = preprocessor.transform(X_test) # Don't fit again !!
print('...Done.')
print(X_test[0:5,:])
print()

Performing preprocessings on train set...
       Dynamique Entrepreneuriale  \
5635                         18.0   
3516                         21.0   
26672                        24.0   
23722                        15.0   
29126                       207.0   

       Dynamique Entrepreneuriale Service et Commerce  \
5635                                             13.0   
3516                                              5.0   
26672                                             1.0   
23722                                             2.0   
29126                                           199.0   

       Synergie Médicale COMMUNE           SEG Croissance POP  \
5635                          59  en croissance démographique   
3516                          64  en croissance démographique   
26672                         91  en croissance démographique   
23722                         53  en croissance démographique   
29126                        191  en croissance démographique   

 

In [49]:
# Train model
model = LinearRegression() # or LniearRegression pour regression lineaire par exemple
print("Training model...")
model.fit(X_train, Y_train)  # Training is always done on train set !!
print("...Done.")

Training model...
...Done.


In [50]:
# Print R^2 scores
print("R2 score on training set : ", model.score(X_train, Y_train))
print("R2 score on test set : ", model.score(X_test, Y_test))

R2 score on training set :  0.23714806973656788
R2 score on test set :  0.23177815983702088


### Le feature engineering n'améliore pas le score :(

## Forward selection

In [15]:
from sklearn.feature_selection import  SequentialFeatureSelector
feature_selector =  SequentialFeatureSelector(model, n_features_to_select = 20)
feature_selector.fit(X_train, Y_train)
features_list = X.columns
best_features = features_list[feature_selector.support_]
print("According to the forward selection algorithm, the following features should be kept: ")
print(best_features.to_list())

IndexError: boolean index did not match indexed array along dimension 0; dimension is 87 but corresponding boolean dimension is 127

In [17]:
print("Dimensions de X :", X.shape)
print("Dimensions de X_train :", X_train.shape)
print("Dimensions de Y_train :", Y_train.shape)
#print("Indices des caractéristiques sélectionnées :", selected_features_indices.shape)

Dimensions de X : (38590, 87)
Dimensions de X_train : (30872, 127)
Dimensions de Y_train : (30872,)


In [18]:
from sklearn.feature_selection import SelectFromModel

# Create a feature selector based on the model
selector = SelectFromModel(model, max_features=20)

# Fit the selector to the training data
selector.fit(X_train, Y_train)

# Get the indices of the selected features
selected_feature_indices = selector.get_support(indices=True)

# Get the names of the selected features
selected_features = X.columns[selected_feature_indices]

print("The selected features are:")
print(selected_features.tolist())

The selected features are:
['Dynamique Entrepreneuriale', 'Nb Ménages', 'Nb propriétaire', 'Nb Logement', 'Nb Résidences Secondaires', 'Nb Entreprises Secteur Industrie', 'Nb Création Construction', 'Nb Création Commerces', 'Nb de Services aux particuliers', 'longitude']


In [None]:
X_best = X.loc[:, best_features]

In [None]:
#Train-test splitting

print("Dividing into train and test sets...")
X_train, X_test, Y_train, Y_test = train_test_split(X_best, Y, test_size=0.2, random_state=42)
print("...Done.")
print()

In [None]:
# Automatically detect names of numeric/categorical columns
numeric_features = []
categorical_features = []
for i, t in X.dtypes.items():
    if ('float' in str(t)) or ('int' in str(t)):
        numeric_features.append(i)
    else:
        categorical_features.append(i)
print('Found numeric features ', numeric_features)
print('Found categorical features ', categorical_features)

In [None]:
#Multiple Transformations preprocessing

# Create pipeline for numeric features
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')), # missing values will be replaced by columns' mean
    ('scaler', StandardScaler())
])

# Create pipeline for categorical features
categorical_transformer = Pipeline(
    steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')), # missing values will be replaced by most frequent value
    ('encoder', OneHotEncoder(drop='first', handle_unknown='ignore')) # first column will be dropped to avoid creating correlations between features
    ])

# Use ColumnTransformer to make a preprocessor object that describes all the treatments to be done
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [None]:
# Preprocessings on train set
print("Performing preprocessings on train set...")
print(X_train.head())
X_train = preprocessor.fit_transform(X_train)
print('...Done.')
print(X_train[0:5,:])
print()

# Preprocessings on test set
print("Performing preprocessings on test set...")
print(X_test.head())
X_test = preprocessor.transform(X_test) # Don't fit again !!
print('...Done.')
print(X_test[0:5,:])
print()

In [None]:
# Train model
model = LinearRegression() # or LniearRegression pour regression lineaire par exemple
print("Training model...")
model.fit(X_train, Y_train)  # Training is always done on train set !!
print("...Done.")

In [None]:
# Print R^2 scores
print("R2 score on training set : ", model.score(X_train, Y_train))
print("R2 score on test set : ", model.score(X_test, Y_test))