In [53]:
import pandas as pd
from numpy import asarray
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, OneHotEncoder, PolynomialFeatures
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
import pickle

### Régression Linéaire

Import des données

In [54]:
# Load the cleaned cars dataset (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='data/cars_cleaned.csv')

Séparation de la cible (y) et des variables explicatives (X)

In [55]:
# Predictor matrix: every column used as a model input, in the order the
# resulting frame exposes them.
X = df[[
    'type_carburant',
    'aspiration',
    'nombre_portes',
    'type_carrosserie',
    'roues_motrices',
    'emplacement_moteur',
    'empattement',
    'longueur_voiture',
    'largeur_voiture',
    'hauteur_voiture',
    'poids_vide',
    'type_moteur',
    'nombre_cylindres',
    'taille_moteur',
    'systeme_carburant',
    'alesage',
    'course',
    'taux_compression',
    'puissance',
    'trmin_max',
    'consommation_ville',
    'consommation_autoroute',
    'marque',
    'modele',
]]

# Regression target: the 'prix' column.
y = df['prix']


Préparation du pipeline

In [56]:
# Split predictors and target into train/test parts: 80% of the rows go to the
# training set, and the fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, train_size=0.8)

In [57]:
# Columns handed to the numeric ('num') branch of the preprocessors.
numeric_features = [
    'longueur_voiture',
    'hauteur_voiture',
    'largeur_voiture',
    'empattement',
    'taille_moteur',
    'poids_vide',
    'consommation_ville',
    'consommation_autoroute',
    'puissance',
    'trmin_max',
    'nombre_cylindres',
    'alesage',
    'course',
    'taux_compression',
]

# Columns handed to the categorical ('cat') branch of the preprocessors.
categorial_features = [
    'marque',
    'modele',
    'type_carrosserie',
    'nombre_portes',
    'type_carburant',
    'emplacement_moteur',
    'systeme_carburant',
    'aspiration',
    'roues_motrices',
    'type_moteur',
]


In [58]:
# Numeric transformers: one single-step pipeline per candidate scaler.
numeric_transformer_minmax = Pipeline(steps=[('minmax', MinMaxScaler())])
numeric_transformer_std = Pipeline(steps=[('standard', StandardScaler())])
# NOTE(review): this step is also named 'standard' although it wraps a
# RobustScaler; the name is kept as-is so existing parameter paths do not change.
numeric_transformer_rbst = Pipeline(steps=[('standard', RobustScaler())])

In [59]:
# One-hot encoder for the categorical columns. handle_unknown='ignore' means a
# category that was not present at fit time does not raise at transform time.
categorial_transformer = OneHotEncoder(handle_unknown='ignore', sparse_output=True)

In [60]:
def make_preprocessor(numeric_transformer):
    """Build the column-wise preprocessor around one numeric transformer.

    The three preprocessors in this notebook differ only in the transformer
    applied to the numeric columns, so the shared structure lives here.

    Parameters
    ----------
    numeric_transformer : estimator
        Transformer applied to the columns listed in ``numeric_features``.

    Returns
    -------
    ColumnTransformer
        Unfitted transformer that sends ``numeric_features`` through
        ``numeric_transformer``, ``categorial_features`` through
        ``categorial_transformer``, and passes any remaining column through
        unchanged (``remainder='passthrough'``).
    """
    return ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorial_transformer, categorial_features),
        ],
        remainder='passthrough',
    )


# One preprocessor per candidate scaler; only the numeric branch changes.
preprocessor_minmax = make_preprocessor(numeric_transformer_minmax)
preprocessor_std = make_preprocessor(numeric_transformer_std)
preprocessor_rbst = make_preprocessor(numeric_transformer_rbst)

Estimateur

In [61]:
# Final estimator: a LinearRegression created with its default parameters.
lnr = LinearRegression()

In [62]:
# Full model: the robust-scaler preprocessor followed by the linear regression.
pipe = Pipeline([
    ('prep', preprocessor_rbst),
    ('lnr', lnr)
])

# Pipeline.fit returns the pipeline itself, so trained_pipe refers to the same
# (now fitted) object as pipe.
trained_pipe = pipe.fit(X_train, y_train)

# Keep the test-set predictions in a variable instead of discarding them: only
# the last expression of a cell is displayed, so a bare predict() call here
# would compute the values and throw them away.
y_pred = trained_pipe.predict(X_test)

# Displayed cell output: the pipeline's score on the held-out split
# (the coefficient of determination R^2 for a regressor).
trained_pipe.score(X_test, y_test)

0.7995277415878961

In [63]:
# Persist the fitted pipeline to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises; a bare open(...) passed
# straight to pickle.dump leaves the handle open until garbage collection.
# NOTE(review): a pickle should only be loaded from a trusted source, and
# presumably with a compatible scikit-learn version -- confirm on the consumer side.
with open('data/trained_pipe.pkl', 'wb') as model_file:
    pickle.dump(trained_pipe, model_file)