# Importation des bibliothèques

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import missingno as msno
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder

# Lecture du dataset

In [None]:
df = pd.read_csv('wandaloo_cars.csv')

# Data cleaning et data preprocessing

## Duplication

In [None]:
df.duplicated().any().any()

In [None]:
df[df.duplicated()]

In [None]:
df=df.drop_duplicates()

In [None]:
df.duplicated().any().any()

In [None]:
df[df.duplicated()]

## Missing values

In [None]:
msno.matrix(df)
plt.show()

In [None]:
# Columns excluded from the rest of the analysis.
cols_to_drop = [
    'Architecture',
    'Cylindrée',
    'Conso. ville',
    'Conso. route',
    'Vitesse maxi.',
    'Volume du réservoir',
    'Modèle',
]
df = df.drop(columns=cols_to_drop)

In [None]:
df.isnull().sum()

In [None]:
index_null = df[df['Price'].isnull()].index.tolist()
index_null

In [None]:
index_null = df[df['Brand'].isnull()].index.tolist()
index_null

In [None]:
index_null = df[df['Model'].isnull()].index.tolist()
index_null

In [None]:
# Drop rows that lack any of the columns inspected in the cells above
# (price, brand, model) instead of hard-coding index label 48, which only
# holds for one particular version of the CSV.
# NOTE(review): presumably row 48 was the only row flagged by those checks — confirm.
df = df.dropna(subset=['Price', 'Brand', 'Model'])

In [None]:
index_null = df[df['Puissance fiscale'].isnull()].index.tolist()
index_null

In [None]:
# Remove every row whose 'Puissance fiscale' is missing. Selecting them by
# condition (rather than a pasted list of index labels) keeps the cell
# correct when the dataset changes and makes it safe to re-run.
df = df.dropna(subset=['Puissance fiscale'])

In [None]:
df.isnull().sum() 

In [None]:
# Assign the result back: fillna(inplace=True) on a column selection is a
# chained assignment that may not update `df` (pandas Copy-on-Write).
df['Version'] = df['Version'].fillna("Version non connue")

In [None]:
df['Price'] = df['Price'].str.strip("DH")
df['Price']

In [None]:
# Keep only digits and dots. Raw string: "\." is an invalid escape sequence
# in a regular string literal.
df["Price"] = df["Price"].replace(r"[^0-9\.]", "", regex=True)
df['Price']

In [None]:
# Remove the dots literally. regex=False is explicit because the default of
# Series.str.replace changed between pandas versions and "." as a regex
# would match every character.
df["Price"] = df["Price"].str.replace(".", "", regex=False)
df['Price']

In [None]:
df["Price"] = df["Price"].astype(float)

In [None]:
# 'Main' is imputed on its own, so there are no other features for an
# iterative imputer to regress on, and a one-hot matrix with several columns
# cannot be assigned back to a single column. Fill the gaps with the most
# frequent value instead (same approach as 'Ouverture auto. du coffre').
main_imputer = SimpleImputer(strategy='most_frequent')
main_filled = main_imputer.fit_transform(df[['Main']]).ravel()

# Store integer category codes so the column is numeric for the models below.
# NOTE(review): codes follow the sorted order of the labels; confirm that an
# ordinal encoding is acceptable for this feature.
df['Main'] = pd.Categorical(main_filled).codes

In [None]:
df['Kilométrage']= df['Kilométrage'].str.strip('Km')
df['Kilométrage']

In [None]:
df['Kilométrage']= df['Kilométrage'].str.strip('k')
df['Kilométrage'].value_counts()

In [None]:
df['Kilométrage'].dtypes

In [None]:
# Strip the dots literally (presumably thousands separators — confirm), then
# convert to float. regex=False avoids "." being read as "any character".
df["Kilométrage"] = df["Kilométrage"].str.replace(".", "", regex=False)
df["Kilométrage"] = df["Kilométrage"].astype('float')
df['Kilométrage']

In [None]:
# With a single feature, IterativeImputer has nothing to regress on and only
# applies its initial mean fill; LogisticRegression (a classifier) is also
# unsuitable for a continuous mileage. Use an explicit mean imputer.
imputer = SimpleImputer(strategy='mean')
df['Kilométrage'] = imputer.fit_transform(df[['Kilométrage']]).ravel()

In [None]:
# Assign back instead of a chained inplace fillna, which may not update `df`.
df['Carburant'] = df['Carburant'].fillna('Unknown')

In [None]:
# Pie chart of the number of cars per fuel type.
sns.set(style="ticks", font_scale=1.5, rc={"figure.figsize":(10, 30)})
fig, ax = plt.subplots(figsize=(10, 10))
fuel_counts = df.Carburant.value_counts()
fuel_counts.plot(kind='pie', ax=ax)
ax.set_ylabel("")
ax.set_title("Distribution des voitures par type de Carburant")
plt.show()

In [None]:
# Assign back instead of a chained inplace fillna, which may not update `df`.
df['Transmision'] = df['Transmision'].fillna('Unknown')

In [None]:
df['Puissance fiscale'] = df['Puissance fiscale'].str.strip("cv")
df['Puissance fiscale'].value_counts()

In [None]:
df["Puissance fiscale"] = df["Puissance fiscale"].str.replace("-", "0")

In [None]:
index1 = df['Puissance fiscale'].index[df['Puissance fiscale'] == ''][0]

In [None]:
index1

In [None]:
# Use the label found by the lookup above rather than a hard-coded 2607,
# so the cell still targets the right row if the data changes.
df.at[index1, 'Puissance fiscale'] = '0'

In [None]:
index2 = df['Puissance fiscale'].index[df['Puissance fiscale'] == ''][0]

In [None]:
index2

In [None]:
# Use the label found by the lookup above rather than a hard-coded 2608.
df.at[index2, 'Puissance fiscale'] = '0'

In [None]:
df['Puissance fiscale'] = df['Puissance fiscale'].astype('float')

In [None]:
# Zeros are placeholders for unknown values, so exclude them from the mean
# used to replace them; otherwise the fill value is biased towards zero.
fiscal_power = df['Puissance fiscale']
df['Puissance fiscale'] = fiscal_power.replace(0, fiscal_power[fiscal_power != 0].mean())

In [None]:
# Assign back instead of a chained inplace fillna, which may not update `df`.
df['Couleur extérieure'] = df['Couleur extérieure'].fillna('Unknown')

In [None]:
#sns.set(style="ticks", font_scale=1.5, rc={"figure.figsize":(200, 200)})
plt.figure(figsize = (20, 15))
sns.countplot(x="Couleur extérieure", data=df,palette = 'viridis')
#sns.countplot(df['Couleur extérieure'], palette = 'viridis')
plt.title("Le nombre de voitures par couleurs", fontsize = 20)
plt.show()

In [None]:
df['Etat du véhicule'].value_counts()

In [None]:
# Treat both the '-' placeholder and missing values as 'Unknown'. The result
# is assigned back because a chained inplace fillna may not update `df`.
df['Etat du véhicule'] = (
    df['Etat du véhicule']
    .replace('-', 'Unknown')
    .fillna('Unknown')
)

In [None]:
plt.figure(figsize = (10, 10))
sns.countplot(x = 'Etat du véhicule', data = df, palette = 'Set1')

In [None]:
df['Climatisation'].value_counts()

In [None]:
df['Climatisation'] = df['Climatisation'].replace('تلقاءي.','auto.') 
df['Climatisation'] = df['Climatisation'].replace('non','non connue')  

In [None]:
df['Climatisation'].value_counts()

In [None]:
df['Vitres électriques'].value_counts()

In [None]:
df['Vitres électriques'] = df['Vitres électriques'].replace('non','pas de vitres électriques')
df['Vitres électriques'] = df['Vitres électriques'].replace('-','Unknown') 

In [None]:
df['Vitres électriques'].value_counts()

In [None]:
df['Sièges électriques'].value_counts()

In [None]:
# Clean 'Sièges électriques' from its own values. The original read from
# 'Vitres électriques', which overwrote this column with the window data.
df['Sièges électriques'] = (
    df['Sièges électriques']
    .fillna('Unknown')
    .replace('non', 'pas de sièges')
)

In [None]:
df['Sièges électriques'].value_counts()

In [None]:
df['Ordinateur de bord'].value_counts()

In [None]:
df["Ordinateur de bord"] = df["Ordinateur de bord"].map({"oui": 1, "non": 0})

In [None]:
df['Start & Stop'].value_counts()

In [None]:
df["Start & Stop"] = df["Start & Stop"].map({"oui": 1, "non": 0})

In [None]:
df['Régulateur de vitesse'].value_counts()

In [None]:
df["Régulateur de vitesse"] = df["Régulateur de vitesse"].map({"oui": 1, "non": 0})

In [None]:
df['Allumage auto. des feux'].value_counts()

In [None]:
df["Allumage auto. des feux"] = df["Allumage auto. des feux"].map({"oui": 1, "non": 0})

In [None]:
df['Détecteur de pluie'].value_counts()

In [None]:
df["Détecteur de pluie"] = df["Détecteur de pluie"].map({"oui": 1, "non": 0})

In [None]:
df['Commandes au volant'].value_counts()

In [None]:
df["Commandes au volant"] = df["Commandes au volant"].map({"oui": 1, "non": 0})

In [None]:
df['Ecran tactile'].value_counts()

In [None]:
df["Ecran tactile"] = df["Ecran tactile"].map({"oui": 1, "non": 0})

In [None]:
df['Rétroviseurs électriques'].value_counts()

In [None]:
df["Rétroviseurs électriques"] = df["Rétroviseurs électriques"].map({"oui": 1, "non": 0})

In [None]:
mode_imputer = SimpleImputer(strategy='most_frequent')
data_imputed = mode_imputer.fit_transform(df[['Ouverture auto. du coffre']])
df['Ouverture auto. du coffre'] = data_imputed

In [None]:
df['Ouverture auto. du coffre'].value_counts()

In [None]:
df["Ouverture auto. du coffre"] = df["Ouverture auto. du coffre"].map({"oui": 1, "non": 0})

In [None]:
df['Démarrage mains libres'].value_counts()

In [None]:
df["Démarrage mains libres"] = df["Démarrage mains libres"].map({"oui": 1, "non": 0})

In [None]:
df['Banquette arrière rabattable 1/3-2/3'].value_counts()

In [None]:
df["Banquette arrière rabattable 1/3-2/3"] = df["Banquette arrière rabattable 1/3-2/3"].map({"oui": 1, "non": 0})

In [None]:
df['Caméra de recul'].value_counts()

In [None]:
df["Caméra de recul"] = df["Caméra de recul"].map({"oui": 1, "non": 0})

In [None]:
df['Bluetooth'].value_counts()

In [None]:
df["Bluetooth"] = df["Bluetooth"].map({"oui": 1, "non": 0})

In [None]:
df['Jantes aluminium'].value_counts()

In [None]:
# Any rim size ("<digits> pouces") means the car has alloy wheels, so collapse
# them all to "oui" in one pass. This also covers sizes that were not listed
# explicitly and would otherwise become NaN in the oui/non mapping below.
df["Jantes aluminium"] = df["Jantes aluminium"].str.replace(
    r"\d+ pouces", "oui", regex=True
)

In [None]:
df['Jantes aluminium'].value_counts()

In [None]:
df["Jantes aluminium"] = df["Jantes aluminium"].map({"oui": 1, "non": 0})

In [None]:
df['Volant cuir'].value_counts()

In [None]:
df["Volant cuir"] = df["Volant cuir"].map({"oui": 1, "non": 0})

In [None]:
df['Feux de jour'].value_counts()

In [None]:
df["Feux de jour"] = df["Feux de jour"].map({"oui": 1, "non": 0})

In [None]:
df['Barres de toit'].value_counts()

In [None]:
df["Barres de toit"] = df["Barres de toit"].map({"oui": 1, "non": 0})

In [None]:
df['Toit'].value_counts()

In [None]:
df['Airbags'].value_counts()

In [None]:
# Binary-encode airbags: "oui" or a non-zero count -> 1, "non" or "0.0" -> 0.
# Values not listed in the mapping become NaN.
# NOTE(review): the keys are strings ("6.0", ...); this assumes the column
# holds strings, not floats — confirm against the value_counts output above.
df["Airbags"] = df["Airbags"].map({"oui": 1, "non": 0,"6.0": 1, "8.0": 1,"7.0": 1, "2.0": 1,"4.0": 1, "9.0": 1,"10.0": 1, "1.0": 1, "0.0": 0})

In [None]:
df['ABS'].value_counts()

In [None]:
df["ABS"] = df["ABS"].map({"oui": 1, "non": 0})

In [None]:
df['ESP'].value_counts()

In [None]:
df["ESP"] = df["ESP"].map({"oui": 1, "non": 0})

In [None]:
df['Antipatinage'].value_counts()

In [None]:
df["Antipatinage"] = df["Antipatinage"].map({"oui": 1, "non": 0})

In [None]:
df.isnull().sum()

In [None]:
df

In [None]:
msno.matrix(df)
plt.show()

# Visualisation

In [None]:
# Correlate numeric columns only: `df` still holds text columns at this
# point, and DataFrame.corr raises on those in pandas >= 2.0.
correlation_matrix = df.select_dtypes(include='number').corr()

In [None]:
plt.figure(figsize = (35, 35))
sns.heatmap(correlation_matrix, annot=True)
plt.title('Matrice de corrélation', fontsize = 30)

In [None]:
# Fixed seed so the plots below are reproducible; cap n at the number of
# rows so the cell does not fail on a dataset with fewer than 1000 rows.
df_sample = df.sample(n=min(1000, len(df)), random_state=2)

In [None]:
sns.set(style="ticks", font_scale=1.5, rc={"figure.figsize":(40, 20)})
sns.boxplot(x="Brand", y="Price", data=df_sample)
plt.title('Le prix en fonction de la marque de la voiture', fontsize = 70)

In [None]:
sns.histplot(df_sample, x='Price', bins=15)
plt.title('Les prix du marché', fontsize = 70)

In [None]:
plt.figure(figsize=(10,8)) 
sns.scatterplot(data=df_sample, x='Puissance fiscale', y='Price',hue='Transmision',palette='viridis',alpha=.89 , s=120 ); 
plt.xticks(fontsize=13); 
plt.yticks(fontsize=13) 
plt.xlabel('power',fontsize=14) 
plt.ylabel('price',fontsize=14) 
plt.title('Relation entre la puissance fiscale et le prix',fontsize=20);

In [None]:
plt.figure(figsize=(16,7)) 
sns.countplot(data=df_sample, y='Brand',alpha=.6,color='blue') 
plt.title('Voitures par type de marque',fontsize =20) 
plt.xticks(fontsize=14) 
plt.yticks(fontsize=14) 
plt.xlabel('') 
plt.ylabel('');

In [None]:
sns.set(style="ticks", font_scale=1.5, rc={"figure.figsize":(40, 20)})
sns.lmplot(data=df_sample , x="Kilométrage",y="Price",fit_reg=False,hue="Carburant",aspect=1)

In [None]:
sns.set(style="ticks", font_scale=1.5, rc={"figure.figsize":(10, 20)})
plt.scatter(df['Jantes aluminium'], df['Price'])
plt.xlabel('Jantes aluminium')
plt.ylabel('Price')
plt.title('Le prix en fonction de Jantes aluminium ')

In [None]:
df.dtypes

In [None]:
df = pd.get_dummies(df, columns=['Brand', 'Model', 'Version','Carburant','Transmision', 'Climatisation','Vitres électriques','Sièges électriques', 'Toit','Couleur extérieure', 'Etat du véhicule'])


# Modèles de prédiction des prix

## Sur tout le dataset

In [None]:
y = df['Price']
X = df.drop(['Price'], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train , X_test , Y_train , Y_test = train_test_split(X , y , test_size = 0.40,random_state =2)

In [None]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.model_selection import cross_val_score

# Fit a linear regression on the training split and predict the test split.
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)

predicted = regr.predict(X_test)
residual = Y_test - predicted

# Residual diagnostics: distribution (top) and residuals vs. predictions (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(30, 30))

# sns.distplot is deprecated; histplot(kde=True) replaces it and plots raw
# counts, which matches the 'Count' axis label.
sns.histplot(residual, kde=True, color='teal', ax=ax1)
ax1.tick_params(axis='both', which='major', labelsize=20)
ax1.set_title('Residual counts', fontsize=35)
ax1.set_xlabel('Residual', fontsize=25)
ax1.set_ylabel('Count', fontsize=25)

ax2.scatter(predicted, residual, color='teal')
ax2.tick_params(axis='both', which='major', labelsize=20)
ax2.set_xlabel('Predicted', fontsize=25)
ax2.set_ylabel('Residual', fontsize=25)
ax2.axhline(y=0)
ax2.set_title('Residual vs. Predicted', fontsize=35)

plt.show()

rmse = np.sqrt(mean_squared_error(Y_test, predicted))
# 12-fold cross-validation on the full X / y, independent of the split above.
scores = cross_val_score(regr, X, y, cv=12)

print('RMSE: ',rmse)
print('\nMean Score:')
print(scores.mean())
print('Variance score: %.2f' % r2_score(Y_test, predicted))

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Depth-limited regression tree trained on the same split as the linear model.
model = DecisionTreeRegressor(max_depth=5)
model.fit(X_train, Y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(Y_test, y_pred)
print("Erreur moyenne absolue :", mae)
# Score the tree's own predictions; the original passed `predicted`, which
# holds the linear regression output from the previous cell.
print('Variance score: %.2f' % r2_score(Y_test, y_pred))

In [None]:
df

In [None]:
data= df[['Price','Puissance fiscale', 'Start & Stop','Allumage auto. des feux', 'Détecteur de pluie', 'Ecran tactile', 'Démarrage mains libres','Caméra de recul','Volant cuir','Feux de jour']]

In [None]:
data

## Avec features selection

In [None]:
y = data['Price']
X = data.drop(['Price'], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train , X_test , Y_train , Y_test = train_test_split(X , y , test_size = 0.40,random_state =2)

In [None]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.model_selection import cross_val_score

# Fit a linear regression on the training split and predict the test split.
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)

predicted = regr.predict(X_test)
residual = Y_test - predicted

# Residual diagnostics: distribution (top) and residuals vs. predictions (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(30, 30))

# sns.distplot is deprecated; histplot(kde=True) replaces it and plots raw
# counts, which matches the 'Count' axis label.
sns.histplot(residual, kde=True, color='teal', ax=ax1)
ax1.tick_params(axis='both', which='major', labelsize=20)
ax1.set_title('Residual counts', fontsize=35)
ax1.set_xlabel('Residual', fontsize=25)
ax1.set_ylabel('Count', fontsize=25)

ax2.scatter(predicted, residual, color='teal')
ax2.tick_params(axis='both', which='major', labelsize=20)
ax2.set_xlabel('Predicted', fontsize=25)
ax2.set_ylabel('Residual', fontsize=25)
ax2.axhline(y=0)
ax2.set_title('Residual vs. Predicted', fontsize=35)

plt.show()

rmse = np.sqrt(mean_squared_error(Y_test, predicted))
# 12-fold cross-validation on the full X / y, independent of the split above.
scores = cross_val_score(regr, X, y, cv=12)

print('RMSE: ',rmse)
print('\nMean Score:')
print(scores.mean())
print('Variance score: %.2f' % r2_score(Y_test, predicted))


In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Depth-limited regression tree trained on the same split as the linear model.
model = DecisionTreeRegressor(max_depth=5)
model.fit(X_train, Y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(Y_test, y_pred)
print("Erreur moyenne absolue :", mae)
# Score the tree's own predictions; the original passed `predicted`, which
# holds the linear regression output from the previous cell.
print('Variance score: %.2f' % r2_score(Y_test, y_pred))

# Amélioration des performances

In [None]:
data_frame= pd.read_csv('wandaloo_cars.csv')

In [None]:
data_frame.duplicated().any().any()

In [None]:
data_frame=data_frame.drop_duplicates()

In [None]:
data_frame.duplicated().any().any()

In [None]:
msno.matrix(data_frame)
plt.show()

In [None]:
# Columns excluded from the reduced dataset ('Modèle' was listed twice).
cols_to_drop = [
    'Architecture', 'Cylindrée', 'Conso. ville', 'Conso. route',
    'Vitesse maxi.', 'Volume du réservoir', 'Modèle', 'Version', 'Main',
    'Carburant', 'Transmision', 'Couleur extérieure', 'Etat du véhicule',
    'Ouverture auto. du coffre', 'Kilométrage', 'Sièges électriques',
    'Vitres électriques',
]
data_frame = data_frame.drop(columns=cols_to_drop)

In [None]:
msno.matrix(data_frame)
plt.show()

In [None]:
index_null = data_frame[data_frame['Puissance fiscale'].isnull()].index.tolist()
index_null

In [None]:
# Drop rows missing the target, the car identity or the fiscal power by
# condition instead of a pasted list of index labels, so the cell stays
# correct when the CSV changes and can be re-run safely.
# NOTE(review): label 48 presumably is the row with missing Price/Brand/Model
# removed in the first pipeline — confirm.
data_frame = data_frame.dropna(
    subset=['Price', 'Brand', 'Model', 'Puissance fiscale']
)

In [None]:
data_frame.isnull().sum()

In [None]:
# Keep only the digits of the price text, then convert to float. One explicit
# regex replaces the strip / replace / replace chain, which used an invalid
# "\." escape and relied on the version-dependent `regex` default of
# Series.str.replace.
data_frame["Price"] = (
    data_frame["Price"]
    .str.replace(r"[^0-9]", "", regex=True)
    .astype(float)
)


In [None]:
data_frame['Puissance fiscale'] = data_frame['Puissance fiscale'].str.strip("cv")
data_frame['Puissance fiscale'].value_counts()


In [None]:
data_frame["Puissance fiscale"] = data_frame["Puissance fiscale"].str.replace("-", "0")

In [None]:
index1 = data_frame['Puissance fiscale'].index[data_frame['Puissance fiscale'] == ''][0]

In [None]:
index1

In [None]:
# Use the label found by the lookup above rather than a hard-coded 2607.
data_frame.at[index1, 'Puissance fiscale'] = '0'

In [None]:
index2 = data_frame['Puissance fiscale'].index[data_frame['Puissance fiscale'] == ''][0]

In [None]:
index2

In [None]:
# Use the label found by the lookup above rather than a hard-coded 2608.
data_frame.at[index2, 'Puissance fiscale'] = '0'

In [None]:
data_frame['Puissance fiscale'] = data_frame['Puissance fiscale'].astype('float')

In [None]:
# Compute the fill value from `data_frame` itself; the original read the mean
# from `df`, the frame of the first pipeline. Placeholder zeros are excluded
# from the mean so they do not bias it.
fiscal_power = data_frame['Puissance fiscale']
data_frame['Puissance fiscale'] = fiscal_power.replace(
    0, fiscal_power[fiscal_power != 0].mean()
)

In [None]:
data_frame['Climatisation'].value_counts()

In [None]:
data_frame['Climatisation'] = data_frame['Climatisation'].replace('تلقاءي.','auto.') 
data_frame['Climatisation'] = data_frame['Climatisation'].replace('non','non connue')  

# Prediction

In [None]:
data_frame.dtypes

In [None]:
data_frame_new= pd.get_dummies(data_frame)

In [None]:
y = data_frame_new['Price']
X = data_frame_new.drop(['Price'], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train , X_test , Y_train , Y_test = train_test_split(X , y , test_size = 0.40,random_state =2)

In [None]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.model_selection import cross_val_score

# Fit a linear regression on the training split and predict the test split.
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)

predicted = regr.predict(X_test)
residual = Y_test - predicted

# Residual diagnostics: distribution (top) and residuals vs. predictions (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(30, 30))

# sns.distplot is deprecated; histplot(kde=True) replaces it and plots raw
# counts, which matches the 'Count' axis label.
sns.histplot(residual, kde=True, color='teal', ax=ax1)
ax1.tick_params(axis='both', which='major', labelsize=20)
ax1.set_title('Residual counts', fontsize=35)
ax1.set_xlabel('Residual', fontsize=25)
ax1.set_ylabel('Count', fontsize=25)

ax2.scatter(predicted, residual, color='teal')
ax2.tick_params(axis='both', which='major', labelsize=20)
ax2.set_xlabel('Predicted', fontsize=25)
ax2.set_ylabel('Residual', fontsize=25)
ax2.axhline(y=0)
ax2.set_title('Residual vs. Predicted', fontsize=35)

plt.show()

rmse = np.sqrt(mean_squared_error(Y_test, predicted))
# 12-fold cross-validation on the full X / y, independent of the split above.
scores = cross_val_score(regr, X, y, cv=12)

print('RMSE: ',rmse)
print('\nMean Score:')
print(scores.mean())
print('Variance score: %.2f' % r2_score(Y_test, predicted))

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Depth-limited regression tree trained on the same split as the linear model.
model = DecisionTreeRegressor(max_depth=5)
model.fit(X_train, Y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(Y_test, y_pred)
print("Erreur moyenne absolue :", mae)
# Score the tree's own predictions; the original passed `predicted`, which
# holds the linear regression output from the previous cell.
print('Variance score: %.2f' % r2_score(Y_test, y_pred))

## Features selection

In [None]:
data_frame= data_frame[['Price','Puissance fiscale', 'Start & Stop','Allumage auto. des feux', 'Détecteur de pluie', 'Ecran tactile', 'Démarrage mains libres','Caméra de recul','Volant cuir']]

In [None]:
data_frame= pd.get_dummies(data_frame)

In [None]:
y = data_frame['Price']
X = data_frame.drop(['Price'], axis=1)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train , X_test , Y_train , Y_test = train_test_split(X , y , test_size = 0.40,random_state =2)

In [None]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.model_selection import cross_val_score

# Fit a linear regression on the training split and predict the test split.
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)

predicted = regr.predict(X_test)
residual = Y_test - predicted

# Residual diagnostics: distribution (top) and residuals vs. predictions (bottom).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(30, 30))

# sns.distplot is deprecated; histplot(kde=True) replaces it and plots raw
# counts, which matches the 'Count' axis label.
sns.histplot(residual, kde=True, color='teal', ax=ax1)
ax1.tick_params(axis='both', which='major', labelsize=20)
ax1.set_title('Residual counts', fontsize=35)
ax1.set_xlabel('Residual', fontsize=25)
ax1.set_ylabel('Count', fontsize=25)

ax2.scatter(predicted, residual, color='teal')
ax2.tick_params(axis='both', which='major', labelsize=20)
ax2.set_xlabel('Predicted', fontsize=25)
ax2.set_ylabel('Residual', fontsize=25)
ax2.axhline(y=0)
ax2.set_title('Residual vs. Predicted', fontsize=35)

plt.show()

rmse = np.sqrt(mean_squared_error(Y_test, predicted))
# 12-fold cross-validation on the full X / y, independent of the split above.
scores = cross_val_score(regr, X, y, cv=12)

print('RMSE: ',rmse)
print('\nMean Score:')
print(scores.mean())
print('Variance score: %.2f' % r2_score(Y_test, predicted))

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Depth-limited regression tree trained on the same split as the linear model.
model = DecisionTreeRegressor(max_depth=5)
model.fit(X_train, Y_train)

y_pred = model.predict(X_test)
mae = mean_absolute_error(Y_test, y_pred)
print("Erreur moyenne absolue :", mae)
# Score the tree's own predictions; the original passed `predicted`, which
# holds the linear regression output from the previous cell.
print('Variance score: %.2f' % r2_score(Y_test, y_pred))