# Hands-On Machine Learning : régression pour prédire le prix d'une maison

## Import

In [1]:
import pandas as pd
import numpy as np

In [2]:
# The URLs below point to the public exercise files on GitHub, so the data is downloaded directly from GitHub.
# If an error occurs, please contact the e-mail address below.

# contact.aguynextdoor@gmail.com

# Labelled dataset; it contains the SalePrice column used as the regression target.
df_dataset = pd.read_csv('https://raw.githubusercontent.com/AGuyNextDoor/AProfNextDoor/main/Exercises/Hands%20on%20Machine%20Learning/house-prices-data/dataset.csv')
# Second table (per its name, the rows to predict and submit), indexed by its "Id" column.
df_to_predict_and_submit = pd.read_csv("https://raw.githubusercontent.com/AGuyNextDoor/AProfNextDoor/main/Exercises/Hands%20on%20Machine%20Learning/house-prices-data/data_to_predict_and_submit.csv", index_col="Id")



In [3]:
# Bare expression as the last line of the cell: the notebook renders the loaded table below.
df_dataset

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1.0,60.0,3.0,65.0,8450.0,1.0,3.0,3.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,2.0,2008.0,8.0,4.0,208500.0
1,2.0,20.0,3.0,80.0,9600.0,1.0,3.0,3.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,8.0,4.0,181500.0
2,3.0,60.0,3.0,68.0,11250.0,1.0,0.0,3.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,9.0,2008.0,8.0,4.0,223500.0
3,4.0,70.0,3.0,60.0,9550.0,1.0,0.0,3.0,0.0,0.0,...,272.0,0.0,0.0,0.0,0.0,2.0,2006.0,8.0,0.0,140000.0
4,5.0,60.0,3.0,84.0,14260.0,1.0,0.0,3.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,12.0,2008.0,8.0,4.0,250000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456.0,60.0,3.0,62.0,7917.0,1.0,3.0,3.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,8.0,2007.0,8.0,4.0,175000.0
1456,1457.0,20.0,3.0,85.0,13175.0,1.0,3.0,3.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,2.0,2010.0,8.0,4.0,210000.0
1457,1458.0,70.0,3.0,66.0,9042.0,1.0,3.0,3.0,0.0,4.0,...,0.0,0.0,0.0,0.0,2500.0,5.0,2010.0,8.0,4.0,266500.0
1458,1459.0,20.0,3.0,68.0,9717.0,1.0,3.0,3.0,0.0,4.0,...,112.0,0.0,0.0,0.0,0.0,4.0,2010.0,8.0,4.0,142125.0


## DataFrame Creation and Analysis


## Data Preprocessing

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet

# One table pairing each display name (offered in the model dropdown) with
# the scikit-learn estimator class it stands for.
_MODEL_CATALOG = {
    "Linear Regression": LinearRegression,
    "Lasso": Lasso,
    "Elastic Net": ElasticNet,
}

# Parallel lists derived from the table: position i of one matches position i
# of the other, which is how model_training maps a name back to its class.
available_models_name = list(_MODEL_CATALOG)
available_models = list(_MODEL_CATALOG.values())

In [5]:
# Target: the sale price column. Features: every other column of the dataset.
# NOTE(review): the table preview shows an "Id" column, which therefore stays
# in X as a feature -- confirm this is intended.
y = df_dataset["SalePrice"]
X = df_dataset.drop(columns="SalePrice")

def model_training(model_selection, division_value, result_data):
    """Fit the chosen model on a train/test split and log its metrics.

    Parameters
    ----------
    model_selection : str
        Display name of the model; must be an entry of
        ``available_models_name`` (``list.index`` raises ``ValueError``
        otherwise).
    division_value : int or float
        Percentage of the data held out for testing (passed to
        ``train_test_split`` as ``test_size=division_value / 100``).
    result_data : pandas.DataFrame
        Results table; one row is appended to it IN PLACE.

    Returns
    -------
    pandas.DataFrame
        The same ``result_data`` object, with the new row
        [model name, training %, train score, test score,
        number of test rows, mean absolute prediction error].
    """
    # Fixed random_state so repeated runs with the same split size compare
    # the models on identical train/test rows.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=division_value / 100, random_state=42
    )

    model_class = available_models[available_models_name.index(model_selection)]

    # Lasso and Elastic Net are given a larger iteration budget than their
    # default; Linear Regression is built with default arguments.
    if model_selection in ("Lasso", "Elastic Net"):
        model_kwargs = {"max_iter": 20000}
    else:
        model_kwargs = {}
    fit_model = model_class(**model_kwargs).fit(X_train, y_train)

    train_score = fit_model.score(X_train, y_train)
    test_score = fit_model.score(X_test, y_test)

    # Mean absolute difference between predicted and actual prices on the
    # held-out rows.
    residuals = fit_model.predict(X_test) - y_test
    average_diff = np.mean(np.abs(residuals))

    new_row = [
        model_selection,
        100 - division_value,
        train_score,
        test_score,
        X_test.shape[0],
        average_diff,
    ]
    result_data.loc[len(result_data)] = new_row

    return result_data
    
    
    
    

## MODELS

In [16]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Label, Dropdown, Button, IntSlider, interact

import panel as pn


# Vertical slider selecting the percentage of the data used for training.
dataset_division = IntSlider(
    min=1,
    max=99,
    value=70,  # default: 70 % training
    style={"description_width": "initial"},
    orientation="vertical",
)

# Dropdown listing the available model names; the first one is preselected.
models_dropdown = Dropdown(
    options=available_models_name,
    value=available_models_name[0],
    description="Modèle:",
    disabled=False,
    style={"description_width": "large"},
)

# Action buttons. button_style accepts 'success', 'info', 'warning',
# 'danger' or ''.
generation_button = Button(
    description="Training !",
    button_style="success",
)

reset_button = Button(
    description="Reset",
    button_style="danger",
)

# Empty results table; model_training appends one row per training run.
result_dataframe = pd.DataFrame(
    columns=[
        "Modele",
        "Training %",
        "Training Score",
        "Test Score",
        "# of test values",
        "Average $ diff",
    ]
)
df_widget = pn.widgets.DataFrame(result_dataframe)

def setup_ui(df):
    """Render *df* without its index column inside a fresh Output widget.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to display.

    Returns
    -------
    ipywidgets.Output
        A new output widget holding the styled table.
    """
    out = widgets.Output()
    with out:
        styler = df.style
        # Styler.hide_index() was deprecated in pandas 1.4 and removed in
        # pandas 2.0; Styler.hide(axis="index") is its replacement. Fall back
        # to the old method on pandas versions that predate Styler.hide.
        if hasattr(styler, "hide"):
            styler = styler.hide(axis="index")
        else:
            styler = styler.hide_index()
        display(styler)
    return out

def setup_reset(df):
    """Remove every row from *df* in place and return it.

    The previous implementation returned an empty copy (``df[0:0]``) and
    assigned a new frame to a function-local ``result_dataframe``, so the
    caller's table was never actually emptied and earlier results reappeared
    on the next training run. Clearing in place makes the reset stick for
    every holder of the same DataFrame object.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table to clear. Its columns are kept.

    Returns
    -------
    pandas.DataFrame
        The same object, now with zero rows.
    """
    # Drop by the frame's own index so all rows go, whatever their labels.
    df.drop(index=df.index, inplace=True)
    return df


def on_button_clicked(event):
    """Click handler: train the selected model and refresh the results table.

    Parameters
    ----------
    event
        Click argument supplied by the button; not used.
    """
    # The slider holds the training percentage, whereas model_training
    # expects the percentage reserved for testing.
    test_share = 100 - dataset_division.value

    updated_results = model_training(
        models_dropdown.value, test_share, result_dataframe
    )

    # Swap the displayed table for one rendered from the updated results.
    second_column.children = [models_dropdown, setup_ui(updated_results)]

def on_reset_clicked(event):
    """Click handler: show the empty table returned by ``setup_reset``.

    Parameters
    ----------
    event
        Click argument supplied by the button; not used.
    """
    emptied = setup_reset(result_dataframe)

    # Swap the displayed table for one rendered from the emptied results.
    second_column.children = [models_dropdown, setup_ui(emptied)]


# Layout: training-share slider | model dropdown + results table | buttons.
first_column = VBox([Label('% of data for training'), dataset_division])
second_column = VBox([models_dropdown, setup_ui(result_dataframe)])
buttons_column = VBox([generation_button, reset_button])

# Attach the click handlers to their buttons.
generation_button.on_click(on_button_clicked)
reset_button.on_click(on_reset_clicked)

final_version = HBox([first_column, second_column, buttons_column])
display(final_version)

HBox(children=(VBox(children=(Label(value='% of data for training'), IntSlider(value=70, max=99, min=1, orient…