In [None]:
"""Prédire la Valeur fonciere du bien avec les critères suivants : type local, code postal,
Nombre pieces principales et Surface reelle bati, montrer les résultats sur un graphique
grâce à Plotly"""

In [None]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import plotly.express as px

In [None]:
def load_house_attributes(file):
    """Load a pipe-separated property-sales file and return the cleaned Paris rows.

    Parameters
    ----------
    file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``. The file must contain the
        columns 'Valeur fonciere', 'Surface reelle bati',
        'Nombre pieces principales', 'Type local', 'Commune' and 'Code postal'.

    Returns
    -------
    pandas.DataFrame
        Rows whose 'Commune' is one of 'PARIS 01' .. 'PARIS 20', without
        missing values or exact duplicates, with:
        - 'Valeur fonciere' as float (decimal comma converted to a dot),
        - 'Code postal' as a string without a trailing '.0',
        - 'Nombre pieces principales' as int,
        and with outliers removed (price >= 100,000,000, surface < 10,
        more than 20 rooms). The original row index is preserved.
    """
    price_col = 'Valeur fonciere'
    surface_col = 'Surface reelle bati'
    rooms_col = 'Nombre pieces principales'

    # 'Code postal' must be loaded as well: it is converted below and used
    # downstream as a feature.
    cols = [price_col, surface_col, rooms_col, 'Type local', 'Commune', 'Code postal']
    df = pd.read_csv(file, usecols=cols, low_memory=False, sep="|")

    # Treat empty 'Type local' strings as missing, then drop incomplete rows.
    # Plain assignment is used instead of an in-place replace on a column
    # selection, which may silently operate on a temporary copy.
    df['Type local'] = df['Type local'].replace('', np.nan)
    df = df.dropna()

    arrondissements_paris = ['PARIS {:02d}'.format(n) for n in range(1, 21)]
    # .copy() so the column assignments below act on an independent frame.
    df_paris = df[df['Commune'].isin(arrondissements_paris)].drop_duplicates().copy()

    # Prices are written with a decimal comma; astype(str) also keeps this
    # working if the column was already parsed as numeric.
    df_paris[price_col] = (
        df_paris[price_col].astype(str).str.replace(',', '.', regex=False).astype(float)
    )
    # A column with missing values is read as float, so go through int to get
    # '75001' rather than '75001.0'.
    df_paris['Code postal'] = df_paris['Code postal'].astype(float).astype(int).astype(str)
    df_paris[rooms_col] = df_paris[rooms_col].astype(int)

    # remove outliers (row filter, not a column assignment)
    keep = (
        (df_paris[price_col] < 100000000)
        & (df_paris[surface_col] >= 10)
        & (df_paris[rooms_col] <= 20)
    )
    return df_paris[keep]

In [None]:
# `df_paris` only exists inside load_house_attributes(), so it has to be
# created at notebook scope before it can be plotted.
# TODO(review): DATA_FILE is a placeholder - point it at the real
# pipe-separated sales export used for this analysis.
DATA_FILE = "valeursfoncieres.txt"
df_paris = load_house_attributes(DATA_FILE)

# Distribution of sale prices per postal code; hovering shows the property type.
fig = px.box(df_paris, x='Valeur fonciere', y='Code postal', hover_name='Type local')
fig.show()

In [None]:
def process_house_attributes(df_paris):
    """Turn the cleaned sales frame into a numeric feature matrix.

    Parameters
    ----------
    df_paris : pandas.DataFrame
        Must contain the columns 'Code postal', 'Type local',
        'Surface reelle bati' and 'Nombre pieces principales'.

    Returns
    -------
    numpy.ndarray
        One row per input row and four columns, in this order:
        ordinal-encoded 'Code postal', ordinal-encoded 'Type local',
        min-max scaled 'Surface reelle bati', min-max scaled
        'Nombre pieces principales'. The target 'Valeur fonciere' is not
        included.
    """
    continuous = ["Surface reelle bati", "Nombre pieces principales"]
    categorical = ["Code postal", "Type local"]

    # we would like to exploit continuous data from 0 to 1
    cs = MinMaxScaler()
    Continuous = cs.fit_transform(df_paris[continuous])

    # put into categorical data the qualitative variables; the encoder needs a
    # single 2-D input (one column per feature), and fit_transform already
    # returns the encoded array.
    Categorical = OrdinalEncoder().fit_transform(df_paris[categorical])

    # concatenate the categorical and continuous features column-wise
    X = np.hstack([Categorical, Continuous])

    # return the concatenated data
    return X

In [None]:
# Target: the sale price, taken from the cleaned frame. It must not be part of
# the feature matrix, otherwise the model would be trained on its own answer.
labels = np.array(df_paris['Valeur fonciere'])

# Features: numeric matrix built by process_house_attributes(); the names below
# follow the column order that function returns (categorical, then continuous).
X = process_house_attributes(df_paris)
list_attributs = ['Code postal', 'Type local',
                  'Surface reelle bati', 'Nombre pieces principales']
attributs = np.array(X)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split reproducible across runs.
train_x, test_x, train_y, test_y = train_test_split(
    X,
    labels,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Sanity check: report the shape of every piece of the split.
for caption, part in (
    ('Training Features Shape:', train_x),
    ('Training Labels Shape:', train_y),
    ('Testing Features Shape:', test_x),
    ('Testing Labels Shape:', test_y),
):
    print(caption, part.shape)

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Random forest of 1000 trees, seeded for reproducibility, fitted on the
# training split.
rf = RandomForestRegressor(
    n_estimators=1000,
    random_state=42,
)
rf.fit(train_x, train_y)

In [None]:
# Evaluate on the held-out rows: absolute error per sample, then its mean.
predictions = rf.predict(test_x)
errors = np.abs(predictions - test_y)
mean_abs_error = np.mean(errors)
print('Mean Absolute Error:', round(mean_abs_error, 2), 'euros.')

In [None]:
import argparse

# Optional command-line input describing the property to evaluate.
ap = argparse.ArgumentParser()
# The option is not required: inside a notebook kernel there is no way to pass
# it, and a required option makes the parser exit the cell with SystemExit.
# args["name"] is None when the option is absent.
ap.add_argument("-n", "--name", required=False, default=None,
                help="give house attributes to predict the value of a property")
# parse_known_args() ignores options this parser does not declare, such as the
# `-f <connection file>` that a Jupyter kernel is started with.
args = vars(ap.parse_known_args()[0])