<a href="https://colab.research.google.com/github/MaikeFogaca/Python-Projects/blob/main/Calculadora_Aluguel_Machine_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import r2_score

In [2]:
# Train / test CSVs (file names say "preprocessado"), read directly from the upstream GitHub repo
TREINO_URL = 'https://raw.githubusercontent.com/Mario-RJunior/calculadora-imoveis/master/treino_preprocessado.csv'
TESTE_URL = 'https://raw.githubusercontent.com/Mario-RJunior/calculadora-imoveis/master/teste_preprocessado.csv'

treino = pd.read_csv(TREINO_URL)
teste = pd.read_csv(TESTE_URL)

In [3]:
# Column layout shared by both splits: the four zone columns, then quartos
# and area, with the target (preco) last
order_columns = [
    'zona_leste', 'zona_norte', 'zona_oeste', 'zona_sul',
    'quartos', 'area',
    'preco',
]

# Apply the same ordering to train and test.
# Note: reindex fills a column missing from the frame with NaN instead of raising.
treino = treino.reindex(order_columns, axis='columns')
teste = teste.reindex(order_columns, axis='columns')

In [4]:
# First five rows of the training set
treino.head(n=5)

Unnamed: 0,zona_leste,zona_norte,zona_oeste,zona_sul,quartos,area,preco
0,0,0,1,0,0.693147,3.044522,6.908755
1,0,1,0,0,0.693147,3.713572,7.601402
2,0,0,1,0,1.609438,5.70711,9.615205
3,0,0,0,1,1.098612,4.110874,7.496097
4,0,0,1,0,1.098612,5.493061,8.412055


In [5]:
# First five rows of the test set
teste.head(n=5)

Unnamed: 0,zona_leste,zona_norte,zona_oeste,zona_sul,quartos,area,preco
0,0,0,0,1,1.386294,4.465908,7.313887
1,0,0,0,1,1.386294,5.968708,10.59666
2,0,0,1,0,1.609438,6.196444,9.305741
3,1,0,0,0,0.693147,4.795791,7.266129
4,0,0,1,0,0.693147,3.970292,8.537192


In [6]:
# Separate the target column 'preco' (y) from the remaining columns (X) in each split
y_train = treino['preco']
X_train = treino.drop(columns='preco')

y_test = teste['preco']
X_test = teste.drop(columns='preco')

In [7]:
# Linear regression: fit on the training split
# (fit returns the estimator itself, so it can be chained) ...
rl = LinearRegression().fit(X_train, y_train)

# ... and report its R2 on the test split
rl.score(X=X_test, y=y_test)

0.5605126467102994

In [8]:
# K-nearest-neighbours regressor with default hyperparameters, fitted on the training split
neigh = KNeighborsRegressor()
neigh.fit(X=X_train, y=y_train)

KNeighborsRegressor()

In [9]:
# R2 of the KNN model on the test split
neigh.score(X=X_test, y=y_test)


0.6449494094322331

In [10]:
# Random forest regressor with default hyperparameters, fitted on the training split
rf = RandomForestRegressor()
rf.fit(X=X_train, y=y_train)

RandomForestRegressor()

In [11]:
# R2 of the random forest on the test split
rf.score(X=X_test, y=y_test)


0.6455780593804492

In [12]:
# AdaBoost regressor with default hyperparameters, fitted on the training split
regr = AdaBoostRegressor()
regr.fit(X=X_train, y=y_train)

AdaBoostRegressor()

In [13]:
# R2 of the AdaBoost model on the test split
regr.score(X=X_test, y=y_test)


0.6092145605120001

In [14]:
# Importando os estimadores
from sklearn.linear_model import RidgeCV, Lasso, ElasticNet, LassoLars, HuberRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

In [15]:
# Candidate estimators to compare; all use default hyperparameters except
# XGBoost, whose objective is set explicitly
reg_list = [
    RidgeCV(),
    LGBMRegressor(),
    XGBRegressor(objective='reg:squarederror'),
    SVR(),
    GradientBoostingRegressor(),
    MLPRegressor(),
]

In [16]:
# Fit every candidate and report its R2 on the training split, under
# cross-validation on the training split, and on the test split
from sklearn.model_selection import cross_val_score
import numpy as np

for reg in reg_list:
    model_name = type(reg).__name__
    print(f'Treinando Modelo {model_name}')
    reg.fit(X_train, y_train)

    train_score = reg.score(X_train, y_train)
    # cross_val_score works on clones, so the `reg` fitted above is left untouched
    cv_scores = cross_val_score(reg, X_train, y_train)
    test_score = reg.score(X_test, y_test)

    cv_mean, cv_std = np.mean(cv_scores), np.std(cv_scores)
    print(f"R2 Score Train: {train_score}")
    print(f"R2 Score Valid: {cv_mean:.2f} +- {cv_std:.2f}")
    print(f"R2 Score Test: {test_score}")
    print('=' * 80)

Treinando Modelo RidgeCV
R2 Score Train: 0.6949954947925866
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5628158188285928
Treinando Modelo LGBMRegressor
R2 Score Train: 0.8335649897226413
R2 Score Valid: 0.73 +- 0.01
R2 Score Test: 0.6524018217807195
Treinando Modelo XGBRegressor
R2 Score Train: 0.8173085197618972
R2 Score Valid: 0.74 +- 0.01
R2 Score Test: 0.6754518346344691
Treinando Modelo SVR
R2 Score Train: 0.7331335431720395
R2 Score Valid: 0.72 +- 0.03
R2 Score Test: 0.6424482920301615
Treinando Modelo GradientBoostingRegressor
R2 Score Train: 0.827916398475484
R2 Score Valid: 0.74 +- 0.01
R2 Score Test: 0.6639192375662468
Treinando Modelo MLPRegressor




R2 Score Train: 0.704074562257327
R2 Score Valid: 0.70 +- 0.04
R2 Score Test: 0.5715626620954379




In [17]:
# Trying every regressor that scikit-learn exposes
from sklearn.utils import all_estimators

# (name, class) pairs — the training loop unpacks each entry that way
estimators = all_estimators(type_filter='regressor')

# Column-wise accumulator for the sweep results: one independent list per report column
relatorio = {
    column: []
    for column in ('nome', 'train_score', 'cv_scores_mean', 'test_score', 'estimador')
}

# Estimator names that the sweep skips
ignore_list = [
    'IsotonicRegression',
    'MultiOutputRegressor',
    'ElasticNet',
    'MultiTaskElasticNet',
    'MultiTaskElasticNetCV',
    'MultiTaskLasso',
    'MultiTaskLassoCV',
    'RadiusNeighborsRegressor',
    'RegressorChain',
    'StackingRegressor',
    'VotingRegressor',
]

In [18]:
# Add the LightGBM and XGBoost regressors to the scikit-learn candidates.
# Membership is checked first so that re-running this cell does not register
# (and later train and report) the same estimator twice.
for extra in [('LGBMRegressor', LGBMRegressor),
              ('XGBRegressor', XGBRegressor)]:
    if extra not in estimators:
        estimators.append(extra)

In [19]:
# Train every regressor in `estimators` (except the ignored ones) with default
# hyperparameters and collect its scores in `relatorio`.
#
# The accumulator is rebuilt here so that re-running this cell starts from a
# clean report instead of appending duplicate rows (or failing because
# `relatorio` has since been replaced by a DataFrame further down).
relatorio = {
    column: []
    for column in ('nome', 'train_score', 'cv_scores_mean', 'test_score', 'estimador')
}

for name, RegressorClass in estimators:
    if name in ignore_list:
        # Explicitly excluded from the sweep
        continue

    print(f'Treinando Modelo {name}')
    reg = RegressorClass()
    reg.fit(X_train, y_train)

    train_score = reg.score(X_train, y_train)
    # cross_val_score works on clones, so the `reg` fitted above is left untouched
    cv_scores = cross_val_score(reg, X_train, y_train)
    test_score = reg.score(X_test, y_test)
    cv_mean = np.mean(cv_scores)  # computed once, used for both printing and the report

    print(f"R2 Score Train: {train_score}")
    print(f"R2 Score Valid: {cv_mean:.2f} +- {np.std(cv_scores):.2f}")
    print(f"R2 Score Test: {test_score}")
    print('=' * 80)

    relatorio['nome'].append(name)
    relatorio['train_score'].append(train_score)
    relatorio['cv_scores_mean'].append(cv_mean)
    relatorio['test_score'].append(test_score)
    relatorio['estimador'].append(reg)

Treinando Modelo ARDRegression
R2 Score Train: 0.6949800511386857
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5626085196793564
Treinando Modelo AdaBoostRegressor
R2 Score Train: 0.7196555792082444
R2 Score Valid: 0.67 +- 0.02
R2 Score Test: 0.5979974691718237
Treinando Modelo BaggingRegressor
R2 Score Train: 0.8904584343937534
R2 Score Valid: 0.68 +- 0.01
R2 Score Test: 0.639997803155116
Treinando Modelo BayesianRidge
R2 Score Train: 0.6949911822249594
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5627539730309865
Treinando Modelo CCA
R2 Score Train: 0.5268173739911577
R2 Score Valid: 0.52 +- 0.05
R2 Score Test: 0.42478940399983756
Treinando Modelo DecisionTreeRegressor
R2 Score Train: 0.9216100052828616
R2 Score Valid: 0.61 +- 0.04
R2 Score Test: 0.5706834400605976
Treinando Modelo DummyRegressor
R2 Score Train: 0.0
R2 Score Valid: -0.01 +- 0.01
R2 Score Test: -0.0013810536838239074
Treinando Modelo ElasticNetCV




R2 Score Train: 0.6949788061574413
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5625562709006551
Treinando Modelo ExtraTreeRegressor
R2 Score Train: 0.9216100052828616
R2 Score Valid: 0.61 +- 0.04
R2 Score Test: 0.5634759866222806
Treinando Modelo ExtraTreesRegressor
R2 Score Train: 0.9216100052828616
R2 Score Valid: 0.67 +- 0.02
R2 Score Test: 0.631953790854646
Treinando Modelo GammaRegressor
R2 Score Train: 0.4784793609393545
R2 Score Valid: 0.47 +- 0.02
R2 Score Test: 0.4050966513599863
Treinando Modelo GaussianProcessRegressor
R2 Score Train: 0.8261811762005272
R2 Score Valid: -96044.57 +- 128605.44
R2 Score Test: -34339.38794954753
Treinando Modelo GradientBoostingRegressor
R2 Score Train: 0.8279163984754841
R2 Score Valid: 0.74 +- 0.01
R2 Score Test: 0.6650482123104164
Treinando Modelo HistGradientBoostingRegressor
R2 Score Train: 0.8378845337772924
R2 Score Valid: 0.73 +- 0.02
R2 Score Test: 0.6628372044846564
Treinando Modelo HuberRegressor
R2 Score Train: 0.6891757245509442
R

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the data, use Pipeline with a StandardScaler in a

R2 Score Train: 0.6949884060829883
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5626554939081945
Treinando Modelo LassoLars
R2 Score Train: 0.0
R2 Score Valid: -0.01 +- 0.01
R2 Score Test: -0.0013810536838239074
Treinando Modelo LassoLarsCV
R2 Score Train: 0.6949996135157472
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5629558167496815
Treinando Modelo LassoLarsIC
R2 Score Train: 0.6949996135157472
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5629558167496815
Treinando Modelo LinearRegression


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)



R2 Score Train: 0.6943843575260824
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5605126467102994
Treinando Modelo LinearSVR
R2 Score Train: 0.6836331767233005
R2 Score Valid: 0.68 +- 0.04
R2 Score Test: 0.5388005277469922
Treinando Modelo MLPRegressor




R2 Score Train: 0.7241498868945426
R2 Score Valid: 0.70 +- 0.05
R2 Score Test: 0.6095730070715786
Treinando Modelo NuSVR
R2 Score Train: 0.7353168434788004
R2 Score Valid: 0.73 +- 0.03
R2 Score Test: 0.6448316482280201
Treinando Modelo OrthogonalMatchingPursuit
R2 Score Train: 0.6256572870612798
R2 Score Valid: 0.62 +- 0.04
R2 Score Test: 0.4850259732055404
Treinando Modelo OrthogonalMatchingPursuitCV
R2 Score Train: 0.6949996135157471
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5629558167496815
Treinando Modelo PLSCanonical
R2 Score Train: 0.35937095508459904
R2 Score Valid: 0.35 +- 0.01
R2 Score Test: 0.34148287622231643
Treinando Modelo PLSRegression
R2 Score Train: 0.6884671082779387
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5611160410265197


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), OrthogonalMatchingPursuit())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), OrthogonalMatchingPursuit())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the dat

Treinando Modelo PassiveAggressiveRegressor
R2 Score Train: 0.389522764926746
R2 Score Valid: 0.63 +- 0.03
R2 Score Test: 0.27915983309840064
Treinando Modelo PoissonRegressor
R2 Score Train: 0.686925366942527
R2 Score Valid: 0.68 +- 0.04
R2 Score Test: 0.5702212258651699
Treinando Modelo QuantileRegressor
R2 Score Train: -0.010894999539851291
R2 Score Valid: -0.02 +- 0.03
R2 Score Test: -0.0164549340549347
Treinando Modelo RANSACRegressor
R2 Score Train: 0.6763946043307851
R2 Score Valid: 0.65 +- 0.04
R2 Score Test: 0.533363391287195
Treinando Modelo RandomForestRegressor
R2 Score Train: 0.9011040142298897
R2 Score Valid: 0.71 +- 0.02
R2 Score Test: 0.6487071060764347
Treinando Modelo Ridge
R2 Score Train: 0.694995494792587
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5628158188285836
Treinando Modelo RidgeCV
R2 Score Train: 0.6949954947925866
R2 Score Valid: 0.69 +- 0.04
R2 Score Test: 0.5628158188285928
Treinando Modelo SGDRegressor
R2 Score Train: 0.6756219261968471
R2 Score Valid

In [20]:
# Turn the collected scores into a table ranked by mean cross-validation score, best first
relatorio = (
    pd.DataFrame(relatorio)
    .sort_values(by='cv_scores_mean', ascending=False)
)

# Show the ten best-ranked models
relatorio.head(n=10)


Unnamed: 0,nome,train_score,cv_scores_mean,test_score,estimador
12,GradientBoostingRegressor,0.827916,0.741223,0.665048,([DecisionTreeRegressor(criterion='friedman_ms...
45,XGBRegressor,0.817309,0.738721,0.675452,XGBRegressor()
44,LGBMRegressor,0.833565,0.731345,0.652402,LGBMRegressor()
13,HistGradientBoostingRegressor,0.837885,0.728931,0.662837,HistGradientBoostingRegressor()
27,NuSVR,0.735317,0.726797,0.644832,NuSVR()
40,SVR,0.733134,0.723369,0.642448,SVR()
15,KNeighborsRegressor,0.803215,0.707596,0.644949,KNeighborsRegressor()
36,RandomForestRegressor,0.901104,0.705825,0.648707,"(DecisionTreeRegressor(max_features='auto', ra..."
26,MLPRegressor,0.72415,0.702494,0.609573,MLPRegressor()
37,Ridge,0.694995,0.691922,0.562816,Ridge()


In [21]:
# Importando a biblioteca
from sklearn.model_selection import GridSearchCV

In [22]:
# Hyperparameter grid: every combination of these values is a search candidate
parameters = dict(
    n_estimators=[400, 700, 1000],
    colsample_bytree=[0.7, 0.8],
    max_depth=[15, 20, 25],
    reg_alpha=[1.1, 1.2, 1.3],
    reg_lambda=[1.1, 1.2, 1.3],
    subsample=[0.7, 0.8, 0.9],
)

# Base estimator for the search: an XGBoost regressor with the squared-error objective
xgb_reg = XGBRegressor(objective='reg:squarederror')

# Grid search over `parameters`, leaving cross-validation and scoring at their defaults
gs = GridSearchCV(estimator=xgb_reg, param_grid=parameters)

In [None]:
# Run the XGBoost grid search on the training split
gs.fit(X=X_train, y=y_train)

In [None]:
# Retrieve (and display) the best estimator found by the XGBoost grid search
best_gs = gs.best_estimator_
best_gs

In [None]:
# Best cross-validated score reached by the XGBoost grid search
gs.best_score_

In [None]:
# Score of the best XGBoost estimator on the test split
best_gs.score(X=X_test, y=y_test)

In [None]:
# Hyperparameter grid: every combination of these values is a search candidate
param_grid = dict(
    max_depth=[80, 90, 100, 110],
    max_features=[2, 3],
    min_samples_leaf=[3, 4, 5],
    min_samples_split=[8, 10, 12],
    n_estimators=[100, 200, 300, 1000],
)

# Base estimator for the search: a gradient boosting regressor with default hyperparameters
gbr_reg = GradientBoostingRegressor()

# Grid search over `param_grid`, leaving cross-validation and scoring at their defaults
gbr_gs = GridSearchCV(estimator=gbr_reg, param_grid=param_grid)

In [None]:
# Run the gradient boosting grid search on the training split
gbr_gs.fit(X=X_train, y=y_train)

In [None]:
# Retrieve (and display) the best estimator found by the gradient boosting grid search
best_gbr_gs = gbr_gs.best_estimator_
best_gbr_gs

In [None]:
# Best cross-validated score reached by the gradient boosting grid search
gbr_gs.best_score_

In [None]:
# Score of the best gradient boosting estimator on the test split
best_gbr_gs.score(X=X_test, y=y_test)

In [None]:
# Refit a GradientBoostingRegressor with default hyperparameters on the
# training split; this is the model that gets exported below.
# random_state is pinned so the exported model is reproducible: the two
# default fits earlier in this notebook report slightly different test scores.
best_gb = GradientBoostingRegressor(random_state=42)
best_gb.fit(X_train, y_train)

In [None]:
# Importando a biblioteca
import pickle

In [None]:
# Serialize the fitted model to disk. The context manager closes the file
# handle (flushing the pickle) even if dump() raises; the previous bare
# open() call left the handle open.
with open('gb_regressor.pkl', 'wb') as model_file:
    pickle.dump(best_gb, model_file, protocol=4)