In [1]:
import pandas as pd

df = pd.read_csv("../../../data/DataSet3_Ready.csv")
df.head()

Unnamed: 0,price,latitude,longitude,bathrooms,status,furnished_status
0,21000000.0,19.026733,72.855339,2.0,1.0,0.0
1,14100000.0,19.100052,72.884913,2.0,-1.0,0.0
2,17000000.0,19.124473,72.852247,2.0,1.0,-1.0
3,26000000.0,19.037694,72.84029,2.0,-1.0,0.0
4,14500000.0,19.213984,72.855087,2.0,1.0,2.0


In [55]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
def metrics(y_test, y_pred):
    return f'\n MAE: {mean_absolute_error(y_test, y_pred)}\n MSE: {mean_squared_error(y_test, y_pred)} \n RMSE: {(mean_squared_error(y_test, y_pred))**0.5}\n MAPE: {(mean_absolute_percentage_error(y_test, y_pred))**0.5} \n R^2: {r2_score(y_test, y_pred)}'

In [56]:
from sklearn.model_selection import train_test_split

def test_model(x, y, model, transformer = None):
    x = x if not transformer else transformer.fit_transform(x)
    
    x_tr, x_test, y_tr, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )
    
    model = model.fit(x_tr, y_tr)
    
    y_pred = model.predict(x_test)

    return metrics(y_test, y_pred)

In [57]:
y, x = df['price'], df.drop('price',axis=1)

In [58]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

result = test_model(x,y,LinearRegression(),PolynomialFeatures())
print(result)


 MAE: 5566097.767422076
 MSE: 55650457039258.28 
 RMSE: 7459923.393658831
 MAPE: 0.5490754418993876 
 R^2: 0.5169461299356146


In [59]:
from sklearn.tree import DecisionTreeRegressor
result = test_model(x,y,DecisionTreeRegressor(max_depth=13,min_samples_split=5,min_samples_leaf=22))
print(result)


 MAE: 5614479.017602797
 MSE: 55724193180635.66 
 RMSE: 7464863.909049894
 MAPE: 0.5502839144609136 
 R^2: 0.5163060897571337


In [60]:
from sklearn.ensemble import GradientBoostingRegressor
result = test_model(x,y,GradientBoostingRegressor())
print(result)


 MAE: 5297701.979652246
 MSE: 50266178679913.09 
 RMSE: 7089864.503635672
 MAPE: 0.5318570665235252 
 R^2: 0.5636824307201855


In [61]:
from catboost import CatBoostRegressor
result = test_model(x,y,CatBoostRegressor())
print(result)

Learning rate set to 0.039335
0:	learn: 10122612.1992117	total: 1.43ms	remaining: 1.43s
1:	learn: 9943054.1304517	total: 3.12ms	remaining: 1.56s
2:	learn: 9770104.1484893	total: 4.71ms	remaining: 1.56s
3:	learn: 9594619.9112139	total: 6.34ms	remaining: 1.58s
4:	learn: 9441713.8464731	total: 8.02ms	remaining: 1.59s
5:	learn: 9293423.0777275	total: 9.58ms	remaining: 1.59s
6:	learn: 9150199.6308006	total: 11.2ms	remaining: 1.58s
7:	learn: 9019358.5117858	total: 12.6ms	remaining: 1.56s
8:	learn: 8877362.3543632	total: 14ms	remaining: 1.54s
9:	learn: 8751929.6601213	total: 15.3ms	remaining: 1.52s
10:	learn: 8636624.5068871	total: 16.8ms	remaining: 1.51s
11:	learn: 8508159.1875819	total: 18.2ms	remaining: 1.5s
12:	learn: 8389394.4591174	total: 19.6ms	remaining: 1.49s
13:	learn: 8277897.2602892	total: 21ms	remaining: 1.48s
14:	learn: 8171792.5333720	total: 22.3ms	remaining: 1.47s
15:	learn: 8069881.4871469	total: 23.6ms	remaining: 1.45s
16:	learn: 7971479.3675866	total: 24.9ms	remaining: 1.44

In [62]:
from sklearn.ensemble import StackingRegressor
estimators = [('StackingRegressor', DecisionTreeRegressor(max_depth=13,min_samples_split=5,min_samples_leaf=22)),
              ('GradientBoostingRegressor', GradientBoostingRegressor()),
              ('CatBoostRegressor', CatBoostRegressor())]
stacking_regr = StackingRegressor(estimators)
result = test_model(x,y,stacking_regr)
print(result)

Learning rate set to 0.039335
0:	learn: 10122612.1992117	total: 1.22ms	remaining: 1.22s
1:	learn: 9943054.1304517	total: 2.55ms	remaining: 1.27s
2:	learn: 9770104.1484893	total: 3.77ms	remaining: 1.25s
3:	learn: 9594619.9112139	total: 5ms	remaining: 1.25s
4:	learn: 9441713.8464731	total: 6.38ms	remaining: 1.27s
5:	learn: 9293423.0777275	total: 7.55ms	remaining: 1.25s
6:	learn: 9150199.6308006	total: 8.94ms	remaining: 1.27s
7:	learn: 9019358.5117858	total: 10.2ms	remaining: 1.26s
8:	learn: 8877362.3543632	total: 11.4ms	remaining: 1.25s
9:	learn: 8751929.6601213	total: 12.7ms	remaining: 1.25s
10:	learn: 8636624.5068871	total: 14.3ms	remaining: 1.28s
11:	learn: 8508159.1875819	total: 15.7ms	remaining: 1.29s
12:	learn: 8389394.4591174	total: 17ms	remaining: 1.29s
13:	learn: 8277897.2602892	total: 18.4ms	remaining: 1.29s
14:	learn: 8171792.5333720	total: 19.7ms	remaining: 1.3s
15:	learn: 8069881.4871469	total: 21ms	remaining: 1.29s
16:	learn: 7971479.3675866	total: 22.3ms	remaining: 1.29s
1

In [63]:
from sklearn.ensemble import BaggingRegressor

result = test_model(x,y,BaggingRegressor(DecisionTreeRegressor(max_depth=13,min_samples_split=5,min_samples_leaf=22)))
print(result)


 MAE: 5458774.121307438
 MSE: 52000512255746.65 
 RMSE: 7211138.069385903
 MAPE: 0.5385668923058994 
 R^2: 0.5486281689879233


In [64]:
import pickle

### первая модель ###
with open('../../../models/LinearRegression.pickle','wb') as f:
    pickle.dump(LinearRegression(), f)

with open('../../../models/PolynomialFeatures.pickle','wb') as f:
    pickle.dump(PolynomialFeatures(), f)

### вторая модель ###
with open('../../../models/DecisionTreeRegressor.pickle','wb') as f:
    pickle.dump(
        DecisionTreeRegressor(
            max_depth=13,
            min_samples_split=6,
            min_samples_leaf=22
        ), f)

### третья модель ###
with open('../../../models/GradientBoostingRegressor.pickle','wb') as f:
    pickle.dump(GradientBoostingRegressor(), f)

### четвёртая модель ###
with open('../../../models/CatBoostRegressor.pickle','wb') as f:
    pickle.dump(CatBoostRegressor(), f)

### пятая модель ###
with open('../../../models/StackingRegressor.pickle','wb') as f:
    pickle.dump(StackingRegressor(estimators), f)

In [65]:
with open('../../../models/BaggingRegressor.pickle','wb') as f:
    pickle.dump(
        BaggingRegressor(
            DecisionTreeRegressor(
                max_depth=13,
                min_samples_split=5,
                min_samples_leaf=22
            )
        ), f)

In [66]:
load_model = pickle.load(open('../../../models/CatBoostRegressor.pickle', 'rb'))
result = test_model(x,y,load_model)
print(result)

Learning rate set to 0.039335
0:	learn: 10122612.1992117	total: 1.5ms	remaining: 1.5s
1:	learn: 9943054.1304517	total: 3.31ms	remaining: 1.65s
2:	learn: 9770104.1484893	total: 4.8ms	remaining: 1.59s
3:	learn: 9594619.9112139	total: 6.2ms	remaining: 1.54s
4:	learn: 9441713.8464731	total: 7.79ms	remaining: 1.55s
5:	learn: 9293423.0777275	total: 9.36ms	remaining: 1.55s
6:	learn: 9150199.6308006	total: 10.9ms	remaining: 1.55s
7:	learn: 9019358.5117858	total: 12.5ms	remaining: 1.55s
8:	learn: 8877362.3543632	total: 14.3ms	remaining: 1.57s
9:	learn: 8751929.6601213	total: 16ms	remaining: 1.58s
10:	learn: 8636624.5068871	total: 17.8ms	remaining: 1.6s
11:	learn: 8508159.1875819	total: 20.4ms	remaining: 1.68s
12:	learn: 8389394.4591174	total: 22.5ms	remaining: 1.71s
13:	learn: 8277897.2602892	total: 24.4ms	remaining: 1.72s
14:	learn: 8171792.5333720	total: 26.2ms	remaining: 1.72s
15:	learn: 8069881.4871469	total: 28ms	remaining: 1.72s
16:	learn: 7971479.3675866	total: 29.7ms	remaining: 1.72s
17