In [74]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from pycaret.regression import setup, compare_models
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import KFold, cross_val_score

In [94]:
suffix = "12"
df = pd.read_csv(f'data/preprocessed{suffix}.csv')
df.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SalePrice
0,60,65.0,9.04204,7.0,5.0,2003.0,2003.0,1.83788,6.561031,0.0,...,0,0,1,0,0,0,0,1,0,208500.0
1,20,80.0,9.169623,6.0,8.0,1976.0,1976.0,0.0,6.886532,0.0,...,0,0,1,0,0,0,0,1,0,181500.0
2,60,68.0,9.328212,7.0,5.0,2001.0,2002.0,1.807264,6.188264,0.0,...,0,0,1,0,0,0,0,1,0,223500.0
3,70,60.0,9.164401,7.0,5.0,1915.0,1970.0,0.0,5.379897,0.0,...,0,0,1,1,0,0,0,0,0,140000.0
4,60,84.0,9.565284,8.0,5.0,2000.0,2000.0,1.925822,6.486161,0.0,...,0,0,1,0,0,0,0,1,0,250000.0


# store X and y

In [76]:
y = df.iloc[:, -1].values
X = df.iloc[:, :-1].values
X.shape, y.shape

((2919, 823), (2919,))

# scale X

In [77]:
X = StandardScaler().fit_transform(X)

# split test and train

In [78]:
train_idx = 1460
X_train, X_test = X[:train_idx], X[train_idx:]
y_train, _ = y[:train_idx], y[train_idx:]

# take log of y train

In [79]:
y_train = np.log(y_train)

In [80]:
df_train = pd.DataFrame(np.concatenate((X_train, y_train.reshape(-1, 1)), axis=1))
df_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,...,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823
0,60.0,65.0,9.04204,7.0,5.0,2003.0,2003.0,1.83788,6.561031,0.0,150.0,856.0,2.048136,6.751101,7.444833,0.526589,0.0,1.098612,0.693147,3.0,0.423036,2.197225,0.0,2003.0,2.0,548.0,0.0,4.127134,0.0,0.0,0.0,1.047198,2008.0,4225.0,587.732604,455.0,325.0,130195.0,130195.0,119.4622,426.466993,0.0,9750.0,55640.0,133.128863,438.821595,483.914163,34.228287,0.0,71.409799,45.054567,195.0,27.497331,142.819598,0.0,130195.0,130.0,35620.0,0.0,268.263735,0.0,0.0,0.0,68.067843,130520.0,81.758488,63.29428,45.2102,18111.206233,18111.206233,16.618185,59.325102,0.0,1356.306008,7739.986288,18.519331,61.04373,67.316481,4.761439,0.0,9.933696,6.267465,27.12612,3.825107,19.867393,0.0,18111.206233,18.08408,4955.037951,0.0,37.317714,0.0,0.0,0.0,9.468802,18156.416434,49.0,35.0,14021.0,14021.0,12.86516,45.927215,0.0,1050.0,5992.0,14.336954,47.25771,52.113833,3.686123,0.0,7.690286,4.85203,21.0,2.961251,15.380572,0.0,14021.0,14.0,3836.0,0.0,28.889941,0.0,0.0,0.0,7.330383,14056.0,25.0,10015.0,10015.0,9.1894,32.805153,0.0,750.0,4280.0,10.240682,33.755507,37.224166,2.632945,0.0,5.493061,3.465736,15.0,2.115179,10.986123,0.0,10015.0,10.0,2740.0,0.0,20.635672,0.0,0.0,0.0,5.235988,10040.0,4012009.0,4012009.0,3681.273637,13141.744424,0.0,300450.0,1714568.0,4102.417104,13522.456242,14912.001048,1054.757835,0.0,2200.520414,1388.373803,6009.0,847.340822,4401.040828,0.0,4012009.0,4006.0,1097644.0,0.0,8266.650173,0.0,0.0,0.0,2097.536753,4022024.0,4012009.0,3681.273637,13141.744424,0.0,300450.0,1714568.0,4102.417104,13522.456242,14912.001048,1054.757835,0.0,2200.520414,1388.373803,6009.0,847.340822,4401.040828,0.0,4012009.0,4006.0,1097644.0,0.0,8266.650173,0.0,0.0,0.0,2097.536753,4022024.0,3.377803,12.058387,0.0,275.682,1573.225279,3.764229,12.407714,13.68271,0.967807,0.0,2.019118,1.273921,5.51364,0.777489,4.038235,0.0,3681.273637,3.67576,1007.158239,0.0,7.585178,0.0,0.0,0.0,1.924623,3690.463037,43.047123,0.0,984.1546,5616.24225,13.437885,44.294184,48.845779,3.454967,0.0,7.208029,4.54776,19.683092,2.775551,14.416058,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,12.247694
1,20.0,80.0,9.169623,6.0,8.0,1976.0,1976.0,0.0,6.886532,0.0,284.0,1262.0,2.096943,0.0,7.141245,0.0,0.693147,1.098612,0.0,3.0,0.423036,1.94591,0.526589,1976.0,2.0,460.0,1.902174,0.0,0.0,0.0,0.0,2.617994,2007.0,6400.0,733.569803,480.0,640.0,158080.0,158080.0,0.0,550.922531,0.0,22720.0,100960.0,167.755451,0.0,571.29961,0.0,55.451774,87.888983,0.0,240.0,33.842869,155.672812,42.127123,158080.0,160.0,36800.0,152.173898,0.0,0.0,0.0,0.0,209.439526,160560.0,84.081978,55.017735,73.35698,18119.174136,18119.174136,0.0,63.146896,0.0,2604.172801,11572.063644,19.228177,0.0,65.482522,0.0,6.355898,10.07386,0.0,27.508868,3.879079,17.843262,4.828623,18119.174136,18.339245,4218.026368,17.442215,0.0,0.0,0.0,0.0,24.006017,18403.432435,36.0,48.0,11856.0,11856.0,0.0,41.31919,0.0,1704.0,7572.0,12.581659,0.0,42.847471,0.0,4.158883,6.591674,0.0,18.0,2.538215,11.675461,3.159534,11856.0,12.0,2760.0,11.413042,0.0,0.0,0.0,0.0,15.707964,12042.0,64.0,15808.0,15808.0,0.0,55.092253,0.0,2272.0,10096.0,16.775545,0.0,57.129961,0.0,5.545177,8.788898,0.0,24.0,3.384287,15.567281,4.212712,15808.0,16.0,3680.0,15.21739,0.0,0.0,0.0,0.0,20.943953,16056.0,3904576.0,3904576.0,0.0,13607.786526,0.0,561184.0,2493712.0,4143.559628,0.0,14111.100362,0.0,1369.658829,2170.857882,0.0,5928.0,835.918854,3845.118455,1040.539931,3904576.0,3952.0,908960.0,3758.695289,0.0,0.0,0.0,0.0,5173.156282,3965832.0,3904576.0,0.0,13607.786526,0.0,561184.0,2493712.0,4143.559628,0.0,14111.100362,0.0,1369.658829,2170.857882,0.0,5928.0,835.918854,3845.118455,1040.539931,3904576.0,3952.0,908960.0,3758.695289,0.0,0.0,0.0,0.0,5173.156282,3965832.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.424318,0.0,1955.774986,8690.802933,14.440665,0.0,49.178411,0.0,4.77338,7.565628,0.0,20.659595,2.91325,13.400572,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,12.109011
2,60.0,68.0,9.328212,7.0,5.0,2001.0,2002.0,1.807264,6.188264,0.0,434.0,920.0,2.057383,6.765039,7.488294,0.526589,0.0,1.098612,0.693147,3.0,0.423036,1.94591,0.526589,2001.0,2.0,608.0,0.0,3.7612,0.0,0.0,0.0,4.712389,2008.0,4624.0,634.318436,476.0,340.0,136068.0,136136.0,122.893931,420.80196,0.0,29512.0,62560.0,139.902012,460.02265,509.203959,35.808054,0.0,74.705636,47.134008,204.0,28.766438,132.32189,35.808054,136068.0,136.0,41344.0,0.0,255.761608,0.0,0.0,0.0,320.442451,136544.0,87.015545,65.297486,46.641061,18665.752797,18675.08101,16.858539,57.725441,0.0,4048.444135,8581.955309,19.191701,63.10572,69.852392,4.912134,0.0,10.248089,6.465824,27.984637,3.946168,18.151863,4.912134,18665.752797,18.656425,5671.553074,0.0,35.085273,0.0,0.0,0.0,43.958165,18731.050283,49.0,35.0,14007.0,14014.0,12.650846,43.317849,0.0,3038.0,6440.0,14.401678,47.355273,52.418055,3.686123,0.0,7.690286,4.85203,21.0,2.961251,13.621371,3.686123,14007.0,14.0,4256.0,0.0,26.328401,0.0,0.0,0.0,32.986723,14056.0,25.0,10005.0,10010.0,9.036318,30.941321,0.0,2170.0,4600.0,10.286913,33.825195,37.441468,2.632945,0.0,5.493061,3.465736,15.0,2.115179,9.729551,2.632945,10005.0,10.0,3040.0,0.0,18.806001,0.0,0.0,0.0,23.561945,10040.0,4004001.0,4006002.0,3616.334641,12382.71651,0.0,868434.0,1840920.0,4116.822433,13536.842993,14984.075324,1053.704657,0.0,2198.32319,1386.987508,6003.0,846.49475,3893.766208,1053.704657,4004001.0,4002.0,1216608.0,0.0,7526.161432,0.0,0.0,0.0,9429.490374,4018008.0,4008004.0,3618.141905,12388.904774,0.0,868868.0,1841840.0,4118.879816,13543.608032,14991.563617,1054.231246,0.0,2199.421802,1387.680655,6006.0,846.917786,3895.712118,1054.231246,4006002.0,4004.0,1217216.0,0.0,7529.922632,0.0,0.0,0.0,9434.202763,4020016.0,3.266202,11.183825,0.0,784.352441,1662.682594,3.718233,12.226209,13.533321,0.951685,0.0,1.985482,1.2527,5.421791,0.764537,3.516773,0.951685,3616.334641,3.614527,1098.816323,0.0,6.79748,0.0,0.0,0.0,8.51653,3628.985487,38.294613,0.0,2685.706629,5693.202993,12.731626,41.863848,46.339538,3.258672,0.0,6.798503,4.289378,18.564792,2.617858,12.041806,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,12.317167
3,70.0,60.0,9.164401,7.0,5.0,1915.0,1970.0,0.0,5.379897,0.0,540.0,756.0,2.062933,6.629363,7.448916,0.526589,0.0,0.693147,0.0,3.0,0.423036,2.079442,0.526589,1998.0,3.0,642.0,0.0,3.583519,1.888504,0.0,0.0,1.047198,2006.0,3600.0,549.864068,420.0,300.0,114900.0,118200.0,0.0,322.793841,0.0,32400.0,45360.0,123.77597,397.761795,446.934966,31.595342,0.0,41.588831,0.0,180.0,25.382151,124.766493,31.595342,119880.0,180.0,38520.0,0.0,215.011136,113.310224,0.0,0.0,62.831855,120360.0,83.986248,64.150808,45.822006,17549.828183,18053.870246,0.0,49.303537,0.0,4948.776616,6928.287262,18.905544,60.754144,68.264855,4.825873,0.0,6.352279,0.0,27.493203,3.87687,19.056836,4.825873,18310.473478,27.493203,5883.545532,0.0,32.840805,17.307006,0.0,0.0,9.596939,18383.788687,49.0,35.0,13405.0,13790.0,0.0,37.659281,0.0,3780.0,5292.0,14.44053,46.405543,52.142413,3.686123,0.0,4.85203,0.0,21.0,2.961251,14.556091,3.686123,13986.0,21.0,4494.0,0.0,25.084633,13.219526,0.0,0.0,7.330383,14042.0,25.0,9575.0,9850.0,0.0,26.899487,0.0,2700.0,3780.0,10.314664,33.146816,37.244581,2.632945,0.0,3.465736,0.0,15.0,2.115179,10.397208,2.632945,9990.0,15.0,3210.0,0.0,17.917595,9.442519,0.0,0.0,5.235988,10030.0,3667225.0,3772550.0,0.0,10302.503432,0.0,1034100.0,1447740.0,3950.516362,12695.23063,14264.674336,1008.418,0.0,1327.376851,0.0,5745.0,810.113666,3982.130552,1008.418,3826170.0,5745.0,1229430.0,0.0,6862.438767,3616.484663,0.0,0.0,2005.383366,3841490.0,3880900.0,0.0,10598.397786,0.0,1063800.0,1489320.0,4063.977667,13059.845609,14674.364722,1037.380397,0.0,1365.499946,0.0,5910.0,833.380639,4096.499837,1037.380397,3936060.0,5910.0,1264740.0,0.0,7059.532309,3720.352369,0.0,0.0,2062.979233,3951820.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.943296,0.0,2905.144571,4067.202399,11.098367,35.665294,40.074404,2.832995,0.0,3.729061,0.0,16.139692,2.275889,11.187182,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,11.849398
4,60.0,84.0,9.565284,8.0,5.0,2000.0,2000.0,1.925822,6.486161,0.0,490.0,1145.0,2.084931,6.960348,7.695758,0.526589,0.0,1.098612,0.693147,4.0,0.423036,2.302585,0.526589,2000.0,3.0,836.0,1.83461,4.442651,0.0,0.0,0.0,6.283185,2008.0,7056.0,803.483841,672.0,420.0,168000.0,168000.0,161.769052,544.837506,0.0,41160.0,96180.0,175.134167,584.669209,646.443671,44.233479,0.0,92.283432,58.224363,336.0,35.535012,193.417148,44.233479,168000.0,252.0,70224.0,154.107226,373.182706,0.0,0.0,0.0,527.78758,168672.0,91.494655,76.522271,47.826419,19130.567635,19130.567635,18.421034,62.041969,0.0,4686.989071,10952.249971,19.942953,66.577701,73.612109,5.036974,0.0,10.508538,6.63015,38.261135,4.046458,22.02488,5.036974,19130.567635,28.695851,7996.577272,17.548564,42.49522,0.0,0.0,0.0,60.100452,19207.089906,64.0,40.0,16000.0,16000.0,15.406576,51.889286,0.0,3920.0,9160.0,16.679444,55.682782,61.566064,4.212712,0.0,8.788898,5.545177,32.0,3.384287,18.420681,4.212712,16000.0,24.0,6688.0,14.676879,35.54121,0.0,0.0,0.0,50.265484,16064.0,25.0,10000.0,10000.0,9.62911,32.430804,0.0,2450.0,5725.0,10.424653,34.801739,38.47879,2.632945,0.0,5.493061,3.465736,20.0,2.115179,11.512925,2.632945,10000.0,15.0,4180.0,9.173049,22.213256,0.0,0.0,0.0,31.415927,10040.0,4000000.0,4000000.0,3851.64409,12972.321578,0.0,980000.0,2290000.0,4169.861123,13920.695458,15391.515981,1053.178068,0.0,2197.224577,1386.294361,8000.0,846.071714,4605.170186,1053.178068,4000000.0,6000.0,1672000.0,3669.219671,8885.302513,0.0,0.0,0.0,12566.370964,4016000.0,4000000.0,3851.64409,12972.321578,0.0,980000.0,2290000.0,4169.861123,13920.695458,15391.515981,1053.178068,0.0,2197.224577,1386.294361,8000.0,846.071714,4605.170186,1053.178068,4000000.0,6000.0,1672000.0,3669.219671,8885.302513,0.0,0.0,0.0,12566.370964,4016000.0,3.708791,12.491191,0.0,943.652802,2205.066242,4.015205,13.404391,14.82066,1.014117,0.0,2.115732,1.334878,7.703288,0.814692,4.434369,1.014117,3851.64409,5.777466,1609.98723,3.533132,8.555756,0.0,0.0,0.0,12.100297,3867.050666,42.070282,0.0,3178.218787,7426.654103,13.523195,45.145935,49.915924,3.415541,0.0,7.125776,4.495864,25.944643,2.743879,14.934937,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,12.429216


# rename target col

In [81]:
# rename last column to target
df_train.rename(columns={df_train.columns[-1]: 'target'}, inplace=True)
df_train["target"]

0       12.247694
1       12.109011
2       12.317167
3       11.849398
4       12.429216
          ...    
1455    12.072541
1456    12.254863
1457    12.493130
1458    11.864462
1459    11.901583
Name: target, Length: 1460, dtype: float64

# setup pycaret

In [82]:
s = setup(data=df_train, target='target')

Unnamed: 0,Description,Value
0,Session id,6965
1,Target,target
2,Target type,Regression
3,Data shape,"(1460, 824)"
4,Train data shape,"(1021, 824)"
5,Test data shape,"(439, 824)"
6,Numeric features,823
7,Preprocess,True
8,Imputation type,simple
9,Numeric imputation,mean


# Compare different models

In [85]:
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
gbr,Gradient Boosting Regressor,0.0915,0.018,0.1323,0.8887,0.0103,0.0077,2.093
lightgbm,Light Gradient Boosting Machine,0.0913,0.0181,0.1326,0.888,0.0103,0.0076,0.917
et,Extra Trees Regressor,0.0945,0.0196,0.1385,0.8796,0.0108,0.0079,1.767
rf,Random Forest Regressor,0.0949,0.0196,0.1387,0.8794,0.0108,0.008,2.356
omp,Orthogonal Matching Pursuit,0.0855,0.0197,0.1342,0.8791,0.0103,0.0072,0.05
xgboost,Extreme Gradient Boosting,0.1048,0.0231,0.1501,0.8571,0.0117,0.0088,1.891
lasso,Lasso Regression,0.0905,0.0263,0.1467,0.8412,0.0112,0.0076,0.115
ada,AdaBoost Regressor,0.1178,0.0264,0.1615,0.8379,0.0125,0.0098,0.877
en,Elastic Net,0.0916,0.0308,0.153,0.8155,0.0115,0.0077,0.12
lr,Linear Regression,0.1217,0.0298,0.1711,0.8117,0.0133,0.0102,0.081


In [88]:
result = cross_val_score(best, 
                        X_train, 
                        y_train, 
                        scoring="neg_mean_squared_error",
                        cv=KFold(n_splits=10))
print(np.exp(np.sqrt(-result)).mean())

1.1400376180672283


In [89]:
pred = np.exp(best.predict(X_test))
pd.DataFrame(pred, 
            index=range(1461, len(df)+1), 
            columns=['SalePrice']).reset_index().rename(columns={'index': 'id'}).\
                to_csv(f'data/submission{suffix}_best.csv', index=False)

# train ensemble of best models 

In [67]:
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, ExtraTreesRegressor
from sklearn.linear_model import BayesianRidge, OrthogonalMatchingPursuit, LinearRegression, Lasso, Ridge
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# Bagging ensemble

In [68]:
models = {
    "gbr": GradientBoostingRegressor(verbose=0),
    "rf": RandomForestRegressor(),
    "xgb": XGBRegressor(),
    "lgbm": LGBMRegressor(),
    "et": ExtraTreesRegressor(),
    "br": BayesianRidge(),
    "omp": OrthogonalMatchingPursuit(),
}

# Evaluate

In [69]:
results = {}
for name, model in models.items():
    result = cross_val_score(model, 
                            X_train, 
                            y_train, 
                            scoring="neg_mean_squared_error",
                            cv=KFold(n_splits=10))
    results[name] = np.exp(np.sqrt(-result))

In [70]:
results

{'gbr': array([1.13605481, 1.11290607, 1.11881022, 1.19309385, 1.16995792,
        1.12717144, 1.14129604, 1.11635698, 1.13679067, 1.14575931]),
 'rf': array([1.1506536 , 1.11996163, 1.13269635, 1.18766705, 1.17685496,
        1.1305786 , 1.13390794, 1.12806345, 1.16677043, 1.15493304]),
 'xgb': array([1.16505496, 1.13470109, 1.16390057, 1.20583595, 1.1756001 ,
        1.14220334, 1.1616369 , 1.13218538, 1.14695166, 1.17433248]),
 'lgbm': array([1.13734728, 1.11305822, 1.12109834, 1.18623983, 1.17283698,
        1.12227567, 1.13192154, 1.11575235, 1.14459544, 1.15264308]),
 'et': array([1.14227902, 1.11448389, 1.13231433, 1.18086023, 1.1727946 ,
        1.11913407, 1.12905718, 1.11802082, 1.1476179 , 1.14763828]),
 'br': array([1.11939883, 1.10163111, 1.11191488, 1.18333471, 1.14887638,
        1.13620639, 1.12157118, 1.10356167, 1.24016792, 1.13784051]),
 'omp': array([1.12058136, 1.10528201, 1.11255767, 1.18669344, 1.15994902,
        1.16590421, 1.12968398, 1.10848162, 1.24121157, 1

In [71]:
means = []
for name, result in results.items():
    means.append(result.mean())
    print(f"{name}: {result.mean():.5f}")
top_model = [name for _,name in sorted(zip(means, models.keys()))]
print(top_model)

gbr: 1.13982
rf: 1.14821
xgb: 1.16024
lgbm: 1.13978
et: 1.14042
br: 1.14045
omp: 1.14706
['lgbm', 'gbr', 'et', 'br', 'omp', 'rf', 'xgb']


# Combine predictions

In [72]:
n_models = 2
pred = np.zeros(X_test.shape[0])
for model in top_model[:n_models]:
    models[model].fit(X_train, y_train)
    pred += np.exp(models[model].predict(X_test)) / n_models

# Save submission

In [73]:
y_submission = pred
pd.DataFrame(y_submission, 
            index=range(1461, len(df)+1), 
            columns=['SalePrice']).reset_index().rename(columns={'index': 'id'}).to_csv(f'data/submission{suffix}.csv', index=False)