In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/house-prices-advanced-regression-techniques/sample_submission.csv
/kaggle/input/house-prices-advanced-regression-techniques/data_description.txt
/kaggle/input/house-prices-advanced-regression-techniques/train.csv
/kaggle/input/house-prices-advanced-regression-techniques/test.csv


In [16]:
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor, Pool
from tqdm import tqdm

# Homework description

Take data from kaggle housing
1. Take your best prediction
2. Try pseudo-labeling
3. Try noisy student approach

Hypertuned CatBoostRegressor provided best score on Kaggle so far: 0.12548 public score (from Homework #5)

depth=6, n_estimators=465, learning_rate=0.06

Let's try to create bagging ensemble of 20 CatBoostRegressors with such hyperparameters first and see if the result will be better then currently the best one

No features will be transformed from numerical to categorical

In [2]:
train_data = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/train.csv")
test_data = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/test.csv")

In [57]:
numeric_columns = [i for i, j in zip(train_data.columns, train_data.dtypes) if j in [np.int64, np.float64] and i not in ['SalePrice', 'Id']]
categorical_columns = [i for i, j in zip(train_data.columns, train_data.dtypes) if j not in [np.int64, np.float64]]

train_data[categorical_columns] = train_data[categorical_columns].fillna("Other")
train_data[numeric_columns] = train_data[numeric_columns].fillna(-1)
test_data[categorical_columns] = test_data[categorical_columns].fillna("Other")
test_data[numeric_columns] = test_data[numeric_columns].fillna(-1)
x_train = train_data[numeric_columns + categorical_columns]
x_test = test_data[numeric_columns + categorical_columns]

y_train = np.log(train_data['SalePrice'])

In [9]:
y_pred = []

M = 20
TEST_SIZE = 0.2

models = [CatBoostRegressor(verbose=False, depth=6, n_estimators=465, learning_rate=0.06) for i in range(M)]

for k, model in enumerate(tqdm(models)):
    x_tr, _, y_tr, _ = train_test_split(x_train, y_train, test_size=TEST_SIZE, random_state=k)
    pool_train = Pool(x_tr, y_tr, cat_features=categorical_columns)
    pool_test = Pool(x_test, cat_features=categorical_columns)
    model.fit(pool_train)
    y_pred.append(model.predict(pool_test)) 
    
test_targets = np.exp(np.mean(y_pred, axis=0))
    
submit_bagging = pd.DataFrame()
submit_bagging['Id'] = test_data['Id']
submit_bagging['SalePrice'] = test_targets
print(submit_bagging)

submit_bagging.to_csv('/kaggle/working/hypertuned_20_catboosts_bagging_point2_test_split.csv', index=False)

100%|██████████| 20/20 [03:20<00:00, 10.02s/it]

        Id      SalePrice
0     1461  117963.972322
1     1462  157796.649545
2     1463  183460.097828
3     1464  192129.425290
4     1465  189206.119503
...    ...            ...
1454  2915   83811.534474
1455  2916   83967.171190
1456  2917  163140.641144
1457  2918  116515.686215
1458  2919  219268.905765

[1459 rows x 2 columns]





Above ensemble scored with 0.12297 public score which is better than previous high score.

Now with test size = 0.1


In [25]:
y_pred2 = []

TEST_SIZE = 0.1

models = [CatBoostRegressor(verbose=False, depth=6, n_estimators=465, learning_rate=0.06) for i in range(M)]

for k, model in enumerate(tqdm(models)):
    x_tr, _, y_tr, _ = train_test_split(x_train, y_train, test_size=TEST_SIZE, random_state=k)
    pool_train = Pool(x_tr, y_tr, cat_features=categorical_columns)
    pool_test = Pool(x_test, cat_features=categorical_columns)
    model.fit(pool_train)
    y_pred2.append(model.predict(pool_test)) 
    
test_targets2 = np.exp(np.mean(y_pred2, axis=0))
    
submit_bagging2 = pd.DataFrame()
submit_bagging2['Id'] = test_data['Id']
submit_bagging2['SalePrice'] = test_targets2
print(submit_bagging2)

submit_bagging2.to_csv('/kaggle/working/hypertuned_20_catboosts_bagging_point1_test_split.csv', index=False)

100%|██████████| 20/20 [03:29<00:00, 10.49s/it]

        Id      SalePrice
0     1461  117778.201319
1     1462  157380.532174
2     1463  183697.289624
3     1464  191683.811679
4     1465  188308.457056
...    ...            ...
1454  2915   83998.271852
1455  2916   83818.862894
1456  2917  163618.786982
1457  2918  116526.724487
1458  2919  221166.170050

[1459 rows x 2 columns]





Test size = 0.1 helped to achieve even better public score: 0.12252

In [26]:
y_pred3 = []

TEST_SIZE = 0.05

models = [CatBoostRegressor(verbose=False, depth=6, n_estimators=465, learning_rate=0.06) for i in range(M)]

for k, model in enumerate(tqdm(models)):
    x_tr, _, y_tr, _ = train_test_split(x_train, y_train, test_size=TEST_SIZE, random_state=k)
    pool_train = Pool(x_tr, y_tr, cat_features=categorical_columns)
    pool_test = Pool(x_test, cat_features=categorical_columns)
    model.fit(pool_train)
    y_pred3.append(model.predict(pool_test)) 
    
test_targets3 = np.exp(np.mean(y_pred3, axis=0))
    
submit_bagging3 = pd.DataFrame()
submit_bagging3['Id'] = test_data['Id']
submit_bagging3['SalePrice'] = test_targets3
print(submit_bagging3)

submit_bagging3.to_csv('/kaggle/working/hypertuned_20_catboosts_bagging_point05_test_split.csv', index=False)

100%|██████████| 20/20 [03:27<00:00, 10.38s/it]

        Id      SalePrice
0     1461  117322.385497
1     1462  156434.997498
2     1463  185267.941341
3     1464  192854.722216
4     1465  186536.924132
...    ...            ...
1454  2915   84129.796504
1455  2916   83835.703668
1456  2917  163518.741341
1457  2918  115262.171182
1458  2919  218610.678871

[1459 rows x 2 columns]





Decreasing test size to 0.05 led to (once again) better public score: 0.12206

### Pseudo-labeling

In [58]:
# Add small noise into test data in a naive way (some of features marked as numeric are indeed categorical + is adding gaussian noise to integer features actually ok?)

mu, sigma = 0, 0.1
x_test_noise = np.random.normal(mu, sigma, [x_test[numeric_columns].shape[0], x_test[numeric_columns].shape[1]])
print(x_test_noise)

[[-0.06842232  0.12753385  0.03372705 ...  0.00083066  0.05923981
  -0.04419231]
 [-0.14130591 -0.18630972 -0.00788724 ... -0.06577228 -0.01146207
  -0.05498295]
 [-0.0903799   0.00292425  0.01902806 ... -0.19040073 -0.0036391
  -0.06787715]
 ...
 [-0.01931702 -0.04084132 -0.0092156  ... -0.10523636  0.10983165
  -0.11015892]
 [-0.04112879 -0.07369342 -0.05583205 ... -0.20370248  0.05042826
   0.06755908]
 [-0.09663089  0.00553424 -0.00743605 ... -0.03590264 -0.04623228
   0.00459231]]


In [59]:
x_test_with_noise_only_numeric = x_test[numeric_columns].add(x_test_noise)
x_test_with_noise_only_numeric

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
0,19.982531,79.899160,11622.060420,5.104476,5.892204,1960.982031,1960.899791,-0.068245,468.137332,143.925993,...,730.108305,140.010865,-0.015466,-0.249374,0.141507,119.854618,0.123432,0.126994,6.139137,2010.031937
1,19.895882,80.914054,14267.024890,6.196708,6.243939,1958.063501,1958.136205,108.102152,923.124646,-0.168530,...,312.104208,392.840573,36.018326,0.011181,0.144343,-0.134929,0.132068,12500.033542,6.169430,2010.180999
2,59.997782,74.109877,13829.901750,4.980579,4.981760,1997.067720,1997.964467,0.003767,791.202736,0.045564,...,482.033281,212.059918,33.998076,0.061241,0.056476,-0.137348,-0.093361,0.100499,2.793589,2009.936158
3,60.041995,78.019383,9977.916582,6.039517,5.887540,1998.005386,1998.073876,20.077054,602.034272,-0.082253,...,469.944775,359.979975,35.796310,0.046981,-0.017503,0.044244,0.035779,0.081908,5.909417,2010.276533
4,120.028970,42.857917,5004.867762,8.060135,5.027041,1992.064337,1991.956733,0.147972,262.895629,0.137657,...,506.017151,0.013124,82.002617,0.124750,0.180190,143.975725,0.022608,0.045278,0.867247,2009.931632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,159.888495,21.006196,1936.094715,3.849836,7.106053,1969.959371,1970.024793,0.176569,0.080942,0.039146,...,0.034546,-0.060355,-0.052504,-0.033531,0.007570,0.096949,-0.080423,-0.054637,5.882058,2006.192757
1455,160.034686,20.903897,1894.189414,4.136896,4.909377,1969.905702,1969.869376,0.083226,252.178471,-0.048714,...,285.878617,-0.023740,24.041754,-0.131853,-0.108481,-0.191348,-0.220076,0.040330,3.855388,2006.113798
1456,19.967012,160.056096,19999.900762,5.015645,6.910174,1959.990160,1996.182373,-0.063737,1224.191589,-0.217492,...,576.185789,474.010862,0.107829,-0.024383,-0.097727,0.076429,-0.182209,0.069762,9.034610,2005.980332
1457,85.148309,61.990892,10440.873660,4.986973,4.995743,1992.010435,1992.012120,0.027295,337.012916,-0.058021,...,-0.121104,80.002153,32.021600,-0.025568,0.072406,0.071310,-0.129825,699.934691,6.989575,2006.038652


In [62]:
x_test_with_noise = pd.concat([x_test_with_noise_only_numeric, x_test[categorical_columns]], axis=1)
x_test_with_noise

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,19.982531,79.899160,11622.060420,5.104476,5.892204,1960.982031,1960.899791,-0.068245,468.137332,143.925993,...,Attchd,Unf,TA,TA,Y,Other,MnPrv,Other,WD,Normal
1,19.895882,80.914054,14267.024890,6.196708,6.243939,1958.063501,1958.136205,108.102152,923.124646,-0.168530,...,Attchd,Unf,TA,TA,Y,Other,Other,Gar2,WD,Normal
2,59.997782,74.109877,13829.901750,4.980579,4.981760,1997.067720,1997.964467,0.003767,791.202736,0.045564,...,Attchd,Fin,TA,TA,Y,Other,MnPrv,Other,WD,Normal
3,60.041995,78.019383,9977.916582,6.039517,5.887540,1998.005386,1998.073876,20.077054,602.034272,-0.082253,...,Attchd,Fin,TA,TA,Y,Other,Other,Other,WD,Normal
4,120.028970,42.857917,5004.867762,8.060135,5.027041,1992.064337,1991.956733,0.147972,262.895629,0.137657,...,Attchd,RFn,TA,TA,Y,Other,Other,Other,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,159.888495,21.006196,1936.094715,3.849836,7.106053,1969.959371,1970.024793,0.176569,0.080942,0.039146,...,Other,Other,Other,Other,Y,Other,Other,Other,WD,Normal
1455,160.034686,20.903897,1894.189414,4.136896,4.909377,1969.905702,1969.869376,0.083226,252.178471,-0.048714,...,CarPort,Unf,TA,TA,Y,Other,Other,Other,WD,Abnorml
1456,19.967012,160.056096,19999.900762,5.015645,6.910174,1959.990160,1996.182373,-0.063737,1224.191589,-0.217492,...,Detchd,Unf,TA,TA,Y,Other,Other,Other,WD,Abnorml
1457,85.148309,61.990892,10440.873660,4.986973,4.995743,1992.010435,1992.012120,0.027295,337.012916,-0.058021,...,Other,Other,Other,Other,Y,Other,MnPrv,Shed,WD,Normal


Get test's targets for data with noise

Approach #1 - predict new test targets (pseudo-labels) based on features with noise - don't apply noise to target itself

In [69]:
y_pred_for_x_test_with_noise = []

TEST_SIZE = 0.05

models = [CatBoostRegressor(verbose=False, depth=6, n_estimators=465, learning_rate=0.06) for i in range(M)]

for k, model in enumerate(tqdm(models)):
    x_tr, _, y_tr, _ = train_test_split(x_train, y_train, test_size=TEST_SIZE, random_state=k)
    pool_train = Pool(x_tr, y_tr, cat_features=categorical_columns)
    pool_test = Pool(x_test_with_noise, cat_features=categorical_columns)
    model.fit(pool_train)
    y_pred_for_x_test_with_noise.append(model.predict(pool_test)) 
    
# important! don't use exponent here
test_target_with_noise = np.mean(y_pred_for_x_test_with_noise, axis=0)

In [65]:
x_train_combined = x_train.append(x_test_with_noise, ignore_index=True)
x_train_combined

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,PoolQC,Fence,MiscFeature,SaleType,SaleCondition
0,60.000000,65.000000,8450.000000,7.000000,5.000000,2003.000000,2003.000000,196.000000,706.000000,0.000000,...,Attchd,RFn,TA,TA,Y,Other,Other,Other,WD,Normal
1,20.000000,80.000000,9600.000000,6.000000,8.000000,1976.000000,1976.000000,0.000000,978.000000,0.000000,...,Attchd,RFn,TA,TA,Y,Other,Other,Other,WD,Normal
2,60.000000,68.000000,11250.000000,7.000000,5.000000,2001.000000,2002.000000,162.000000,486.000000,0.000000,...,Attchd,RFn,TA,TA,Y,Other,Other,Other,WD,Normal
3,70.000000,60.000000,9550.000000,7.000000,5.000000,1915.000000,1970.000000,0.000000,216.000000,0.000000,...,Detchd,Unf,TA,TA,Y,Other,Other,Other,WD,Abnorml
4,60.000000,84.000000,14260.000000,8.000000,5.000000,2000.000000,2000.000000,350.000000,655.000000,0.000000,...,Attchd,RFn,TA,TA,Y,Other,Other,Other,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2914,159.888495,21.006196,1936.094715,3.849836,7.106053,1969.959371,1970.024793,0.176569,0.080942,0.039146,...,Other,Other,Other,Other,Y,Other,Other,Other,WD,Normal
2915,160.034686,20.903897,1894.189414,4.136896,4.909377,1969.905702,1969.869376,0.083226,252.178471,-0.048714,...,CarPort,Unf,TA,TA,Y,Other,Other,Other,WD,Abnorml
2916,19.967012,160.056096,19999.900762,5.015645,6.910174,1959.990160,1996.182373,-0.063737,1224.191589,-0.217492,...,Detchd,Unf,TA,TA,Y,Other,Other,Other,WD,Abnorml
2917,85.148309,61.990892,10440.873660,4.986973,4.995743,1992.010435,1992.012120,0.027295,337.012916,-0.058021,...,Other,Other,Other,Other,Y,Other,MnPrv,Shed,WD,Normal


In [71]:
test_target_with_noise = pd.Series(data=test_target_with_noise, name='SalePrice')
test_target_with_noise

0       11.672681
1       11.960642
2       12.129558
3       12.169692
4       12.136384
          ...    
1454    11.340116
1455    11.336614
1456    12.004683
1457    11.655062
1458    12.295048
Name: SalePrice, Length: 1459, dtype: float64

In [72]:
y_train_combined = y_train.append(test_target_with_noise, ignore_index=True)
y_train_combined

0       12.247694
1       12.109011
2       12.317167
3       11.849398
4       12.429216
          ...    
2914    11.340116
2915    11.336614
2916    12.004683
2917    11.655062
2918    12.295048
Name: SalePrice, Length: 2919, dtype: float64

In [81]:
y_pred4 = []

TEST_SIZE = 0.05

models = [CatBoostRegressor(verbose=False, depth=6, n_estimators=465, learning_rate=0.06) for i in range(M)]

for k, model in enumerate(tqdm(models)):
    x_tr, _, y_tr, _ = train_test_split(x_train_combined, y_train_combined, test_size=TEST_SIZE, random_state=k)
    pool_train = Pool(x_tr, y_tr, cat_features=categorical_columns)
    pool_test = Pool(x_test, cat_features=categorical_columns)
    model.fit(pool_train)
    y_pred4.append(model.predict(pool_test)) 
    
test_targets4 = np.exp(np.mean(y_pred4, axis=0))
    
submit_pseudolabeling = pd.DataFrame()
submit_pseudolabeling['Id'] = test_data['Id']
submit_pseudolabeling['SalePrice'] = test_targets4
print(submit_pseudolabeling)

submit_pseudolabeling.to_csv('/kaggle/working/pseudolabeling2.csv', index=False)

        Id      SalePrice
0     1461  118602.471575
1     1462  155363.068769
2     1463  186272.977740
3     1464  191085.684844
4     1465  186028.473926
...    ...            ...
1454  2915   83900.567863
1455  2916   83451.048089
1456  2917  164943.857252
1457  2918  117182.517799
1458  2919  218696.336499

[1459 rows x 2 columns]


The public score achieved by using pseudo-labeling approach used above: 0.12102

It is know the best score achieved

### Noisy Student based on the following paper: https://arxiv.org/pdf/1911.04252.pdf

In [12]:
def load_and_preprocess():
    train_data = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/train.csv")
    test_data = pd.read_csv("/kaggle/input/house-prices-advanced-regression-techniques/test.csv")

    numeric_columns = [i for i, j in zip(train_data.columns, train_data.dtypes) if j in [np.int64, np.float64] and i not in ['SalePrice', 'Id']]
    categorical_columns = [i for i, j in zip(train_data.columns, train_data.dtypes) if j not in [np.int64, np.float64]]

    train_data[categorical_columns] = train_data[categorical_columns].fillna("Other")
    train_data[numeric_columns] = train_data[numeric_columns].fillna(-1)
    test_data[categorical_columns] = test_data[categorical_columns].fillna("Other")
    test_data[numeric_columns] = test_data[numeric_columns].fillna(-1)
    x_train = train_data[numeric_columns + categorical_columns]
    x_test = test_data[numeric_columns + categorical_columns]

    y_train = np.log(train_data['SalePrice'])
    
    return x_train, y_train, x_test, numeric_columns, categorical_columns

In [91]:
MU, SIGMA = 0, 0.1
TEST_SIZE = 0.05
M = 20

def train_models(x_train, y_train, categorical_columns, print_text):
    models = [CatBoostRegressor(verbose=False, depth=6, n_estimators=465, learning_rate=0.06) for i in range(M)]
    print(print_text)
    for k, model in enumerate(tqdm(models)):
        x_tr, _, y_tr, _ = train_test_split(x_train, y_train, test_size=TEST_SIZE, random_state=k)
        pool_train = Pool(x_tr, y_tr, cat_features=categorical_columns)
        model.fit(pool_train)
        
    return models

def generate_pseudo_labels(x_test, categorical_columns, models):
    pseudolabels = []
    for k, model in enumerate(models):
        pool_test = Pool(x_test, cat_features=categorical_columns)
        pseudolabels.append(model.predict(pool_test)) 

    # important! don't use exponent here
    pseudolabels = np.mean(pseudolabels, axis=0)
    
    return np.array(pseudolabels)

def add_noise(x_test, pseudo_labels):
    x_test_noise = np.random.normal(MU, SIGMA, [x_test[numeric_columns].shape[0], x_test[numeric_columns].shape[1]])
    x_test_with_noise_only_numeric = x_test[numeric_columns].add(x_test_noise)
    x_test_with_noise = pd.concat([x_test_with_noise_only_numeric, x_test[categorical_columns]], axis=1)
    
    pseudo_labels_noise = np.random.normal(MU, SIGMA, [pseudo_labels.shape[0]])
    pseudo_labels_with_noise = pseudo_labels + pseudo_labels_noise
    
    return x_test_with_noise, pseudo_labels_with_noise

def combine_data(x_train, x_test_with_noise, y_train, pseudo_labels_with_noise):
    x_train_combined = x_train.append(x_test_with_noise, ignore_index=True)
    pseudo_labels_with_noise = pd.Series(data=pseudo_labels_with_noise, name='SalePrice')
    y_train_combined = y_train.append(pseudo_labels_with_noise, ignore_index=True)
    
    return x_train_combined, y_train_combined

def student_predict(x_test, categorical_columns, models):
    y_preds = []
    print('Final prediction...')
    for k, model in enumerate(tqdm(models)):
        pool_test = Pool(x_test, cat_features=categorical_columns)
        y_preds.append(model.predict(pool_test)) 

    y_pred = np.exp(np.mean(y_preds, axis=0))
    
    return y_pred

def noisy_student(iterations):
    x_train, y_train, x_test, numeric_columns, categorical_columns = load_and_preprocess()
    teacher_models = train_models(x_train, y_train, categorical_columns, 'Learning on initial train data...')
    print('Iterative training...')
    for i in tqdm(range(iterations)):
        pseudo_labels = generate_pseudo_labels(x_test, categorical_columns, teacher_models)
        # Approach #2 - Apply noise to both test data and pseudo-labels
        x_test_with_noise, pseudo_labels_with_noise = add_noise(x_test, pseudo_labels)
        x_train, y_train = combine_data(x_train, x_test_with_noise, y_train, pseudo_labels_with_noise)
        student_models = train_models(x_train, y_train, categorical_columns, 'Learning on data with noise...')
        teacher_models = student_models
    y_pred = student_predict(x_test, categorical_columns, teacher_models)
    
    return y_pred

In [48]:
y_pred = noisy_student(10)
y_pred

Learning on initial train data...


100%|██████████| 20/20 [03:05<00:00,  9.28s/it]


Iterative training...


  0%|          | 0/10 [00:00<?, ?it/s]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:15<04:58, 15.72s/it][A
 10%|█         | 2/20 [00:31<04:44, 15.83s/it][A
 15%|█▌        | 3/20 [00:47<04:29, 15.85s/it][A
 20%|██        | 4/20 [01:03<04:11, 15.74s/it][A
 25%|██▌       | 5/20 [01:19<03:57, 15.83s/it][A
 30%|███       | 6/20 [01:34<03:40, 15.76s/it][A
 35%|███▌      | 7/20 [01:50<03:25, 15.78s/it][A
 40%|████      | 8/20 [02:06<03:08, 15.69s/it][A
 45%|████▌     | 9/20 [02:21<02:53, 15.73s/it][A
 50%|█████     | 10/20 [02:37<02:36, 15.69s/it][A
 55%|█████▌    | 11/20 [02:53<02:21, 15.77s/it][A
 60%|██████    | 12/20 [03:08<02:05, 15.70s/it][A
 65%|██████▌   | 13/20 [03:24<01:49, 15.70s/it][A
 70%|███████   | 14/20 [03:40<01:34, 15.74s/it][A
 75%|███████▌  | 15/20 [03:55<01:18, 15.61s/it][A
 80%|████████  | 16/20 [04:11<01:02, 15.63s/it][A
 85%|████████▌ | 17/20 [04:27<00:46, 15.66s/it][A
 90%|█████████ | 18/20 [04:43<00:31, 15.73s/it][A
 95%|█████████▌| 19/20 [04:58<00:15, 15.66s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:19<06:02, 19.08s/it][A
 10%|█         | 2/20 [00:37<05:38, 18.83s/it][A
 15%|█▌        | 3/20 [00:56<05:20, 18.86s/it][A
 20%|██        | 4/20 [01:15<05:01, 18.85s/it][A
 25%|██▌       | 5/20 [01:34<04:44, 18.96s/it][A
 30%|███       | 6/20 [01:53<04:26, 19.04s/it][A
 35%|███▌      | 7/20 [02:12<04:06, 18.95s/it][A
 40%|████      | 8/20 [02:31<03:48, 19.05s/it][A
 45%|████▌     | 9/20 [02:50<03:28, 18.94s/it][A
 50%|█████     | 10/20 [03:10<03:11, 19.11s/it][A
 55%|█████▌    | 11/20 [03:29<02:51, 19.11s/it][A
 60%|██████    | 12/20 [03:47<02:32, 19.01s/it][A
 65%|██████▌   | 13/20 [04:07<02:14, 19.15s/it][A
 70%|███████   | 14/20 [04:26<01:54, 19.12s/it][A
 75%|███████▌  | 15/20 [04:46<01:36, 19.27s/it][A
 80%|████████  | 16/20 [05:05<01:17, 19.32s/it][A
 85%|████████▌ | 17/20 [05:24<00:57, 19.31s/it][A
 90%|█████████ | 18/20 [05:43<00:38, 19.18s/it][A
 95%|█████████▌| 19/20 [06:02<00:19, 19.22s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:22<07:02, 22.22s/it][A
 10%|█         | 2/20 [00:44<06:40, 22.23s/it][A
 15%|█▌        | 3/20 [01:06<06:20, 22.36s/it][A
 20%|██        | 4/20 [01:29<05:58, 22.41s/it][A
 25%|██▌       | 5/20 [01:52<05:38, 22.57s/it][A
 30%|███       | 6/20 [02:14<05:13, 22.41s/it][A
 35%|███▌      | 7/20 [02:36<04:51, 22.46s/it][A
 40%|████      | 8/20 [02:59<04:29, 22.46s/it][A
 45%|████▌     | 9/20 [03:21<04:06, 22.37s/it][A
 50%|█████     | 10/20 [03:43<03:42, 22.26s/it][A
 55%|█████▌    | 11/20 [04:06<03:21, 22.34s/it][A
 60%|██████    | 12/20 [04:28<02:57, 22.21s/it][A
 65%|██████▌   | 13/20 [04:50<02:36, 22.34s/it][A
 70%|███████   | 14/20 [05:13<02:14, 22.36s/it][A
 75%|███████▌  | 15/20 [05:35<01:51, 22.33s/it][A
 80%|████████  | 16/20 [05:57<01:28, 22.23s/it][A
 85%|████████▌ | 17/20 [06:19<01:06, 22.28s/it][A
 90%|█████████ | 18/20 [06:42<00:44, 22.35s/it][A
 95%|█████████▌| 19/20 [07:04<00:22, 22.29s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:25<07:56, 25.06s/it][A
 10%|█         | 2/20 [00:50<07:35, 25.33s/it][A
 15%|█▌        | 3/20 [01:16<07:13, 25.50s/it][A
 20%|██        | 4/20 [01:41<06:45, 25.35s/it][A
 25%|██▌       | 5/20 [02:06<06:19, 25.32s/it][A
 30%|███       | 6/20 [02:31<05:52, 25.17s/it][A
 35%|███▌      | 7/20 [02:57<05:28, 25.27s/it][A
 40%|████      | 8/20 [03:22<05:03, 25.26s/it][A
 45%|████▌     | 9/20 [03:47<04:38, 25.35s/it][A
 50%|█████     | 10/20 [04:12<04:12, 25.29s/it][A
 55%|█████▌    | 11/20 [04:38<03:48, 25.35s/it][A
 60%|██████    | 12/20 [05:04<03:23, 25.45s/it][A
 65%|██████▌   | 13/20 [05:38<03:16, 28.07s/it][A
 70%|███████   | 14/20 [06:04<02:44, 27.42s/it][A
 75%|███████▌  | 15/20 [06:30<02:16, 27.23s/it][A
 80%|████████  | 16/20 [06:57<01:47, 26.92s/it][A
 85%|████████▌ | 17/20 [07:22<01:19, 26.54s/it][A
 90%|█████████ | 18/20 [07:48<00:52, 26.39s/it][A
 95%|█████████▌| 19/20 [08:14<00:26, 26.27s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:28<09:08, 28.86s/it][A
 10%|█         | 2/20 [00:57<08:42, 29.01s/it][A
 15%|█▌        | 3/20 [01:27<08:14, 29.06s/it][A
 20%|██        | 4/20 [01:56<07:44, 29.05s/it][A
 25%|██▌       | 5/20 [02:25<07:18, 29.23s/it][A
 30%|███       | 6/20 [02:55<06:50, 29.31s/it][A
 35%|███▌      | 7/20 [03:24<06:20, 29.26s/it][A
 40%|████      | 8/20 [03:53<05:51, 29.28s/it][A
 45%|████▌     | 9/20 [04:22<05:21, 29.26s/it][A
 50%|█████     | 10/20 [04:52<04:53, 29.39s/it][A
 55%|█████▌    | 11/20 [05:21<04:24, 29.38s/it][A
 60%|██████    | 12/20 [05:51<03:54, 29.36s/it][A
 65%|██████▌   | 13/20 [06:20<03:25, 29.34s/it][A
 70%|███████   | 14/20 [06:49<02:56, 29.33s/it][A
 75%|███████▌  | 15/20 [07:25<02:36, 31.33s/it][A
 80%|████████  | 16/20 [07:55<02:02, 30.70s/it][A
 85%|████████▌ | 17/20 [08:24<01:30, 30.28s/it][A
 90%|█████████ | 18/20 [08:53<01:00, 30.06s/it][A
 95%|█████████▌| 19/20 [09:23<00:29, 29.82s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:32<10:16, 32.44s/it][A
 10%|█         | 2/20 [01:06<09:57, 33.18s/it][A
 15%|█▌        | 3/20 [01:39<09:25, 33.27s/it][A
 20%|██        | 4/20 [02:12<08:51, 33.22s/it][A
 25%|██▌       | 5/20 [02:44<08:13, 32.90s/it][A
 30%|███       | 6/20 [03:17<07:39, 32.83s/it][A
 35%|███▌      | 7/20 [03:49<07:04, 32.64s/it][A
 40%|████      | 8/20 [04:23<06:36, 33.07s/it][A
 45%|████▌     | 9/20 [04:56<06:01, 32.83s/it][A
 50%|█████     | 10/20 [05:28<05:26, 32.63s/it][A
 55%|█████▌    | 11/20 [06:00<04:52, 32.54s/it][A
 60%|██████    | 12/20 [06:33<04:19, 32.46s/it][A
 65%|██████▌   | 13/20 [07:05<03:47, 32.52s/it][A
 70%|███████   | 14/20 [07:46<03:30, 35.07s/it][A
 75%|███████▌  | 15/20 [08:19<02:51, 34.36s/it][A
 80%|████████  | 16/20 [08:52<02:15, 33.85s/it][A
 85%|████████▌ | 17/20 [09:24<01:40, 33.49s/it][A
 90%|█████████ | 18/20 [09:57<01:06, 33.27s/it][A
 95%|█████████▌| 19/20 [10:29<00:33, 33.04s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:35<11:19, 35.78s/it][A
 10%|█         | 2/20 [01:11<10:44, 35.83s/it][A
 15%|█▌        | 3/20 [01:47<10:11, 35.97s/it][A
 20%|██        | 4/20 [02:23<09:36, 36.00s/it][A
 25%|██▌       | 5/20 [02:59<09:00, 36.01s/it][A
 30%|███       | 6/20 [03:35<08:23, 35.95s/it][A
 35%|███▌      | 7/20 [04:11<07:47, 35.96s/it][A
 40%|████      | 8/20 [04:47<07:10, 35.86s/it][A
 45%|████▌     | 9/20 [05:23<06:35, 35.93s/it][A
 50%|█████     | 10/20 [05:59<06:00, 36.01s/it][A
 55%|█████▌    | 11/20 [06:40<05:38, 37.56s/it][A
 60%|██████    | 12/20 [07:17<04:59, 37.40s/it][A
 65%|██████▌   | 13/20 [07:53<04:19, 37.05s/it][A
 70%|███████   | 14/20 [08:30<03:41, 36.90s/it][A
 75%|███████▌  | 15/20 [09:07<03:04, 36.95s/it][A
 80%|████████  | 16/20 [09:44<02:27, 36.93s/it][A
 85%|████████▌ | 17/20 [10:21<01:50, 36.88s/it][A
 90%|█████████ | 18/20 [10:57<01:13, 36.68s/it][A
 95%|█████████▌| 19/20 [11:34<00:36, 36.94s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:39<12:34, 39.68s/it][A
 10%|█         | 2/20 [01:19<11:54, 39.71s/it][A
 15%|█▌        | 3/20 [01:59<11:16, 39.77s/it][A
 20%|██        | 4/20 [02:38<10:34, 39.67s/it][A
 25%|██▌       | 5/20 [03:17<09:49, 39.30s/it][A
 30%|███       | 6/20 [03:56<09:11, 39.39s/it][A
 35%|███▌      | 7/20 [04:39<08:46, 40.48s/it][A
 40%|████      | 8/20 [05:22<08:15, 41.32s/it][A
 45%|████▌     | 9/20 [06:02<07:29, 40.86s/it][A
 50%|█████     | 10/20 [06:42<06:45, 40.51s/it][A
 55%|█████▌    | 11/20 [07:21<06:01, 40.20s/it][A
 60%|██████    | 12/20 [08:01<05:19, 40.00s/it][A
 65%|██████▌   | 13/20 [08:40<04:38, 39.73s/it][A
 70%|███████   | 14/20 [09:20<03:58, 39.71s/it][A
 75%|███████▌  | 15/20 [09:59<03:17, 39.60s/it][A
 80%|████████  | 16/20 [10:39<02:38, 39.67s/it][A
 85%|████████▌ | 17/20 [11:18<01:58, 39.48s/it][A
 90%|█████████ | 18/20 [11:57<01:18, 39.25s/it][A
 95%|█████████▌| 19/20 [12:36<00:39, 39.27s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:44<13:59, 44.17s/it][A
 10%|█         | 2/20 [01:26<12:55, 43.10s/it][A
 15%|█▌        | 3/20 [02:15<12:58, 45.78s/it][A
 20%|██        | 4/20 [02:58<11:53, 44.57s/it][A
 25%|██▌       | 5/20 [03:40<10:58, 43.92s/it][A
 30%|███       | 6/20 [04:23<10:07, 43.40s/it][A
 35%|███▌      | 7/20 [05:05<09:20, 43.11s/it][A
 40%|████      | 8/20 [05:48<08:33, 42.83s/it][A
 45%|████▌     | 9/20 [06:29<07:46, 42.44s/it][A
 50%|█████     | 10/20 [07:12<07:04, 42.43s/it][A
 55%|█████▌    | 11/20 [07:54<06:21, 42.36s/it][A
 60%|██████    | 12/20 [08:36<05:38, 42.35s/it][A
 65%|██████▌   | 13/20 [09:19<04:56, 42.39s/it][A
 70%|███████   | 14/20 [10:01<04:14, 42.36s/it][A
 75%|███████▌  | 15/20 [10:44<03:32, 42.48s/it][A
 80%|████████  | 16/20 [11:26<02:49, 42.43s/it][A
 85%|████████▌ | 17/20 [12:15<02:13, 44.34s/it][A
 90%|█████████ | 18/20 [12:58<01:27, 43.91s/it][A
 95%|█████████▌| 19/20 [13:40<00:43, 43.50s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:45<14:22, 45.39s/it][A
 10%|█         | 2/20 [01:31<13:40, 45.60s/it][A
 15%|█▌        | 3/20 [02:17<13:00, 45.90s/it][A
 20%|██        | 4/20 [03:03<12:15, 45.94s/it][A
 25%|██▌       | 5/20 [03:48<11:22, 45.48s/it][A
 30%|███       | 6/20 [04:34<10:40, 45.72s/it][A
 35%|███▌      | 7/20 [05:20<09:54, 45.77s/it][A
 40%|████      | 8/20 [06:05<09:09, 45.76s/it][A
 45%|████▌     | 9/20 [06:51<08:22, 45.67s/it][A
 50%|█████     | 10/20 [07:38<07:40, 46.02s/it][A
 55%|█████▌    | 11/20 [08:30<07:10, 47.86s/it][A
 60%|██████    | 12/20 [09:16<06:19, 47.43s/it][A
 65%|██████▌   | 13/20 [10:02<05:28, 46.95s/it][A
 70%|███████   | 14/20 [10:48<04:40, 46.74s/it][A
 75%|███████▌  | 15/20 [11:35<03:53, 46.63s/it][A
 80%|████████  | 16/20 [12:21<03:06, 46.62s/it][A
 85%|████████▌ | 17/20 [13:07<02:19, 46.40s/it][A
 90%|█████████ | 18/20 [13:53<01:32, 46.22s/it][A
 95%|█████████▌| 19/20 [14:39<00:46, 46.16s/it]

Final prediction...


100%|██████████| 20/20 [00:00<00:00, 62.31it/s]


array([121639.93643978, 158096.87800949, 179482.89102149, ...,
       165988.88233308, 119944.79208449, 213709.33588139])

In [49]:
submit_noisy_student = pd.DataFrame()
submit_noisy_student['Id'] = test_data['Id']
submit_noisy_student['SalePrice'] = y_pred

submit_noisy_student.to_csv('/kaggle/working/noisy_student.csv', index=False)

Training noisy student for 10 iterations resulted in the public score = 0.12272

In [50]:
y_pred_3_iters = noisy_student(3)
submit_noisy_student_3_iters = pd.DataFrame()
submit_noisy_student_3_iters['Id'] = test_data['Id']
submit_noisy_student_3_iters['SalePrice'] = y_pred_3_iters

submit_noisy_student_3_iters.to_csv('/kaggle/working/noisy_student_3_iters.csv', index=False)

Learning on initial train data...


100%|██████████| 20/20 [03:10<00:00,  9.53s/it]


Iterative training...


  0%|          | 0/3 [00:00<?, ?it/s]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:16<05:06, 16.11s/it][A
 10%|█         | 2/20 [00:32<04:50, 16.12s/it][A
 15%|█▌        | 3/20 [00:48<04:36, 16.29s/it][A
 20%|██        | 4/20 [01:04<04:20, 16.26s/it][A
 25%|██▌       | 5/20 [01:21<04:05, 16.34s/it][A
 30%|███       | 6/20 [01:37<03:46, 16.17s/it][A
 35%|███▌      | 7/20 [01:53<03:32, 16.35s/it][A
 40%|████      | 8/20 [02:10<03:16, 16.38s/it][A
 45%|████▌     | 9/20 [02:27<03:00, 16.45s/it][A
 50%|█████     | 10/20 [02:43<02:43, 16.38s/it][A
 55%|█████▌    | 11/20 [02:59<02:28, 16.48s/it][A
 60%|██████    | 12/20 [03:16<02:11, 16.40s/it][A
 65%|██████▌   | 13/20 [03:33<01:57, 16.81s/it][A
 70%|███████   | 14/20 [03:51<01:41, 16.91s/it][A
 75%|███████▌  | 15/20 [04:07<01:23, 16.64s/it][A
 80%|████████  | 16/20 [04:23<01:06, 16.65s/it][A
 85%|████████▌ | 17/20 [04:40<00:49, 16.63s/it][A
 90%|█████████ | 18/20 [04:57<00:33, 16.77s/it][A
 95%|█████████▌| 19/20 [05:13<00:16, 16.66s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:19<06:13, 19.68s/it][A
 10%|█         | 2/20 [00:39<05:57, 19.89s/it][A
 15%|█▌        | 3/20 [00:59<05:36, 19.80s/it][A
 20%|██        | 4/20 [01:18<05:15, 19.69s/it][A
 25%|██▌       | 5/20 [01:38<04:55, 19.68s/it][A
 30%|███       | 6/20 [01:57<04:33, 19.55s/it][A
 35%|███▌      | 7/20 [02:18<04:17, 19.82s/it][A
 40%|████      | 8/20 [02:38<03:58, 19.87s/it][A
 45%|████▌     | 9/20 [02:57<03:37, 19.81s/it][A
 50%|█████     | 10/20 [03:18<03:20, 20.02s/it][A
 55%|█████▌    | 11/20 [03:38<02:59, 19.94s/it][A
 60%|██████    | 12/20 [03:57<02:38, 19.87s/it][A
 65%|██████▌   | 13/20 [04:18<02:19, 19.96s/it][A
 70%|███████   | 14/20 [04:37<01:59, 19.86s/it][A
 75%|███████▌  | 15/20 [04:57<01:38, 19.77s/it][A
 80%|████████  | 16/20 [05:17<01:19, 19.81s/it][A
 85%|████████▌ | 17/20 [05:37<00:59, 19.86s/it][A
 90%|█████████ | 18/20 [05:57<00:39, 19.90s/it][A
 95%|█████████▌| 19/20 [06:17<00:20, 20.06s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:22<07:14, 22.84s/it][A
 10%|█         | 2/20 [00:46<06:58, 23.23s/it][A
 15%|█▌        | 3/20 [01:09<06:31, 23.04s/it][A
 20%|██        | 4/20 [01:32<06:07, 22.99s/it][A
 25%|██▌       | 5/20 [01:54<05:43, 22.88s/it][A
 30%|███       | 6/20 [02:17<05:20, 22.87s/it][A
 35%|███▌      | 7/20 [02:39<04:54, 22.66s/it][A
 40%|████      | 8/20 [03:02<04:32, 22.71s/it][A
 45%|████▌     | 9/20 [03:25<04:10, 22.81s/it][A
 50%|█████     | 10/20 [03:47<03:45, 22.51s/it][A
 55%|█████▌    | 11/20 [04:10<03:24, 22.68s/it][A
 60%|██████    | 12/20 [04:33<03:02, 22.80s/it][A
 65%|██████▌   | 13/20 [04:56<02:39, 22.83s/it][A
 70%|███████   | 14/20 [05:19<02:16, 22.77s/it][A
 75%|███████▌  | 15/20 [05:42<01:54, 22.81s/it][A
 80%|████████  | 16/20 [06:05<01:31, 22.98s/it][A
 85%|████████▌ | 17/20 [06:29<01:10, 23.41s/it][A
 90%|█████████ | 18/20 [06:52<00:46, 23.28s/it][A
 95%|█████████▌| 19/20 [07:15<00:23, 23.22s/it]

Final prediction...


100%|██████████| 20/20 [00:00<00:00, 56.23it/s]


Applying noisy student approach with 3 iterations resulted in worse public score than for 10 iterations: 0.12450

Let's try:
1. scaling features to [0, 1] range and the target with the same type of scaler and then use inverse_transform to bring back log values
2. apply Gaussian noise with smaller sigma

In [99]:
from sklearn.preprocessing import MinMaxScaler
MU, SIGMA = 0, 0.001

def scale(x_train, x_test, y_train, numeric_columns):
    scaler = MinMaxScaler().fit(x_train[numeric_columns])
    x_train[numeric_columns] = scaler.transform(x_train[numeric_columns])
    x_test[numeric_columns] = scaler.transform(x_test[numeric_columns])
    target_scaler = MinMaxScaler().fit(y_train.values.reshape(-1, 1))
    y_train = target_scaler.transform(y_train.values.reshape(-1, 1))
    
    return x_train, x_test, y_train, target_scaler
    
def add_noise_scale_version(x_test, pseudo_labels):
    x_test_noise = np.random.normal(MU, SIGMA, [x_test[numeric_columns].shape[0], x_test[numeric_columns].shape[1]])
    x_test_with_noise_only_numeric = x_test[numeric_columns].add(x_test_noise)
    x_test_with_noise = pd.concat([x_test_with_noise_only_numeric, x_test[categorical_columns]], axis=1)
    
    pseudo_labels_noise = np.random.normal(MU, SIGMA, [pseudo_labels.shape[0]])
    pseudo_labels_with_noise = pseudo_labels + pseudo_labels_noise
    
    return x_test_with_noise, pseudo_labels_with_noise

def combine_data_scale_version(x_train, x_test_with_noise, y_train, pseudo_labels_with_noise):
    x_train_combined = x_train.append(x_test_with_noise, ignore_index=True)
    pseudo_labels_with_noise = pd.Series(data=pseudo_labels_with_noise, name='SalePrice')
    if type(y_train) != pd.Series:
        y_train = pd.Series(data=y_train.flatten(), name='SalePrice')
    y_train_combined = y_train.append(pseudo_labels_with_noise, ignore_index=True)
    return x_train_combined, y_train_combined

def student_predict_scale_version(x_test, categorical_columns, models, target_scaler):
    y_preds = []
    print('Final prediction...')
    for k, model in enumerate(tqdm(models)):
        pool_test = Pool(x_test, cat_features=categorical_columns)
        y_preds.append(model.predict(pool_test)) 

    y_pred_mean = np.mean(y_preds, axis=0)
    y_pred_inverse_scaled = target_scaler.inverse_transform(y_pred_mean.reshape(-1, 1))
    y_pred = np.array(np.exp(y_pred_inverse_scaled).flatten())
    
    return y_pred

def noisy_student_scale_version(iterations):
    x_train, y_train, x_test, numeric_columns, categorical_columns = load_and_preprocess()
    x_train, x_test, y_train, target_scaler = scale(x_train, x_test, y_train, numeric_columns)
    teacher_models = train_models(x_train, y_train, categorical_columns, 'Learning on initial train data...')
    print('Iterative training...')
    for i in tqdm(range(iterations)):
        pseudo_labels = generate_pseudo_labels(x_test, categorical_columns, teacher_models)
        x_test_with_noise, pseudo_labels_with_noise = add_noise_scale_version(x_test, pseudo_labels)
        x_train, y_train = combine_data_scale_version(x_train, x_test_with_noise, y_train, pseudo_labels_with_noise)
        student_models = train_models(x_train, y_train, categorical_columns, 'Learning on data with noise...')
        teacher_models = student_models
    y_pred = student_predict_scale_version(x_test, categorical_columns, teacher_models, target_scaler)
    
    return y_pred

In [100]:
y_pred_3_iters_scaled = noisy_student_scale_version(3)
submit_noisy_student_3_iters_scaled = pd.DataFrame()
submit_noisy_student_3_iters_scaled['Id'] = test_data['Id']
submit_noisy_student_3_iters_scaled['SalePrice'] = y_pred_3_iters_scaled

submit_noisy_student_3_iters_scaled.to_csv('/kaggle/working/noisy_student_3_iters_scaled.csv', index=False)

Learning on initial train data...


100%|██████████| 20/20 [03:09<00:00,  9.45s/it]


Iterative training...


  0%|          | 0/3 [00:00<?, ?it/s]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:15<05:00, 15.81s/it][A
 10%|█         | 2/20 [00:31<04:45, 15.87s/it][A
 15%|█▌        | 3/20 [00:47<04:31, 15.98s/it][A
 20%|██        | 4/20 [01:03<04:14, 15.88s/it][A
 25%|██▌       | 5/20 [01:19<04:00, 16.00s/it][A
 30%|███       | 6/20 [01:35<03:43, 15.94s/it][A
 35%|███▌      | 7/20 [01:51<03:28, 16.05s/it][A
 40%|████      | 8/20 [02:07<03:12, 16.06s/it][A
 45%|████▌     | 9/20 [02:24<02:57, 16.14s/it][A
 50%|█████     | 10/20 [02:40<02:40, 16.08s/it][A
 55%|█████▌    | 11/20 [02:56<02:25, 16.17s/it][A
 60%|██████    | 12/20 [03:12<02:09, 16.21s/it][A
 65%|██████▌   | 13/20 [03:29<01:53, 16.26s/it][A
 70%|███████   | 14/20 [03:44<01:36, 16.09s/it][A
 75%|███████▌  | 15/20 [04:00<01:20, 16.05s/it][A
 80%|████████  | 16/20 [04:16<01:04, 16.04s/it][A
 85%|████████▌ | 17/20 [04:33<00:48, 16.09s/it][A
 90%|█████████ | 18/20 [04:49<00:32, 16.09s/it][A
 95%|█████████▌| 19/20 [05:05<00:16, 16.15s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:19<06:10, 19.50s/it][A
 10%|█         | 2/20 [00:38<05:49, 19.41s/it][A
 15%|█▌        | 3/20 [00:58<05:28, 19.30s/it][A
 20%|██        | 4/20 [01:17<05:10, 19.43s/it][A
 25%|██▌       | 5/20 [01:36<04:50, 19.40s/it][A
 30%|███       | 6/20 [01:56<04:32, 19.43s/it][A
 35%|███▌      | 7/20 [02:15<04:12, 19.43s/it][A
 40%|████      | 8/20 [02:35<03:55, 19.59s/it][A
 45%|████▌     | 9/20 [02:55<03:35, 19.61s/it][A
 50%|█████     | 10/20 [03:14<03:15, 19.51s/it][A
 55%|█████▌    | 11/20 [03:34<02:56, 19.62s/it][A
 60%|██████    | 12/20 [03:54<02:37, 19.69s/it][A
 65%|██████▌   | 13/20 [04:13<02:17, 19.62s/it][A
 70%|███████   | 14/20 [04:33<01:57, 19.59s/it][A
 75%|███████▌  | 15/20 [04:52<01:37, 19.48s/it][A
 80%|████████  | 16/20 [05:12<01:18, 19.50s/it][A
 85%|████████▌ | 17/20 [05:31<00:58, 19.57s/it][A
 90%|█████████ | 18/20 [05:51<00:38, 19.50s/it][A
 95%|█████████▌| 19/20 [06:10<00:19, 19.52s/it]

Learning on data with noise...



  0%|          | 0/20 [00:00<?, ?it/s][A
  5%|▌         | 1/20 [00:22<07:04, 22.34s/it][A
 10%|█         | 2/20 [00:45<06:45, 22.56s/it][A
 15%|█▌        | 3/20 [01:07<06:24, 22.64s/it][A
 20%|██        | 4/20 [01:30<06:00, 22.50s/it][A
 25%|██▌       | 5/20 [01:52<05:38, 22.58s/it][A
 30%|███       | 6/20 [02:15<05:17, 22.65s/it][A
 35%|███▌      | 7/20 [02:37<04:52, 22.53s/it][A
 40%|████      | 8/20 [03:00<04:31, 22.62s/it][A
 45%|████▌     | 9/20 [03:23<04:09, 22.66s/it][A
 50%|█████     | 10/20 [03:45<03:45, 22.53s/it][A
 55%|█████▌    | 11/20 [04:07<03:21, 22.44s/it][A
 60%|██████    | 12/20 [04:30<02:59, 22.41s/it][A
 65%|██████▌   | 13/20 [04:52<02:37, 22.51s/it][A
 70%|███████   | 14/20 [05:15<02:14, 22.48s/it][A
 75%|███████▌  | 15/20 [05:37<01:52, 22.51s/it][A
 80%|████████  | 16/20 [06:00<01:30, 22.58s/it][A
 85%|████████▌ | 17/20 [06:23<01:08, 22.67s/it][A
 90%|█████████ | 18/20 [06:46<00:45, 22.61s/it][A
 95%|█████████▌| 19/20 [07:08<00:22, 22.63s/it]

Final prediction...


100%|██████████| 20/20 [00:00<00:00, 58.85it/s]


Previous best public score was: 0.12102

This time, with applied scaling, noisy student helped to achieve the new best score: 0.12032
