### 1. Re-implement the house price machine learning example

In [1]:
%matplotlib inline

from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import random

# Load the Boston housing dataset that ships with scikit-learn.
data = load_boston()
y = data['target']
df = pd.DataFrame(data=data['data'], columns=data['feature_names'])

# Single explanatory variable used by the searches below: the TAX column.
X = df['TAX']

# Summary statistics first, then the dataset's own description text.
print(df.describe())
print(data['DESCR'])

             CRIM          ZN       INDUS        CHAS         NOX          RM  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean     3.613524   11.363636   11.136779    0.069170    0.554695    6.284634   
std      8.601545   23.322453    6.860353    0.253994    0.115878    0.702617   
min      0.006320    0.000000    0.460000    0.000000    0.385000    3.561000   
25%      0.082045    0.000000    5.190000    0.000000    0.449000    5.885500   
50%      0.256510    0.000000    9.690000    0.000000    0.538000    6.208500   
75%      3.677083   12.500000   18.100000    0.000000    0.624000    6.623500   
max     88.976200  100.000000   27.740000    1.000000    0.871000    8.780000   

              AGE         DIS         RAD         TAX     PTRATIO           B  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean    68.574901    3.795043    9.549407  408.237154   18.455534  356.674032   
std     28.148861    2.1057

###### 1. Random Search method to find the optimal *k* and *b*

For example, you can change the loss function from $Loss = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$ to $Loss = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|$.

And you can change the learning rate and observe the performance.

In [2]:
def RMSE(y, y_hat):
    """Root-mean-squared error between observed and predicted values.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_hat : array-like
        Predicted values; must broadcast against ``y``.

    Returns
    -------
    numpy.float64
        ``sqrt(sum((y - y_hat) ** 2) / n)``, reduced over the first axis,
        where ``n`` is the length of that axis.

    Raises
    ------
    ValueError
        If the inputs are empty, for which the error is undefined.
    """
    # np.asarray accepts plain lists/tuples as well as arrays and Series,
    # and the subtraction is purely positional.
    residuals = np.atleast_1d(
        np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    )
    n_samples = residuals.shape[0]
    if n_samples == 0:
        raise ValueError("RMSE is undefined for empty input")
    # Vectorized reduction instead of Python-level iteration via builtin sum().
    return np.sqrt(np.sum(residuals ** 2, axis=0) / n_samples)

def MAE(y, y_hat):
    """Mean absolute error between observed and predicted values.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_hat : array-like
        Predicted values; must broadcast against ``y``.

    Returns
    -------
    numpy.float64
        ``sum(|y - y_hat|) / n``, reduced over the first axis, where ``n``
        is the length of that axis.

    Raises
    ------
    ValueError
        If the inputs are empty, for which the error is undefined.
    """
    # np.asarray accepts plain lists/tuples as well as arrays and Series,
    # and the subtraction is purely positional.
    residuals = np.atleast_1d(
        np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    )
    n_samples = residuals.shape[0]
    if n_samples == 0:
        raise ValueError("MAE is undefined for empty input")
    # Vectorized reduction instead of Python-level iteration via builtin sum().
    return np.sum(np.abs(residuals), axis=0) / n_samples

def loss_random(X, y, n, loss=RMSE):
    """Random search for the slope ``k`` and intercept ``b`` of ``k * X + b``.

    Draws ``n`` independent ``(k, b)`` pairs, each coordinate uniform in
    ``[-100, 100)``, and keeps the pair with the lowest loss. Every
    improvement is printed as it is found.

    Parameters
    ----------
    X : array-like supporting ``k * X + b``
        Feature values.
    y : array-like
        Target values passed to ``loss`` together with the predictions.
    n : int
        Number of random candidates to try.
    loss : callable
        ``loss(y, y_hat)`` returning a comparable number; lower is better.

    Returns
    -------
    tuple
        ``(k_best, b_best)``; ``(0, 0)`` when no candidate was accepted
        (for example when ``n`` is 0).
    """
    # Log the loss by name; interpolating the function object itself prints
    # an unreadable "<function RMSE at 0x...>" repr.
    loss_name = getattr(loss, '__name__', repr(loss))

    loss_min = float('inf')
    k_best, b_best = 0, 0
    for i in range(n):
        # Draw order (k, then b) is kept so seeded runs stay reproducible.
        k = random.random() * 200 - 100
        b = random.random() * 200 - 100
        y_hat = k * X + b
        loss_new = loss(y, y_hat)
        if loss_new < loss_min:
            loss_min = loss_new
            k_best, b_best = k, b
            print(f"round: {i}, k: {k_best}, b: {b_best}, {loss_name}: {loss_min}")

    return (k_best, b_best)
# Try 2000 random (k, b) candidates, scored with RMSE.
loss_random(X, y, n=2000, loss=RMSE)

round: 0, k: -52.21484440486681, b: 18.727173057065457, <function RMSE at 0x1a243d8268>: 23059.696240507194
round: 4, k: 32.1217577035228, b: 21.186776752304908, <function RMSE at 0x1a243d8268>: 14185.209594485268
round: 5, k: 21.05428647636647, b: 38.37085229148897, <function RMSE at 0x1a243d8268>: 9313.755694614949
round: 12, k: -18.058332217231438, b: -4.34034739229125, <function RMSE at 0x1a243d8268>: 7997.688880689101
round: 14, k: -8.1165378326139, b: -10.377906378691577, <function RMSE at 0x1a243d8268>: 3613.0532205029103
round: 21, k: -2.9129618462161915, b: 39.36231499788255, <function RMSE at 0x1a243d8268>: 1269.1782763681588
round: 62, k: 0.35959947318562513, b: 67.11121181401577, <function RMSE at 0x1a243d8268>: 202.23236339495398
round: 79, k: 0.19496091989539366, b: -83.07873225516292, <function RMSE at 0x1a243d8268>: 46.06161719389221
round: 919, k: -0.10359578858391671, b: 65.98604164581744, <function RMSE at 0x1a243d8268>: 15.486512053722299


(-0.10359578858391671, 65.98604164581744)

###### 2. Supervised Direction to get optimal *k* and *b*

In [3]:
def loss_spvs_dr(X, y, n, alpha=0.1, loss=RMSE):
    """Random-direction search for ``k`` and ``b`` of ``k * X + b``.

    Starts from a random ``(k, b)`` with each coordinate uniform in
    ``[-100, 100)``. In every round one of the four diagonal directions is
    picked at random and a step of size ``alpha`` is tried; the step is
    kept only if it lowers the best loss seen so far. Every accepted step
    is printed.

    Parameters
    ----------
    X : array-like supporting ``k * X + b``
        Feature values.
    y : array-like
        Target values passed to ``loss`` together with the predictions.
    n : int
        Number of rounds (trial steps).
    alpha : float
        Step size applied to both ``k`` and ``b``.
    loss : callable
        ``loss(y, y_hat)`` returning a comparable number; lower is better.

    Returns
    -------
    tuple
        ``(k_best, b_best)``; the random starting point when no step was
        accepted (for example when ``n`` is 0).
    """
    # Log the loss by name; interpolating the function object itself prints
    # an unreadable "<function RMSE at 0x...>" repr.
    loss_name = getattr(loss, '__name__', repr(loss))

    loss_min = float('inf')
    direction = [(1, 1), (1, -1), (-1, 1), (-1, -1)]

    k = random.random() * 200 - 100
    b = random.random() * 200 - 100
    # Seed the result with the starting point so the return statement never
    # hits an unbound name when no step is accepted.
    k_best, b_best = k, b

    for i in range(n):
        # Random walk: try a single randomly chosen direction per round.
        dr_k, dr_b = random.choice(direction)
        k_new = k + dr_k * alpha
        b_new = b + dr_b * alpha
        y_hat = k_new * X + b_new
        loss_new = loss(y, y_hat)
        if loss_new < loss_min:
            k, b = k_new, b_new
            k_best, b_best = k_new, b_new
            loss_min = loss_new
            print(f"round: {i}, k: {k_best}, b: {b_best}, {loss_name}: {loss_min}")
    return (k_best, b_best)
# 2000 rounds of the random-direction search with the default step and loss.
loss_spvs_dr(X, y, n=2000)

round: 0, k: 85.62835249791164, b: -60.781826187134826, <function RMSE at 0x1a243d8268>: 37737.69560982518
round: 1, k: 85.52835249791164, b: -60.88182618713483, <function RMSE at 0x1a243d8268>: 37693.443714791094
round: 2, k: 85.42835249791165, b: -60.98182618713483, <function RMSE at 0x1a243d8268>: 37649.19181992983
round: 6, k: 85.32835249791165, b: -60.88182618713483, <function RMSE at 0x1a243d8268>: 37605.12474459724
round: 7, k: 85.22835249791166, b: -60.98182618713483, <function RMSE at 0x1a243d8268>: 37560.872849919964
round: 8, k: 85.12835249791166, b: -61.08182618713483, <function RMSE at 0x1a243d8268>: 37516.62095541693
round: 9, k: 85.02835249791167, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 37472.369061088786
round: 12, k: 84.92835249791167, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 37428.11716693611
round: 13, k: 84.82835249791168, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 37384.05009165034
round: 14, k: 84.72835249791169, b: -61.

round: 204, k: 75.6283524979122, b: -61.98182618713484, <function RMSE at 0x1a243d8268>: 33320.63850430311
round: 206, k: 75.52835249791221, b: -62.081826187134844, <function RMSE at 0x1a243d8268>: 33276.386621164806
round: 216, k: 75.42835249791221, b: -62.181826187134845, <function RMSE at 0x1a243d8268>: 33232.134738253924
round: 217, k: 75.32835249791222, b: -62.28182618713485, <function RMSE at 0x1a243d8268>: 33187.88285557139
round: 218, k: 75.22835249791223, b: -62.38182618713485, <function RMSE at 0x1a243d8268>: 33143.6309731181
round: 223, k: 75.12835249791223, b: -62.48182618713485, <function RMSE at 0x1a243d8268>: 33099.379090895
round: 230, k: 75.02835249791224, b: -62.38182618713485, <function RMSE at 0x1a243d8268>: 33055.312016911106
round: 232, k: 74.92835249791224, b: -62.28182618713485, <function RMSE at 0x1a243d8268>: 33011.244942934005
round: 235, k: 74.82835249791225, b: -62.181826187134845, <function RMSE at 0x1a243d8268>: 32967.17786896369
round: 236, k: 74.7283524

round: 447, k: 64.32835249791285, b: -61.08182618713483, <function RMSE at 0x1a243d8268>: 28331.44953403507
round: 448, k: 64.22835249791285, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 28287.19766760724
round: 454, k: 64.12835249791286, b: -61.08182618713483, <function RMSE at 0x1a243d8268>: 28243.130596501338
round: 455, k: 64.02835249791286, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 28198.878730458964
round: 457, k: 63.92835249791286, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 28154.626864738777
round: 458, k: 63.82835249791286, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 28110.55979378189
round: 459, k: 63.72835249791286, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 28066.307928452516
round: 461, k: 63.62835249791286, b: -61.381826187134834, <function RMSE at 0x1a243d8268>: 28022.056063449134
round: 463, k: 63.528352497912856, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 27977.98899264516
round: 465, k: 63.428

round: 683, k: 52.928352497912705, b: -61.08182618713483, <function RMSE at 0x1a243d8268>: 23297.270997386233
round: 684, k: 52.828352497912704, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 23253.01915931922
round: 686, k: 52.7283524979127, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 23208.76732174976
round: 687, k: 52.6283524979127, b: -61.381826187134834, <function RMSE at 0x1a243d8268>: 23164.51548468067
round: 688, k: 52.5283524979127, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 23120.44842131186
round: 690, k: 52.4283524979127, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 23076.381357993272
round: 693, k: 52.3283524979127, b: -61.08182618713483, <function RMSE at 0x1a243d8268>: 23032.31429472522
round: 698, k: 52.228352497912695, b: -60.98182618713483, <function RMSE at 0x1a243d8268>: 22988.247231507958
round: 699, k: 52.128352497912694, b: -61.08182618713483, <function RMSE at 0x1a243d8268>: 22943.99539555119
round: 700, k: 52.0283524

round: 916, k: 42.428352497912556, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 18660.437696627298
round: 918, k: 42.328352497912554, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 18616.185904355076
round: 920, k: 42.22835249791255, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 18572.118857089903
round: 923, k: 42.12835249791255, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 18527.867065991868
round: 924, k: 42.02835249791255, b: -61.18182618713483, <function RMSE at 0x1a243d8268>: 18483.80001920195
round: 925, k: 41.92835249791255, b: -61.08182618713483, <function RMSE at 0x1a243d8268>: 18439.73297255415
round: 928, k: 41.82835249791255, b: -60.98182618713483, <function RMSE at 0x1a243d8268>: 18395.66592604947
round: 929, k: 41.728352497912546, b: -60.88182618713483, <function RMSE at 0x1a243d8268>: 18351.598879688958
round: 932, k: 41.628352497912545, b: -60.781826187134826, <function RMSE at 0x1a243d8268>: 18307.53183347364
round: 933, k: 41.

round: 1168, k: 29.628352497912374, b: -61.381826187134834, <function RMSE at 0x1a243d8268>: 13007.852010610639
round: 1169, k: 29.528352497912373, b: -61.28182618713483, <function RMSE at 0x1a243d8268>: 12963.785022541073
round: 1170, k: 29.42835249791237, b: -61.381826187134834, <function RMSE at 0x1a243d8268>: 12919.533355850856
round: 1173, k: 29.32835249791237, b: -61.481826187134835, <function RMSE at 0x1a243d8268>: 12875.281691235496
round: 1175, k: 29.22835249791237, b: -61.58182618713484, <function RMSE at 0x1a243d8268>: 12831.030028716475
round: 1176, k: 29.128352497912367, b: -61.481826187134835, <function RMSE at 0x1a243d8268>: 12786.963044634997
round: 1177, k: 29.028352497912365, b: -61.58182618713484, <function RMSE at 0x1a243d8268>: 12742.711385392524
round: 1178, k: 28.928352497912364, b: -61.68182618713484, <function RMSE at 0x1a243d8268>: 12698.45972830572
round: 1183, k: 28.828352497912363, b: -61.58182618713484, <function RMSE at 0x1a243d8268>: 12654.392747216718
r

round: 1418, k: 17.128352497912196, b: -59.88182618713481, <function RMSE at 0x1a243d8268>: 7489.336238832073
round: 1421, k: 17.028352497912195, b: -59.981826187134814, <function RMSE at 0x1a243d8268>: 7445.084991618532
round: 1422, k: 16.928352497912194, b: -60.081826187134816, <function RMSE at 0x1a243d8268>: 7400.833753030963
round: 1425, k: 16.828352497912192, b: -60.18182618713482, <function RMSE at 0x1a243d8268>: 7356.582523225037
round: 1426, k: 16.72835249791219, b: -60.081826187134816, <function RMSE at 0x1a243d8268>: 7312.515821769571
round: 1427, k: 16.62835249791219, b: -59.981826187134814, <function RMSE at 0x1a243d8268>: 7268.449124861993
round: 1428, k: 16.528352497912188, b: -60.081826187134816, <function RMSE at 0x1a243d8268>: 7224.197916936425
round: 1429, k: 16.428352497912186, b: -59.981826187134814, <function RMSE at 0x1a243d8268>: 7180.131231291272
round: 1434, k: 16.328352497912185, b: -60.081826187134816, <function RMSE at 0x1a243d8268>: 7135.880039512794
round

round: 1662, k: 5.428352497912218, b: -59.1818261871348, <function RMSE at 0x1a243d8268>: 2323.5129294683943
round: 1663, k: 5.328352497912219, b: -59.0818261871348, <function RMSE at 0x1a243d8268>: 2279.450692265072
round: 1665, k: 5.228352497912219, b: -58.9818261871348, <function RMSE at 0x1a243d8268>: 2235.3886458499887
round: 1667, k: 5.1283524979122195, b: -59.0818261871348, <function RMSE at 0x1a243d8268>: 2191.1431546638123
round: 1670, k: 5.02835249791222, b: -58.9818261871348, <function RMSE at 0x1a243d8268>: 2147.0815499594737
round: 1671, k: 4.92835249791222, b: -58.8818261871348, <function RMSE at 0x1a243d8268>: 2103.0201745558875
round: 1673, k: 4.8283524979122205, b: -58.7818261871348, <function RMSE at 0x1a243d8268>: 2058.9590431740467
round: 1676, k: 4.728352497912221, b: -58.8818261871348, <function RMSE at 0x1a243d8268>: 2014.7146331427857
round: 1677, k: 4.628352497912221, b: -58.9818261871348, <function RMSE at 0x1a243d8268>: 1970.4705625737945
round: 1678, k: 4.52

(0.12835249791222086, -59.88182618713481)

#### Walk through all 4 directions and find the smallest loss

import numpy as np

def loss_supervise(X, y, n, alpha, loss=RMSE):
    """Greedy four-direction search for ``k`` and ``b`` of ``k * X + b``.

    Starts from a random ``(k, b)`` with each coordinate uniform in
    ``[-100, 100)``. In every round all four diagonal steps of size
    ``alpha`` are evaluated and the one with the smallest loss is taken,
    provided it lowers the best loss seen so far. Every accepted step is
    printed.

    Parameters
    ----------
    X : array-like supporting ``X * k + b``
        Feature values.
    y : array-like
        Target values passed to ``loss`` together with the predictions.
    n : int
        Maximum number of rounds.
    alpha : float
        Step size applied to both ``k`` and ``b``.
    loss : callable
        ``loss(y, y_hat)`` returning a comparable number; lower is better.

    Returns
    -------
    tuple
        ``(k_best, b_best)``; the random starting point when no step was
        accepted (for example when ``n`` is 0).
    """
    # Log the loss by name; interpolating the function object itself prints
    # an unreadable "<function RMSE at 0x...>" repr.
    loss_name = getattr(loss, '__name__', repr(loss))

    loss_min = float('inf')
    direction = [(1, 1), (1, -1), (-1, 1), (-1, -1)]

    k = random.random() * 200 - 100
    b = random.random() * 200 - 100
    # Seed the result with the starting point so the return statement never
    # hits an unbound name when no step is accepted.
    k_best, b_best = k, b

    for i in range(n):
        # Evaluate every neighbouring candidate of the current position.
        candidates = [(k + dr_k * alpha, b + dr_b * alpha) for dr_k, dr_b in direction]
        losses = [loss(y, X * k_c + b_c) for k_c, b_c in candidates]

        # First candidate with the minimal loss wins ties.
        best_idx = losses.index(min(losses))
        k_new, b_new = candidates[best_idx]
        loss_new = losses[best_idx]

        if not loss_new < loss_min:
            # All four neighbours were tried and none improves; the position
            # would not change, so further rounds would repeat this one.
            break

        # Move to the winning candidate and record the new best loss.
        k, b = k_new, b_new
        k_best, b_best = k_new, b_new
        loss_min = loss_new
        print(f"round: {i}, k: {k_best}, b: {b_best}, {loss_name}: {loss_min}")
    return (k_best, b_best)
# Up to 2000 rounds of the four-direction search with a step of 0.01.
loss_supervise(X, y, n=2000, alpha=0.01)

In [56]:
# Scratch demo: order the labels in X by their paired keys in Y.
# NOTE(review): this rebinds the global X that earlier held the TAX column;
# re-run the data-loading cell before calling the search functions again.
X = list("abcdefghi")
Y = [0, 1, 1, 0, 1, 2, 2, 0, 1]

# Tuples sort by key first, then by label, so ties keep alphabetical order.
Z = [pair[1] for pair in sorted(zip(Y, X))]
print(sorted(zip(Y, X)))

[(0, 'a'), (0, 'd'), (0, 'h'), (1, 'b'), (1, 'c'), (1, 'e'), (1, 'i'), (2, 'f'), (2, 'g')]


###### 3. Gradient Descent to get optimal *k* and *b*

###### 4. Try different loss functions and learning rates.