### 1. Re-code the house price machine learning

In [27]:
%matplotlib inline

from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import random

# Load the Boston housing dataset object returned by scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned scikit-learn version before re-running this cell.
data = load_boston()
# Wrap the feature matrix in a DataFrame whose columns carry the dataset's feature names.
df = pd.DataFrame(data['data'], columns=data['feature_names'])
# Target values (the dataset's 'target' entry) that the later cells fit against.
y = data['target']
# Print per-column summary statistics, then the dataset's own description text.
print(df.describe())
print(data['DESCR'])
X = df.TAX # single input feature for the k * X + b model: the TAX column

             CRIM          ZN       INDUS        CHAS         NOX          RM  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean     3.613524   11.363636   11.136779    0.069170    0.554695    6.284634   
std      8.601545   23.322453    6.860353    0.253994    0.115878    0.702617   
min      0.006320    0.000000    0.460000    0.000000    0.385000    3.561000   
25%      0.082045    0.000000    5.190000    0.000000    0.449000    5.885500   
50%      0.256510    0.000000    9.690000    0.000000    0.538000    6.208500   
75%      3.677083   12.500000   18.100000    0.000000    0.624000    6.623500   
max     88.976200  100.000000   27.740000    1.000000    0.871000    8.780000   

              AGE         DIS         RAD         TAX     PTRATIO           B  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean    68.574901    3.795043    9.549407  408.237154   18.455534  356.674032   
std     28.148861    2.1057

###### 1. Random Search Method to get optimal *k* and *b*

For example, you can change the loss function from $Loss = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$ to $Loss = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|$.

And you can change the learning rate and observe the performance.

In [44]:
def RMSE(y, y_hat):
    """Return the root-mean-square error between targets and predictions.

    Both inputs are converted with ``np.asarray`` and compared element by
    element (positionally), so lists, tuples, numpy arrays and pandas Series
    are all accepted. The mean is taken along the first axis, so 1-D input
    yields a scalar and 2-D input yields one value per column.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_hat : array-like
        Predicted values, broadcastable against ``y``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``sqrt(mean((y - y_hat) ** 2))`` along the first axis.

    Raises
    ------
    ValueError
        If the inputs contain no elements (the mean is undefined).
    """
    # atleast_1d lets plain scalars be scored too instead of failing on axis=0.
    residual = np.atleast_1d(
        np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    )
    if residual.size == 0:
        raise ValueError("RMSE is undefined for empty input")
    # Vectorised reduction instead of the builtin sum(), which loops in Python.
    return np.sqrt(np.mean(residual ** 2, axis=0))

def MAE(y, y_hat):
    """Return the mean absolute error between targets and predictions.

    Both inputs are converted with ``np.asarray`` and compared element by
    element (positionally), so lists, tuples, numpy arrays and pandas Series
    are all accepted. The mean is taken along the first axis, so 1-D input
    yields a scalar and 2-D input yields one value per column.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_hat : array-like
        Predicted values, broadcastable against ``y``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``mean(|y - y_hat|)`` along the first axis.

    Raises
    ------
    ValueError
        If the inputs contain no elements (the mean is undefined).
    """
    # atleast_1d lets plain scalars be scored too instead of failing on axis=0.
    residual = np.atleast_1d(
        np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    )
    if residual.size == 0:
        raise ValueError("MAE is undefined for empty input")
    # Vectorised reduction instead of the builtin sum(), which loops in Python.
    return np.mean(np.abs(residual), axis=0)

def loss_random(X, y, n, loss=RMSE, k_range=(-100, 100), b_range=(-100, 100)):
    """Search for a line ``y ≈ k * X + b`` by scoring random (k, b) candidates.

    Each round draws ``k`` and ``b`` uniformly from their ranges, evaluates
    ``loss(y, k * X + b)`` and keeps the candidate with the lowest loss seen
    so far. Every improvement is printed as a progress line.

    Parameters
    ----------
    X : array-like
        Input feature values; must support ``k * X + b``.
    y : array-like
        Target values passed to ``loss`` as its first argument.
    n : int
        Number of random candidates to try.
    loss : callable, optional
        Function ``loss(y, y_hat)`` returning a comparable score (lower is
        better). Defaults to ``RMSE``.
    k_range, b_range : tuple of (low, high), optional
        Sampling bounds for the slope and the intercept. The defaults
        reproduce the original fixed ``[-100, 100]`` search window.

    Returns
    -------
    tuple
        ``(k_best, b_best)``; ``(0, 0)`` if no candidate was accepted
        (for example when ``n <= 0``).
    """
    # Show the loss by name rather than by its "<function ... at 0x...>" repr.
    loss_name = getattr(loss, "__name__", repr(loss))
    loss_min = float('inf')
    k_best, b_best = 0, 0
    for i in range(n):
        # Draw k first, then b, matching the original order of random draws.
        k = random.uniform(*k_range)
        b = random.uniform(*b_range)
        loss_new = loss(y, k * X + b)
        if loss_new < loss_min:
            loss_min = loss_new
            k_best, b_best = k, b
            print(f"round: {i}, k: {k_best}, b: {b_best}, {loss_name}: {loss_min}")

    return (k_best, b_best)
# Run the random search: 2000 random (k, b) candidates scored with RMSE.
loss_random(X, y, 2000, RMSE)

round: 0, k: -16.69646277382857, b: -27.95723532723848, <function RMSE at 0x1a2073fea0>: 7418.150982970514
round: 12, k: -4.596673770915601, b: 41.98816144406197, <function RMSE at 0x1a2073fea0>: 2010.2606861898134
round: 46, k: -2.324492411555596, b: -81.95830501958878, <function RMSE at 0x1a2073fea0>: 1122.3260447817884
round: 51, k: -0.47955611651275376, b: -17.52458878764935, <function RMSE at 0x1a2073fea0>: 248.04129045422698
round: 82, k: -0.1777292529506127, b: -40.10346627424492, <function RMSE at 0x1a2073fea0>: 137.83725430746264
round: 204, k: 0.041844666725566526, b: -15.092182883644185, <function RMSE at 0x1a2073fea0>: 24.833625646333044


(0.041844666725566526, -15.092182883644185)

###### 2. Supervised Direction to get optimal *k* and *b*

In [53]:
def loss_spvs_dr(X, y, n, alpha=0.1, loss=RMSE):
    """Search for a line ``y ≈ k * X + b`` with a loss-guided random walk.

    Starting from a random ``(k, b)`` in ``[-100, 100)``, each round picks one
    of four diagonal directions at random, moves both parameters by ``alpha``
    in that direction, and keeps the move only if it lowers the loss. Every
    accepted move is printed as a progress line.

    Parameters
    ----------
    X : array-like
        Input feature values; must support ``k * X + b``.
    y : array-like
        Target values passed to ``loss`` as its first argument.
    n : int
        Number of steps to attempt.
    alpha : float, optional
        Step size applied to both ``k`` and ``b``.
    loss : callable, optional
        Function ``loss(y, y_hat)`` returning a comparable score (lower is
        better). Defaults to ``RMSE``.

    Returns
    -------
    tuple
        ``(k_best, b_best)``: the best parameters found. This is the random
        starting point if no step improved on it (for example when ``n <= 0``).
    """
    direction = [(1, 1), (1, -1), (-1, 1), (-1, -1)]
    # Show the loss by name rather than by its "<function ... at 0x...>" repr.
    loss_name = getattr(loss, "__name__", repr(loss))

    k = random.random() * 200 - 100
    b = random.random() * 200 - 100
    # Score the starting point so that the first step must actually improve on
    # it, and so that a result exists even when no step is ever accepted.
    loss_min = loss(y, k * X + b)

    for i in range(n):
        # Random walk over the four diagonals. A greedy variant could instead
        # evaluate all four directions each round and take the best one.
        dr_k, dr_b = random.choice(direction)
        k_new = k + dr_k * alpha
        b_new = b + dr_b * alpha
        loss_new = loss(y, k_new * X + b_new)
        if loss_new < loss_min:
            # The current position is always the best one found so far.
            k, b = k_new, b_new
            loss_min = loss_new
            print(f"round: {i}, k: {k}, b: {b}, {loss_name}: {loss_min}")
    return (k, b)
# Run the direction-based search for 2000 steps with the default alpha and loss.
loss_spvs_dr(X, y, 2000)

round: 0, k: 52.56492667917299, b: 56.207149538263856, <function RMSE at 0x1a2073fea0>: 23245.187059135635
round: 1, k: 52.46492667917299, b: 56.107149538263855, <function RMSE at 0x1a2073fea0>: 23200.935098419715
round: 2, k: 52.36492667917299, b: 56.00714953826385, <function RMSE at 0x1a2073fea0>: 23156.683137733682
round: 6, k: 52.26492667917299, b: 56.107149538263855, <function RMSE at 0x1a2073fea0>: 23112.616098115388
round: 7, k: 52.164926679172986, b: 56.207149538263856, <function RMSE at 0x1a2073fea0>: 23068.54905863808
round: 9, k: 52.064926679172984, b: 56.107149538263855, <function RMSE at 0x1a2073fea0>: 23024.29709790185
round: 11, k: 51.96492667917298, b: 56.00714953826385, <function RMSE at 0x1a2073fea0>: 22980.04513719565
round: 13, k: 51.86492667917298, b: 56.107149538263855, <function RMSE at 0x1a2073fea0>: 22935.978097780146
round: 16, k: 51.76492667917298, b: 56.00714953826385, <function RMSE at 0x1a2073fea0>: 22891.726137064015
round: 17, k: 51.66492667917298, b: 55

round: 268, k: 37.364926679172775, b: 56.00714953826385, <function RMSE at 0x1a2073fea0>: 16532.75846471688
round: 271, k: 37.264926679172774, b: 56.107149538263855, <function RMSE at 0x1a2073fea0>: 16488.69143660447
round: 272, k: 37.16492667917277, b: 56.00714953826385, <function RMSE at 0x1a2073fea0>: 16444.43947581091
round: 273, k: 37.06492667917277, b: 55.90714953826385, <function RMSE at 0x1a2073fea0>: 16400.187515059137
round: 274, k: 36.96492667917277, b: 55.80714953826385, <function RMSE at 0x1a2073fea0>: 16355.935554349488
round: 277, k: 36.86492667917277, b: 55.70714953826385, <function RMSE at 0x1a2073fea0>: 16311.683593682308
round: 278, k: 36.76492667917277, b: 55.60714953826385, <function RMSE at 0x1a2073fea0>: 16267.431633057939
round: 281, k: 36.664926679172765, b: 55.507149538263846, <function RMSE at 0x1a2073fea0>: 16223.17967247674
round: 282, k: 36.564926679172764, b: 55.407149538263845, <function RMSE at 0x1a2073fea0>: 16178.92771193905
round: 283, k: 36.46492667

round: 516, k: 24.9649266791726, b: 55.407149538263845, <function RMSE at 0x1a2073fea0>: 11056.426999093002
round: 517, k: 24.864926679172598, b: 55.507149538263846, <function RMSE at 0x1a2073fea0>: 11012.359993915643
round: 518, k: 24.764926679172596, b: 55.60714953826385, <function RMSE at 0x1a2073fea0>: 10968.292989311552
round: 519, k: 24.664926679172595, b: 55.70714953826385, <function RMSE at 0x1a2073fea0>: 10924.225985287687
round: 520, k: 24.564926679172594, b: 55.80714953826385, <function RMSE at 0x1a2073fea0>: 10880.158981851087
round: 522, k: 24.464926679172592, b: 55.90714953826385, <function RMSE at 0x1a2073fea0>: 10836.091979008928
round: 523, k: 24.36492667917259, b: 55.80714953826385, <function RMSE at 0x1a2073fea0>: 10791.84002300469
round: 524, k: 24.26492667917259, b: 55.70714953826385, <function RMSE at 0x1a2073fea0>: 10747.588067103648
round: 525, k: 24.164926679172588, b: 55.80714953826385, <function RMSE at 0x1a2073fea0>: 10703.521065037754
round: 526, k: 24.0649

round: 744, k: 13.264926679172483, b: 56.50714953826386, <function RMSE at 0x1a2073fea0>: 5890.787834263876
round: 748, k: 13.164926679172483, b: 56.40714953826386, <function RMSE at 0x1a2073fea0>: 5846.535917819209
round: 749, k: 13.064926679172483, b: 56.30714953826386, <function RMSE at 0x1a2073fea0>: 5802.284002169119
round: 750, k: 12.964926679172484, b: 56.40714953826386, <function RMSE at 0x1a2073fea0>: 5758.21709684428
round: 751, k: 12.864926679172484, b: 56.30714953826386, <function RMSE at 0x1a2073fea0>: 5713.965183238226
round: 753, k: 12.764926679172484, b: 56.40714953826386, <function RMSE at 0x1a2073fea0>: 5669.898281824789
round: 754, k: 12.664926679172485, b: 56.30714953826386, <function RMSE at 0x1a2073fea0>: 5625.6463703738
round: 756, k: 12.564926679172485, b: 56.207149538263856, <function RMSE at 0x1a2073fea0>: 5581.394459828009
round: 758, k: 12.464926679172486, b: 56.30714953826386, <function RMSE at 0x1a2073fea0>: 5537.327563866111
round: 759, k: 12.364926679172

round: 961, k: 0.9649266791725127, b: 56.40714953826386, <function RMSE at 0x1a2073fea0>: 459.22241841561254
round: 963, k: 0.8649266791725128, b: 56.50714953826386, <function RMSE at 0x1a2073fea0>: 415.17302627015476
round: 964, k: 0.7649266791725128, b: 56.40714953826386, <function RMSE at 0x1a2073fea0>: 370.94119485710297
round: 965, k: 0.6649266791725128, b: 56.50714953826386, <function RMSE at 0x1a2073fea0>: 326.9016681325169
round: 966, k: 0.5649266791725128, b: 56.60714953826386, <function RMSE at 0x1a2073fea0>: 282.8707225572
round: 980, k: 0.46492667917251285, b: 56.50714953826386, <function RMSE at 0x1a2073fea0>: 238.66557213725363
round: 984, k: 0.36492667917251287, b: 56.60714953826386, <function RMSE at 0x1a2073fea0>: 194.66976856919814
round: 986, k: 0.2649266791725129, b: 56.50714953826386, <function RMSE at 0x1a2073fea0>: 150.5267611744161
round: 988, k: 0.16492667917251289, b: 56.40714953826386, <function RMSE at 0x1a2073fea0>: 106.4742518324002
round: 989, k: 0.064926

(-0.035073320827487126, 56.207149538263856)

###### 3. Gradient Descent to get optimal *k* and *b*

###### 4. Try different Loss function and learning rate. 