### 1. Re-code the house price machine learning

In [1]:
%matplotlib inline

from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random

data = load_boston()
df = pd.DataFrame(data['data'], columns=data['feature_names'])
y = data['target']
print(df.describe())
print(data['DESCR'])
X = df.RM # I choose RM column

             CRIM          ZN       INDUS        CHAS         NOX          RM  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean     3.613524   11.363636   11.136779    0.069170    0.554695    6.284634   
std      8.601545   23.322453    6.860353    0.253994    0.115878    0.702617   
min      0.006320    0.000000    0.460000    0.000000    0.385000    3.561000   
25%      0.082045    0.000000    5.190000    0.000000    0.449000    5.885500   
50%      0.256510    0.000000    9.690000    0.000000    0.538000    6.208500   
75%      3.677083   12.500000   18.100000    0.000000    0.624000    6.623500   
max     88.976200  100.000000   27.740000    1.000000    0.871000    8.780000   

              AGE         DIS         RAD         TAX     PTRATIO           B  \
count  506.000000  506.000000  506.000000  506.000000  506.000000  506.000000   
mean    68.574901    3.795043    9.549407  408.237154   18.455534  356.674032   
std     28.148861    2.1057

###### 1. Random Choose Method to get optimal *k* and *b*

For example, you can change the loss function: $Loss = \frac{1}{n} sum({y_i - \hat{y_i}})^2$ to $Loss = \frac{1}{n} sum(|{y_i - \hat{y_i}}|)$

And you can change the learning rate and observe the performance.

In [2]:
def RMSE(y, y_hat):
    return np.sqrt(sum((y - y_hat) ** 2) / len(y))

def MAE(y, y_hat):
    return sum(abs(y - y_hat)) / len(y)

def loss_random(X, y, n, loss=RMSE):
    loss_min = float('inf')
    k_best, b_best = 0, 0
    for i in range(n):
        k = random.random() * 200 - 100
        b = random.random() * 200 - 100
        y_hat = k * X + b
        loss_new = loss(y, y_hat)
        if loss_new < loss_min:
            loss_min = loss_new
            k_best, b_best = k, b
            print(f"round: {i}, k: {k_best}, b: {b_best}, {loss.__name__}: {loss_min}")
        
    return (k_best, b_best)
loss_random(X, y, 2000, RMSE)

round: 0, k: -93.47460897201204, b: 57.24890414358583, RMSE: 557.4465305704119
round: 2, k: -58.93146386636698, b: 13.24212067936837, RMSE: 382.70193348408975
round: 5, k: 68.4493864994761, b: -66.80398445192765, RMSE: 343.4422580701553
round: 6, k: -39.2883111054688, b: 68.54493330274761, RMSE: 203.8586284524064
round: 9, k: -7.641604056387024, b: -35.506524200073514, RMSE: 106.91727901102759
round: 14, k: -12.229433197405655, b: 9.586883295020684, RMSE: 91.28226389987262
round: 20, k: 4.110585580796936, b: 15.524501346279877, RMSE: 20.255004481099533
round: 91, k: -4.380751006737199, b: 34.9596617845755, RMSE: 19.008266943132945
round: 148, k: 14.304633513895439, b: -60.65701320899386, RMSE: 10.097243570912582
round: 154, k: 18.75900147032297, b: -97.63741447586858, RMSE: 9.732971795374883
round: 314, k: 15.369696862423595, b: -77.15311928076139, RMSE: 8.515960279643165
round: 341, k: 10.508295443299858, b: -39.481987312573395, RMSE: 7.79637568999096
round: 835, k: 6.727259737120022,

(6.151423817717642, -16.888353408628646)

###### 2.Supervised Direction to get optimal *k* and *b*

In [3]:
# This is a combination of supervising and random walking.
def loss_spvs_dr(X, y, n, alpha=0.1, loss=RMSE):
    loss_min = float('inf')
    direction = [(1, 1), (1, -1), (-1, 1), (-1, -1)]
    
    k = random.random() * 200 - 100
    b = random.random() * 200 - 100
    
    for i in range(n):
        dr_k, dr_b = random.choice(direction) # random walk. Can we walk 4 directions and compare which one is the best?
        k_new = k + dr_k * alpha
        b_new = b + dr_b * alpha
        y_hat = k_new * X + b_new
        loss_new = loss(y, y_hat)
        if loss_new < loss_min:
            k, b = k_new, b_new
            k_best, b_best = k_new, b_new
            loss_min = loss_new
            print(f"round: {i}, k: {k_best}, b: {b_best}, {loss.__name__}: {loss_min}")
    return (k_best, b_best)
loss_spvs_dr(X, y, 2000)

round: 0, k: 6.264060877492545, b: 7.237477305715634, RMSE: 25.040573482952997
round: 1, k: 6.164060877492545, b: 7.337477305715634, RMSE: 24.538782204802303
round: 2, k: 6.064060877492546, b: 7.437477305715634, RMSE: 24.038339063894515
round: 4, k: 5.964060877492546, b: 7.537477305715633, RMSE: 23.53933004406191
round: 7, k: 5.8640608774925465, b: 7.437477305715634, RMSE: 22.851335063044957
round: 11, k: 5.764060877492547, b: 7.337477305715634, RMSE: 22.16614854392371
round: 12, k: 5.664060877492547, b: 7.437477305715634, RMSE: 21.673126893557463
round: 14, k: 5.5640608774925475, b: 7.337477305715634, RMSE: 20.993570797066255
round: 19, k: 5.464060877492548, b: 7.237477305715634, RMSE: 20.31764681615696
round: 22, k: 5.364060877492548, b: 7.337477305715634, RMSE: 19.832322968313132
round: 23, k: 5.264060877492549, b: 7.437477305715634, RMSE: 19.34951414401277
round: 24, k: 5.164060877492549, b: 7.337477305715634, RMSE: 18.684517649525592
round: 25, k: 5.064060877492549, b: 7.237477305

(2.5640608774925515, 7.137477305715635)

#### Walk through all 4 directions and find the smallest loss

In [4]:
# Here I try completely supervised direction: walk all 4 directions at the same time, 
# then select the one with smallest loss.
def calculate_loss(X, y, n, alpha=0.01, loss=RMSE):
    '''
    calculate the loss for all 4 directions and select the smallest one
    '''
    loss_min = float('inf')
    direction = [(1, 1), (1, -1), (-1, 1), (-1, -1)]
    
    k = random.random() * 200 - 100
    b = random.random() * 200 - 100
    
    for i in range(n):
        loss_complete = []
        best_data = []
        k_b = []
        for coord in direction:
            dr_k, dr_b = coord
            k_new = k + dr_k * alpha
            b_new = b + dr_b * alpha
            k_b.append((k_new, b_new))
            y_hat = k_new * X + b_new
            loss_complete.append(loss(y, y_hat))
        best_data = sorted(zip(loss_complete, k_b), key=lambda x: x[0])[0]
        loss_new, (k_n, b_n) = best_data
        if loss_new < loss_min:
            k, b = k_n, b_n
            k_best, b_best = k_new, b_new
            loss_min = loss_new
            print(f"round: {i}, k: {k}, b: {b}, {loss.__name__}: {loss_min}")
    return (k_best, b_best)
calculate_loss(X, y, 2000)

round: 0, k: -55.45617257826849, b: 36.33249501544683, RMSE: 337.84007000841245
round: 1, k: -55.446172578268495, b: 36.34249501544683, RMSE: 337.76695449446294
round: 2, k: -55.4361725782685, b: 36.35249501544683, RMSE: 337.6938390100695
round: 3, k: -55.4261725782685, b: 36.362495015446825, RMSE: 337.6207235552517
round: 4, k: -55.4161725782685, b: 36.37249501544682, RMSE: 337.5476081300284
round: 5, k: -55.4061725782685, b: 36.38249501544682, RMSE: 337.4744927344189
round: 6, k: -55.396172578268505, b: 36.39249501544682, RMSE: 337.40137736844247
round: 7, k: -55.38617257826851, b: 36.40249501544682, RMSE: 337.3282620321188
round: 8, k: -55.37617257826851, b: 36.412495015446815, RMSE: 337.2551467254666
round: 9, k: -55.36617257826851, b: 36.42249501544681, RMSE: 337.1820314485053
round: 10, k: -55.35617257826851, b: 36.43249501544681, RMSE: 337.1089162012546
round: 11, k: -55.346172578268515, b: 36.44249501544681, RMSE: 337.0358009837332
round: 12, k: -55.33617257826852, b: 36.452495

round: 122, k: -54.236172578268736, b: 37.55249501544659, RMSE: 328.9202012802461
round: 123, k: -54.22617257826874, b: 37.562495015446586, RMSE: 328.8470895189604
round: 124, k: -54.21617257826874, b: 37.572495015446584, RMSE: 328.77397778970254
round: 125, k: -54.20617257826874, b: 37.58249501544658, RMSE: 328.7008660924929
round: 126, k: -54.196172578268744, b: 37.59249501544658, RMSE: 328.6277544273532
round: 127, k: -54.186172578268746, b: 37.60249501544658, RMSE: 328.5546427943048
round: 128, k: -54.17617257826875, b: 37.61249501544658, RMSE: 328.4815311933692
round: 129, k: -54.16617257826875, b: 37.622495015446574, RMSE: 328.4084196245678
round: 130, k: -54.15617257826875, b: 37.63249501544657, RMSE: 328.33530808792233
round: 131, k: -54.146172578268754, b: 37.64249501544657, RMSE: 328.2621965834538
round: 132, k: -54.136172578268756, b: 37.65249501544657, RMSE: 328.189085111184
round: 133, k: -54.12617257826876, b: 37.66249501544657, RMSE: 328.1159736711344
round: 134, k: -54.

round: 256, k: -52.896172578269, b: 38.89249501544632, RMSE: 319.1235192926996
round: 257, k: -52.886172578269004, b: 38.90249501544632, RMSE: 319.05041202122624
round: 258, k: -52.876172578269006, b: 38.91249501544632, RMSE: 318.97730478482185
round: 259, k: -52.86617257826901, b: 38.922495015446316, RMSE: 318.90419758351055
round: 260, k: -52.85617257826901, b: 38.932495015446314, RMSE: 318.8310904173165
round: 261, k: -52.84617257826901, b: 38.94249501544631, RMSE: 318.7579832862636
round: 262, k: -52.836172578269014, b: 38.95249501544631, RMSE: 318.68487619037626
round: 263, k: -52.826172578269016, b: 38.96249501544631, RMSE: 318.61176912967886
round: 264, k: -52.81617257826902, b: 38.972495015446306, RMSE: 318.5386621041952
round: 265, k: -52.80617257826902, b: 38.982495015446304, RMSE: 318.4655551139501
round: 266, k: -52.79617257826902, b: 38.9924950154463, RMSE: 318.3924481589672
round: 267, k: -52.786172578269024, b: 39.0024950154463, RMSE: 318.319341239271
round: 268, k: -52.

round: 387, k: -51.58617257826926, b: 40.20249501544606, RMSE: 309.5467744392397
round: 388, k: -51.576172578269265, b: 40.21249501544606, RMSE: 309.4736719755662
round: 389, k: -51.56617257826927, b: 40.22249501544606, RMSE: 309.40056955031866
round: 390, k: -51.55617257826927, b: 40.232495015446055, RMSE: 309.3274671635248
round: 391, k: -51.54617257826927, b: 40.24249501544605, RMSE: 309.2543648152118
round: 392, k: -51.53617257826927, b: 40.25249501544605, RMSE: 309.1812625054073
round: 393, k: -51.526172578269275, b: 40.26249501544605, RMSE: 309.1081602341377
round: 394, k: -51.51617257826928, b: 40.27249501544605, RMSE: 309.0350580014313
round: 395, k: -51.50617257826928, b: 40.282495015446045, RMSE: 308.96195580731535
round: 396, k: -51.49617257826928, b: 40.29249501544604, RMSE: 308.88885365181676
round: 397, k: -51.48617257826928, b: 40.30249501544604, RMSE: 308.8157515349632
round: 398, k: -51.476172578269285, b: 40.31249501544604, RMSE: 308.7426494567822
round: 399, k: -51.4

round: 535, k: -50.10617257826956, b: 41.68249501544577, RMSE: 298.72804275170824
round: 536, k: -50.09617257826956, b: 41.692495015445765, RMSE: 298.65494628573117
round: 537, k: -50.08617257826956, b: 41.70249501544576, RMSE: 298.58184986250916
round: 538, k: -50.07617257826956, b: 41.71249501544576, RMSE: 298.5087534820743
round: 539, k: -50.066172578269565, b: 41.72249501544576, RMSE: 298.4356571444576
round: 540, k: -50.05617257826957, b: 41.73249501544576, RMSE: 298.36256084969085
round: 541, k: -50.04617257826957, b: 41.742495015445755, RMSE: 298.28946459780514
round: 542, k: -50.03617257826957, b: 41.75249501544575, RMSE: 298.2163683888324
round: 543, k: -50.02617257826957, b: 41.76249501544575, RMSE: 298.14327222280406
round: 544, k: -50.016172578269575, b: 41.77249501544575, RMSE: 298.0701760997517
round: 545, k: -50.00617257826958, b: 41.78249501544575, RMSE: 297.9970800197065
round: 546, k: -49.99617257826958, b: 41.792495015445745, RMSE: 297.92398398270075
round: 547, k: -

round: 672, k: -48.73617257826983, b: 43.052495015445494, RMSE: 288.7142388361628
round: 673, k: -48.72617257826983, b: 43.06249501544549, RMSE: 288.6411485337282
round: 674, k: -48.716172578269834, b: 43.07249501544549, RMSE: 288.56805827865503
round: 675, k: -48.706172578269836, b: 43.08249501544549, RMSE: 288.49496807097967
round: 676, k: -48.69617257826984, b: 43.092495015445486, RMSE: 288.4218779107377
round: 677, k: -48.68617257826984, b: 43.102495015445484, RMSE: 288.34878779796566
round: 678, k: -48.67617257826984, b: 43.11249501544548, RMSE: 288.2756977326988
round: 679, k: -48.666172578269844, b: 43.12249501544548, RMSE: 288.202607714974
round: 680, k: -48.656172578269846, b: 43.13249501544548, RMSE: 288.1295177448273
round: 681, k: -48.64617257826985, b: 43.142495015445476, RMSE: 288.05642782229523
round: 682, k: -48.63617257826985, b: 43.152495015445474, RMSE: 287.9833379474133
round: 683, k: -48.62617257826985, b: 43.16249501544547, RMSE: 287.91024812021834
round: 684, k: 

round: 801, k: -47.44617257827009, b: 44.34249501544524, RMSE: 279.28599383469685
round: 802, k: -47.43617257827009, b: 44.352495015445236, RMSE: 279.21290995236654
round: 803, k: -47.42617257827009, b: 44.36249501544523, RMSE: 279.1398261223593
round: 804, k: -47.41617257827009, b: 44.37249501544523, RMSE: 279.06674234471615
round: 805, k: -47.406172578270095, b: 44.38249501544523, RMSE: 278.99365861947854
round: 806, k: -47.3961725782701, b: 44.39249501544523, RMSE: 278.9205749466877
round: 807, k: -47.3861725782701, b: 44.402495015445226, RMSE: 278.84749132638444
round: 808, k: -47.3761725782701, b: 44.412495015445224, RMSE: 278.77440775861055
round: 809, k: -47.3661725782701, b: 44.42249501544522, RMSE: 278.70132424340693
round: 810, k: -47.356172578270105, b: 44.43249501544522, RMSE: 278.62824078081536
round: 811, k: -47.34617257827011, b: 44.44249501544522, RMSE: 278.5551573708769
round: 812, k: -47.33617257827011, b: 44.452495015445216, RMSE: 278.48207401363334
round: 813, k: -4

round: 937, k: -46.08617257827036, b: 45.70249501544497, RMSE: 269.3470836283224
round: 938, k: -46.07617257827036, b: 45.712495015444965, RMSE: 269.27400725776727
round: 939, k: -46.06617257827036, b: 45.72249501544496, RMSE: 269.20093094554585
round: 940, k: -46.05617257827036, b: 45.73249501544496, RMSE: 269.12785469170524
round: 941, k: -46.046172578270365, b: 45.74249501544496, RMSE: 269.05477849629307
round: 942, k: -46.03617257827037, b: 45.75249501544496, RMSE: 268.9817023593569
round: 943, k: -46.02617257827037, b: 45.762495015444955, RMSE: 268.90862628094465
round: 944, k: -46.01617257827037, b: 45.77249501544495, RMSE: 268.83555026110383
round: 945, k: -46.00617257827037, b: 45.78249501544495, RMSE: 268.7624742998823
round: 946, k: -45.996172578270375, b: 45.79249501544495, RMSE: 268.6893983973279
round: 947, k: -45.98617257827038, b: 45.80249501544495, RMSE: 268.61632255348843
round: 948, k: -45.97617257827038, b: 45.812495015444945, RMSE: 268.54324676841185
round: 949, k: 

round: 1064, k: -44.81617257827061, b: 46.972495015444714, RMSE: 260.06686768609575
round: 1065, k: -44.80617257827061, b: 46.98249501544471, RMSE: 259.9937991220224
round: 1066, k: -44.796172578270614, b: 46.99249501544471, RMSE: 259.9207306227548
round: 1067, k: -44.786172578270616, b: 47.00249501544471, RMSE: 259.84766218834676
round: 1068, k: -44.77617257827062, b: 47.012495015444706, RMSE: 259.7745938188543
round: 1069, k: -44.76617257827062, b: 47.022495015444704, RMSE: 259.7015255143312
round: 1070, k: -44.75617257827062, b: 47.0324950154447, RMSE: 259.6284572748326
round: 1071, k: -44.746172578270624, b: 47.0424950154447, RMSE: 259.5553891004134
round: 1072, k: -44.736172578270626, b: 47.0524950154447, RMSE: 259.4823209911285
round: 1073, k: -44.72617257827063, b: 47.062495015444696, RMSE: 259.40925294703317
round: 1074, k: -44.71617257827063, b: 47.072495015444694, RMSE: 259.3361849681822
round: 1075, k: -44.70617257827063, b: 47.08249501544469, RMSE: 259.2631170546312
round: 

round: 1182, k: -43.636172578270845, b: 48.15249501544448, RMSE: 251.44523955689232
round: 1183, k: -43.62617257827085, b: 48.16249501544448, RMSE: 251.37217903433927
round: 1184, k: -43.61617257827085, b: 48.172495015444476, RMSE: 251.29911858349124
round: 1185, k: -43.60617257827085, b: 48.18249501544447, RMSE: 251.22605820441063
round: 1186, k: -43.59617257827085, b: 48.19249501544447, RMSE: 251.15299789716005
round: 1187, k: -43.586172578270855, b: 48.20249501544447, RMSE: 251.07993766180243
round: 1188, k: -43.57617257827086, b: 48.21249501544447, RMSE: 251.0068774984004
round: 1189, k: -43.56617257827086, b: 48.222495015444466, RMSE: 250.9338174070168
round: 1190, k: -43.55617257827086, b: 48.232495015444464, RMSE: 250.8607573877143
round: 1191, k: -43.54617257827086, b: 48.24249501544446, RMSE: 250.78769744055637
round: 1192, k: -43.536172578270865, b: 48.25249501544446, RMSE: 250.71463756560595
round: 1193, k: -43.52617257827087, b: 48.26249501544446, RMSE: 250.64157776292606
r

round: 1312, k: -42.3361725782711, b: 49.45249501544422, RMSE: 241.94799610924434
round: 1313, k: -42.326172578271105, b: 49.46249501544422, RMSE: 241.87494546016055
round: 1314, k: -42.31617257827111, b: 49.47249501544422, RMSE: 241.80189489156396
round: 1315, k: -42.30617257827111, b: 49.482495015444215, RMSE: 241.728844403528
round: 1316, k: -42.29617257827111, b: 49.49249501544421, RMSE: 241.65579399612537
round: 1317, k: -42.28617257827111, b: 49.50249501544421, RMSE: 241.58274366942925
round: 1318, k: -42.276172578271115, b: 49.51249501544421, RMSE: 241.509693423513
round: 1319, k: -42.26617257827112, b: 49.52249501544421, RMSE: 241.43664325844983
round: 1320, k: -42.25617257827112, b: 49.532495015444205, RMSE: 241.36359317431325
round: 1321, k: -42.24617257827112, b: 49.5424950154442, RMSE: 241.2905431711768
round: 1322, k: -42.23617257827112, b: 49.5524950154442, RMSE: 241.21749324911377
round: 1323, k: -42.226172578271125, b: 49.5624950154442, RMSE: 241.14444340819819
round: 1

round: 1443, k: -41.026172578271364, b: 50.76249501544396, RMSE: 232.3790742210703
round: 1444, k: -41.016172578271366, b: 50.77249501544396, RMSE: 232.3060347710858
round: 1445, k: -41.00617257827137, b: 50.782495015443956, RMSE: 232.23299541194973
round: 1446, k: -40.99617257827137, b: 50.792495015443954, RMSE: 232.15995614374816
round: 1447, k: -40.98617257827137, b: 50.80249501544395, RMSE: 232.08691696656692
round: 1448, k: -40.976172578271374, b: 50.81249501544395, RMSE: 232.01387788049215
round: 1449, k: -40.966172578271376, b: 50.82249501544395, RMSE: 231.94083888560982
round: 1450, k: -40.95617257827138, b: 50.832495015443946, RMSE: 231.867799982006
round: 1451, k: -40.94617257827138, b: 50.842495015443944, RMSE: 231.794761169767
round: 1452, k: -40.93617257827138, b: 50.85249501544394, RMSE: 231.7217224489793
round: 1453, k: -40.926172578271384, b: 50.86249501544394, RMSE: 231.6486838197295
round: 1454, k: -40.916172578271386, b: 50.87249501544394, RMSE: 231.5756452821039
rou

round: 1580, k: -39.65617257827164, b: 52.13249501544369, RMSE: 222.3735534468478
round: 1581, k: -39.64617257827164, b: 52.142495015443686, RMSE: 222.30052728965708
round: 1582, k: -39.63617257827164, b: 52.152495015443684, RMSE: 222.22750123614261
round: 1583, k: -39.62617257827164, b: 52.16249501544368, RMSE: 222.1544752864066
round: 1584, k: -39.616172578271645, b: 52.17249501544368, RMSE: 222.08144944055138
round: 1585, k: -39.60617257827165, b: 52.18249501544368, RMSE: 222.00842369867954
round: 1586, k: -39.59617257827165, b: 52.192495015443676, RMSE: 221.93539806089365
round: 1587, k: -39.58617257827165, b: 52.202495015443674, RMSE: 221.86237252729634
round: 1588, k: -39.57617257827165, b: 52.21249501544367, RMSE: 221.78934709799105
round: 1589, k: -39.566172578271654, b: 52.22249501544367, RMSE: 221.7163217730802
round: 1590, k: -39.55617257827166, b: 52.23249501544367, RMSE: 221.64329655266727
round: 1591, k: -39.54617257827166, b: 52.242495015443666, RMSE: 221.57027143685573


round: 1719, k: -38.26617257827191, b: 53.52249501544341, RMSE: 212.22395882915382
round: 1720, k: -38.256172578271915, b: 53.53249501544341, RMSE: 212.1509481258466
round: 1721, k: -38.24617257827192, b: 53.54249501544341, RMSE: 212.07793754181907
round: 1722, k: -38.23617257827192, b: 53.552495015443405, RMSE: 212.00492707719405
round: 1723, k: -38.22617257827192, b: 53.5624950154434, RMSE: 211.93191673209498
round: 1724, k: -38.21617257827192, b: 53.5724950154434, RMSE: 211.85890650664575
round: 1725, k: -38.206172578271925, b: 53.5824950154434, RMSE: 211.78589640097
round: 1726, k: -38.19617257827193, b: 53.5924950154434, RMSE: 211.71288641519124
round: 1727, k: -38.18617257827193, b: 53.602495015443395, RMSE: 211.63987654943415
round: 1728, k: -38.17617257827193, b: 53.61249501544339, RMSE: 211.56686680382248
round: 1729, k: -38.16617257827193, b: 53.62249501544339, RMSE: 211.49385717848102
round: 1730, k: -38.156172578271935, b: 53.63249501544339, RMSE: 211.42084767353438
round: 

round: 1855, k: -36.906172578272184, b: 54.88249501544314, RMSE: 202.29565111752976
round: 1856, k: -36.896172578272186, b: 54.89249501544314, RMSE: 202.22265784125773
round: 1857, k: -36.88617257827219, b: 54.90249501544314, RMSE: 202.14966470271028
round: 1858, k: -36.87617257827219, b: 54.912495015443135, RMSE: 202.0766717020364
round: 1859, k: -36.86617257827219, b: 54.92249501544313, RMSE: 202.0036788393857
round: 1860, k: -36.856172578272194, b: 54.93249501544313, RMSE: 201.9306861149078
round: 1861, k: -36.846172578272196, b: 54.94249501544313, RMSE: 201.85769352875258
round: 1862, k: -36.8361725782722, b: 54.95249501544313, RMSE: 201.78470108107038
round: 1863, k: -36.8261725782722, b: 54.962495015443125, RMSE: 201.7117087720114
round: 1864, k: -36.8161725782722, b: 54.97249501544312, RMSE: 201.63871660172614
round: 1865, k: -36.806172578272204, b: 54.98249501544312, RMSE: 201.5657245703652
round: 1866, k: -36.796172578272206, b: 54.99249501544312, RMSE: 201.49273267808002
roun

round: 1983, k: -35.62617257827244, b: 56.162495015442886, RMSE: 192.95368453345816
round: 1984, k: -35.61617257827244, b: 56.172495015442884, RMSE: 192.88071017689845
round: 1985, k: -35.60617257827244, b: 56.18249501544288, RMSE: 192.80773597905983
round: 1986, k: -35.596172578272444, b: 56.19249501544288, RMSE: 192.73476194012216
round: 1987, k: -35.586172578272446, b: 56.20249501544288, RMSE: 192.66178806026642
round: 1988, k: -35.57617257827245, b: 56.212495015442876, RMSE: 192.5888143396734
round: 1989, k: -35.56617257827245, b: 56.222495015442874, RMSE: 192.5158407785239
round: 1990, k: -35.55617257827245, b: 56.23249501544287, RMSE: 192.44286737699946
round: 1991, k: -35.546172578272454, b: 56.24249501544287, RMSE: 192.3698941352819
round: 1992, k: -35.536172578272456, b: 56.25249501544287, RMSE: 192.29692105355304
round: 1993, k: -35.52617257827246, b: 56.262495015442866, RMSE: 192.22394813199494
round: 1994, k: -35.51617257827246, b: 56.272495015442864, RMSE: 192.150975370790

(-35.486172578272466, 56.30249501544286)

###### 3.Gradient Descent to get optimal *k* and *b*

#### Equations:

$$ RMSE = \frac{1}{n}\sum{(y - (kx+b))^2} = \frac{1}{n}\sum(y^2 -2y(kx+b) + (kx+b)^2)) = \frac{1}{n}\sum{(y^2 - 2yxk - 2yb + k^2x^2 + 2kxb + b^2)}$$

$$ \frac{\partial{_{loss}}}{\partial{_k}} = \frac{2}{n}(-y + kx + b)x = \frac{2}{n}(-y + \hat{y})x$$

$$ \frac{\partial{_{loss}}}{\partial{_b}} = \frac{2}{n}(-y + kx + b) = \frac{2}{n}(-y + \hat{y})$$

In [11]:
def partial_k(x, y, y_hat):
    n = len(y)
    gradient = 0
    for x_i, y_i, y_hat_i in zip(list(x), list(y), list(y_hat)):
        gradient += (y_i - y_hat_i) * x_i
    return -2 / n * gradient

def partial_b(y, y_hat):
    n = len(y)
    gradient = 0
    for y_i, y_hat_i in zip(list(y), list(y_hat)):
        gradient += (y_i - y_hat_i)
    return -2 / n * gradient

def gradient(X, y, n, alpha=0.01, loss=RMSE):
    loss_min = float('inf')
    
    k = random.random() * 200 - 100
    b = random.random() * 200 - 100
    
    for i in range(n):
        y_hat = k * X + b
        loss_new = RMSE(y, y_hat)
        if loss_new < loss_min:
            loss_min = loss_new
            if i % 1000 == 0:
                print(f"round: {i}, k: {k}, b: {b}, {loss.__name__}: {loss_min}")
        k_gradient = partial_k(X, y, y_hat)
        b_gradient = partial_b(y, y_hat)
        k += -k_gradient * alpha
        b += -b_gradient * alpha
    return (k, b)
k,b = gradient(X, y, 20000)

round: 0, k: 52.397681181660886, b: -74.09556886762492, RMSE: 234.7410725196278
round: 1000, k: 14.678474641721944, b: -70.14254080826981, RMSE: 7.687862711373472
round: 2000, k: 13.486435678438841, b: -62.559840290689664, RMSE: 7.2927184802221445
round: 3000, k: 12.549214454250526, b: -56.59806548245125, RMSE: 7.037366715207764
round: 4000, k: 11.812339528073794, b: -51.91071711866698, RMSE: 6.874775545923734
round: 5000, k: 11.232983616348436, b: -48.22536579457636, RMSE: 6.772315581532689
round: 6000, k: 10.777474464757873, b: -45.32781832681967, RMSE: 6.708195915920566
round: 7000, k: 10.419337811651424, b: -43.0496688128178, RMSE: 6.668251123059673
round: 8000, k: 10.137758699902712, b: -41.25851085392343, RMSE: 6.643438590746953
round: 9000, k: 9.916371708413642, b: -39.85024207525349, RMSE: 6.628053933088022
round: 10000, k: 9.742309791333946, b: -38.743013679718445, RMSE: 6.6185258140797645
round: 11000, k: 9.605456443385737, b: -37.87247338340012, RMSE: 6.612629012592769
round

###### 4. Try different Loss function and learning rate. 

$$ MAE = \frac{1}{n}\sum{|y - \hat{y}|} = \frac{1}{n}{\sum{|y - (kx+b)}|} $$

$$ \frac{\partial{loss}}{\partial{k}} = \frac{1}{n}{\sum{\left\{
\begin{array}{rcl}
-x       &      & {y - \hat{y} > 0}\\
x     &      & {y - \hat{y} < 0}
\end{array} \right.}} $$

$$ \frac{\partial{loss}}{\partial{b}} = \frac{1}{n}{\sum {\left\{
\begin{array}{rcl}
-1       &      & {y - \hat{y} > 0}\\
1     &      & {y - \hat{y} < 0}
\end{array} \right.}} $$

In [6]:
# gradient(X, y, 20000, alpha=0.1) # overflow
# gradient(X, y, 20000, alpha=1) # overflow

def partial_k(x, y, y_hat):
    n = len(x)
    gradient = 0
    for xi, y_i, y_hat_i in zip(x, y, y_hat):
        if y_i - y_hat_i > 0:
            gradient += xi
        else:
            gradient -= xi
    return -1 / n * gradient

def partial_b(y, y_hat):
    n = len(y)
    gradient = 0
    for y_i, y_hat_i in zip(y, y_hat):
        if y_i - y_hat_i > 0:
            gradient += 1
        else:
            gradient -= 1
    return -1 / n
# MAE with default alpha
gradient(X, y, 20000, loss=MAE) 





round: 0, k: 20.846376470188773, b: -25.6255453046531, MAE: 83.52400764118069


(7.628581015643654, -25.23028838767904)

In [None]:
# MAE with alpha = 0.1
gradient(X, y, 20000, loss=MAE, alpha=0.1)

In [None]:
# MAE with alpha = 0.5
gradient(X, y, 20000, loss=MAE, alpha=1)