In [60]:
import numpy as np

In [134]:
class GradUtils():
    def mse(ypred, y): #X: m*n, y: m * 1, w: -1 * n        
        error = ypred - y #.reshape(-1, 1)
        sq_error = error*error
        loss = np.sum(sq_error,axis=0)
        return loss
    def prepare_deltas(w, delta):
        numw = w.shape[-1]
        a = np.zeros((numw, numw))
        np.fill_diagonal(a, delta)
        return a
    def compute_grad(model, X, y, w, detlas, lossf, delta):
        ypred = model.call(X, w)
        initial_loss = lossf(ypred, y)
        w_ = detlas + w
        ypred = model.call(X, w_)
        new_loss = lossf(ypred, y)
        grads = (new_loss - initial_loss)/delta
        return grads
    def new_weights(model, X, y, w, eta, detlas, lossf, delta):
        grads = GradUtils.compute_grad(model, X, y, w, detlas, lossf, delta)
        return w - eta * grads

class MyRegressModel():
    
    def __init__(self, eta=0.001):
        self.weights = None
        self.eta = eta
    
    def predict(self, X):
        m = X.shape[0]
        ones = np.ones((m, 1))
        X1 = np.hstack((X, ones))
        return self.call(X1, self.weights)
    
    def call(self, X1, w):
        return X1.dot(w.T)
    def __init_weights(self, n):
        self.weights = np.random.random((1, n+1))

    def fit(self, X, y, epoch=20,):
        m = X.shape[0]
        n = X.shape[1]
        ones = np.ones((m, 1))
        X = np.hstack((X, ones))
        self.__init_weights(n)
        y = y.reshape(-1, 1)
        delta = 1e-10
        deltas = GradUtils.prepare_deltas(self.weights, delta)
        for i in range(epoch):
            self.weights = GradUtils.new_weights(self, X, y, self.weights, self.eta, deltas, GradUtils.mse, delta)
            ypred = self.call(X, self.weights)
            initial_loss = GradUtils.mse(ypred, y)
            print(i, initial_loss, self.weights)

In [135]:
def generateData(m, n=1):
    np.random.seed(10)
    w = np.random.randint(10, 200, n+1)
    print("Coeffs",w)
    X = np.random.random((m,n))
    ones = np.ones((m, 1))
    X1 = np.hstack((X, ones))
    y = X1.dot(w.T) + 2*np.random.random((m)) - 2*np.random.random((m))
    return X, y

In [136]:
X, y = generateData(100, 4)

Coeffs [ 19 135  25  74 123]


In [138]:
# 18.94056802 134.81298346  25.00977052  74.13115675 123.19866976

In [139]:
X.shape

(100, 4)

In [140]:
y

array([226.09208817, 213.71513219, 213.85850591, 307.34842165,
       242.03284948, 253.16342836, 296.57901523, 270.41959311,
       167.30537303, 244.26527962, 227.29988532, 216.14249171,
       268.59600441, 347.09716758, 270.54096669, 154.17839605,
       195.97323483, 214.87772357, 325.1684672 , 268.4219055 ,
       192.09956502, 252.16955208, 187.594811  , 298.81467974,
       236.84667186, 298.74422759, 208.56871305, 232.32524666,
       214.58193917, 187.46748635, 226.91793957, 233.85176343,
       243.55447374, 231.79496446, 279.71383871, 247.17918639,
       259.71082274, 222.24023557, 243.53781191, 174.46390093,
       275.3886344 , 313.36796795, 285.63352484, 262.13206458,
       233.40083535, 290.78005584, 163.11801018, 194.37099604,
       262.89352098, 282.47728619, 259.70200038, 236.82564587,
       341.39408401, 145.61822383, 216.70288297, 246.74346985,
       220.24293456, 307.27318152, 223.28920304, 250.03811631,
       220.64022739, 310.41750264, 313.42977425, 250.23

In [141]:
reg = MyRegressModel()

In [142]:
reg.fit(X, y, epoch=1000)

0 [2279028.17681384] [[25.56129345 26.76341279 23.75818429 23.53530893 48.88618596]]
1 [876925.31810703] [[40.58818319 42.93117269 37.835125   37.36079255 78.36720206]]
2 [361899.78867535] [[49.34960031 53.13497565 46.16231297 45.83815629 96.22996904]]
3 [172004.65391263] [[ 54.3193704   59.71476964  51.01159321  51.07102501 107.05775813]]
4 [101207.08793588] [[ 57.00041526  64.09460508  53.75928584  54.33909413 113.62561955]]
5 [74048.23520808] [[ 58.30324823  67.13639192  55.23863354  56.4139062  117.61633678]]
6 [62901.75922998] [[ 58.77734963  69.36268943  55.95167739  57.76621568 120.04723422]]
7 [57654.01756761] [[ 58.75639487  71.08956521  56.20415312  58.6796394  121.53458547]]
8 [54604.00663715] [[ 58.4420735   72.50852247  56.18043349  59.32610824 122.4503375 ]]
9 [52396.68133104] [[ 57.95567573  73.73444856  55.99176791  59.80886802 123.01997222]]
10 [50532.81329018] [[ 57.37054322  74.83828409  55.7056045   60.19180167 123.3799866 ]]
11 [48829.85580396] [[ 56.73105931  75.8

277 [82.10972088] [[ 18.88198979 133.47669307  25.34917829  73.88760627 123.85976495]]
278 [81.74391654] [[ 18.87933421 133.49322015  25.34136545  73.89033646 123.85533586]]
279 [81.38833889] [[ 18.87676631 133.50952554  25.33368548  73.89301933 123.85093419]]
280 [81.04260056] [[ 18.87427643 133.52561734  25.32614321  73.89566923 123.84655852]]
281 [80.70662332] [[ 18.87185959 133.54147879  25.31872415  73.89828146 123.8422046 ]]
282 [80.37989239] [[ 18.86950826 133.55712935  25.31143028  73.90085462 123.83787313]]
283 [80.06228812] [[ 18.86722756 133.57256759  25.30426617  73.9033887  123.8335681 ]]
284 [79.75350356] [[ 18.86502289 133.58779779  25.29722454  73.90589081 123.82929319]]
285 [79.45329121] [[ 18.86287989 133.60282009  25.29030627  73.90835383 123.82503803]]
286 [79.16148534] [[ 18.86079629 133.6176295   25.28350283  73.91077295 123.82080277]]
287 [78.87771725] [[ 18.85878873 133.63224537  25.27682415  73.91316705 123.81659778]]
288 [78.60174939] [[ 18.85684127 133.646666

555 [68.06568934] [[ 18.90129837 134.73980826  24.97248534  74.10750163 123.28681299]]
556 [68.06466963] [[ 18.90158528 134.74046011  24.97268614  74.10767599 123.28614636]]
557 [68.0636651] [[ 18.90187106 134.74110045  24.97288864  74.1078488  123.28547902]]
558 [68.06267416] [[ 18.90216139 134.74174008  24.97309186  74.10802331 123.28482234]]
559 [68.06169997] [[ 18.90244589 134.74237275  24.9732884   74.1081954  123.2841675 ]]
560 [68.06074447] [[ 18.90272301 134.7429922   24.97348266  74.10836252 123.28351181]]
561 [68.059805] [[ 18.90299273 134.74360867  24.9736782   74.10852651 123.28286294]]
562 [68.05887483] [[ 18.90327737 134.74422812  24.97388468  74.1087033  123.2822334 ]]
563 [68.05795678] [[ 18.90355477 134.74484047  24.97408648  74.10887098 123.28159761]]
564 [68.05705626] [[ 18.90382207 134.74544173  24.97428728  74.10903896 123.28096309]]
565 [68.05617326] [[ 18.90409037 134.74603659  24.97448253  74.10919968 123.28033611]]
566 [68.05530047] [[ 18.90436493 134.74662876 

827 [67.99955315] [[ 18.9365923  134.8067745   25.00509499  74.12871786 123.20745063]]
828 [67.99953611] [[ 18.93663465 134.80684086  25.00513762  74.12874373 123.20735939]]
829 [67.99951909] [[ 18.93667174 134.80690694  25.00518295  74.12876703 123.20726532]]
830 [67.99950262] [[ 18.93671011 134.80696507  25.00522644  74.12879133 123.20716996]]
831 [67.99948641] [[ 18.93674592 134.80702631  25.00527291  74.12881251 123.20707773]]
832 [67.99946961] [[ 18.93678827 134.80709282  25.00532108  74.12884335 123.20698806]]
833 [67.9994537] [[ 18.93683204 134.80716004  25.00536542  74.1288712  123.2069055 ]]
834 [67.99943837] [[ 18.93686771 134.8072183   25.00541289  74.12889379 123.2068181 ]]
835 [67.99942232] [[ 18.93690636 134.80728524  25.0054585   74.12891568 123.20672758]]
836 [67.99940668] [[ 18.93694701 134.80734947  25.00550923  74.12894026 123.20664288]]
837 [67.99939199] [[ 18.93698666 134.80740717  25.00555443  74.12896428 123.20655961]]
838 [67.99937677] [[ 18.93702517 134.8074689

In [143]:
X[0]

array([0.74880388, 0.49850701, 0.22479665, 0.19806286])

In [144]:
y[0]

226.09208817275746

In [145]:
reg

<__main__.MyRegressModel at 0x7f0145363cc0>

In [154]:
X[:5]

array([[0.74880388, 0.49850701, 0.22479665, 0.19806286],
       [0.76053071, 0.16911084, 0.08833981, 0.68535982],
       [0.95339335, 0.00394827, 0.51219226, 0.81262096],
       [0.61252607, 0.72175532, 0.29187607, 0.91777412],
       [0.71457578, 0.54254437, 0.14217005, 0.37334076]])

In [155]:
y[:5]

array([226.09208817, 213.71513219, 213.85850591, 307.34842165,
       242.03284948])

In [156]:
reg.predict(X[:5])

array([[224.89135657],
       [213.41771746],
       [214.83909077],
       [307.43767854],
       [241.10695259]])

In [157]:
!ls *.csv

test.csv  train.csv


In [158]:
import pandas as pd

In [159]:
df = pd.read_csv('train.csv')

In [162]:
df.head()

Unnamed: 0,ID,Price,Levy,Manufacturer,Model,Prod. year,Category,Leather interior,Fuel type,Engine volume,Mileage,Cylinders,Gear box type,Drive wheels,Doors,Wheel,Color,Airbags
0,45654403,13328,1399,LEXUS,RX 450,2010,Jeep,Yes,Hybrid,3.5,186005 km,6.0,Automatic,4x4,04-May,Left wheel,Silver,12
1,44731507,16621,1018,CHEVROLET,Equinox,2011,Jeep,No,Petrol,3.0,192000 km,6.0,Tiptronic,4x4,04-May,Left wheel,Black,8
2,45774419,8467,-,HONDA,FIT,2006,Hatchback,No,Petrol,1.3,200000 km,4.0,Variator,Front,04-May,Right-hand drive,Black,2
3,45769185,3607,862,FORD,Escape,2011,Jeep,Yes,Hybrid,2.5,168966 km,4.0,Automatic,4x4,04-May,Left wheel,White,0
4,45809263,11726,446,HONDA,FIT,2014,Hatchback,Yes,Petrol,1.3,91901 km,4.0,Automatic,Front,04-May,Left wheel,Silver,4


In [181]:
y = np.array(df["Price"])

In [164]:
df[["Prod. year", "Mileage", "Cylinders", "Airbags"]]

Unnamed: 0,Prod. year,Mileage,Cylinders,Airbags
0,2010,186005 km,6.0,12
1,2011,192000 km,6.0,8
2,2006,200000 km,4.0,2
3,2011,168966 km,4.0,0
4,2014,91901 km,4.0,4
...,...,...,...,...
19232,1999,300000 km,4.0,5
19233,2011,161600 km,4.0,8
19234,2010,116365 km,4.0,4
19235,2007,51258 km,4.0,4


In [183]:
result = []
for m in df["Mileage"]:
    result.append(int(m.split()[0]))

In [184]:
result

[186005,
 192000,
 200000,
 168966,
 91901,
 160931,
 258909,
 216118,
 398069,
 128500,
 184467,
 0,
 350000,
 138038,
 76000,
 74146,
 121840,
 54317,
 295059,
 364523,
 112645,
 187000,
 39709,
 230000,
 0,
 210000,
 0,
 169000,
 158787,
 420550,
 180000,
 175056,
 200000,
 287274,
 250000,
 162000,
 10200,
 158000,
 103490,
 382746,
 203200,
 61057,
 11200,
 141000,
 160259,
 239342,
 128000,
 100734,
 141500,
 147000,
 87768,
 264014,
 132542,
 1600,
 87000,
 260000,
 5323,
 87112,
 204318,
 22000,
 83000,
 170000,
 312118,
 130000,
 200279,
 719847,
 154672,
 110969,
 225510,
 210000,
 148105,
 255456,
 111643,
 84506,
 288000,
 75000,
 94362,
 175000,
 263190,
 193614,
 264877,
 27656,
 16000,
 0,
 195053,
 121362,
 69191,
 83000,
 40772,
 172000,
 433811,
 350000,
 148213,
 103246,
 105394,
 160000,
 333333,
 0,
 214000,
 123346,
 80827,
 64464,
 91760,
 275240,
 300000,
 318400,
 91200,
 137802,
 151586,
 107000,
 210408,
 100567,
 86000,
 122740,
 144000,
 150000,
 100000,
 1

In [185]:
df["mil_int"] = result

In [188]:
mydf = df[["Prod. year", "mil_int", "Cylinders", "Airbags"]]

In [191]:
X = np.array(mydf)

In [192]:
X.shape

(19237, 4)

In [193]:
y.shape

(19237,)

In [200]:
y = y.reshape(-1, 1)

In [201]:
X, y

(array([[2.01000e+03, 1.86005e+05, 6.00000e+00, 1.20000e+01],
        [2.01100e+03, 1.92000e+05, 6.00000e+00, 8.00000e+00],
        [2.00600e+03, 2.00000e+05, 4.00000e+00, 2.00000e+00],
        ...,
        [2.01000e+03, 1.16365e+05, 4.00000e+00, 4.00000e+00],
        [2.00700e+03, 5.12580e+04, 4.00000e+00, 4.00000e+00],
        [2.01200e+03, 1.86923e+05, 4.00000e+00, 1.20000e+01]]), array([[13328],
        [16621],
        [ 8467],
        ...,
        [26108],
        [ 5331],
        [  470]]))

In [205]:
reg = MyRegressModel(0.000000001)
reg.fit(X, y, epoch=10)

0 [4.65605446e+39] [[-3.07191715e+04 -1.01590726e+10 -1.91999753e+04 -1.91991856e+04
  -1.91997511e+04]]
1 [3.79780666e+72] [[-2.90142197e+26 -2.90142197e+26 -2.90142197e+26 -2.90142197e+26
  -2.90142197e+26]]
2 [1.77758099e+137] [[-6.27710174e+58 -6.27710174e+58 -6.27710174e+58 -6.27710174e+58
  -6.27710174e+58]]
3 [4.81312982e+267] [[-1.03289995e+124 -1.03289995e+124 -1.03289995e+124 -1.03289995e+124
  -1.03289995e+124]]
4 [inf] [[-2.29111231e+254 -2.29111231e+254 -2.29111231e+254 -2.29111231e+254
  -2.29111231e+254]]
5 [nan] [[nan nan nan nan nan]]
6 [nan] [[nan nan nan nan nan]]
7 [nan] [[nan nan nan nan nan]]
8 [nan] [[nan nan nan nan nan]]
9 [nan] [[nan nan nan nan nan]]


  after removing the cwd from sys.path.


In [206]:
import pandas as pd