In [9]:
import numpy as np
import pandas as pd
from math import sqrt

In [10]:
class UnlimitedDataWorks:
    """Build polynomial features from ``lat``/``lon`` and split the data.

    For a degree ``deg`` the feature set holds one column
    ``lat**i * lon**j`` for every exponent pair with ``i + j <= deg``.
    """

    def __init__(self, deg):
        """Enumerate the exponent pairs ``(i, j)`` with ``i + j <= deg``.

        The pairs are stored in ``self.exp``, ordered by ``i`` and then ``j``.
        """
        self.exp = [
            (i, j)
            for i in range(deg + 1)
            for j in range(deg + 1 - i)
        ]

    def train_test_split(self, dataframe, train_end=304113, val_end=391088):
        """Create min-max normalised features and cut them into three parts.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Must provide the columns ``"lat"``, ``"lon"`` and ``"alt"``.
        train_end : int, optional
            Positional row index where the training part stops and the
            validation part starts.
        val_end : int, optional
            Positional row index where the validation part stops and the
            test part starts.

        The defaults are the boundaries that used to be hard-coded here
        (described in the original code as a 70-20-10 split).

        Returns
        -------
        tuple
            ``(X, Y, xval, yval, x, y)``: training, validation and test
            features, each followed by its normalised ``alt`` target.

        Side effects: ``self.data`` holds the complete feature frame and
        ``self.count`` the number of feature columns.

        Note that normalisation uses the min/max of the whole frame, i.e.
        it is computed before the rows are split.
        """
        lat, lon = dataframe["lat"], dataframe["lon"]

        # Start from a frame that already shares the input's index so the
        # inserted Series align with it.
        features = pd.DataFrame(index=dataframe.index)
        for position, (a, b) in enumerate(self.exp):
            features.insert(position, "col" + str(a) + str(b),
                            (lat ** a) * (lon ** b), allow_duplicates=True)
        self.count = len(self.exp)

        normalize = lambda x: ((x - x.min()) / (x.max() - x.min()))
        # Only the target column is used from the input after this point.
        alt = normalize(dataframe["alt"])
        features = normalize(features)
        # The bias column lat**0 * lon**0 is constant, so min-max scaling
        # turns it into 0/0; restore it to 1.0.
        features["col00"] = 1.0
        self.data = features

        X = self.data.iloc[:train_end]
        Y = alt.iloc[:train_end]
        xval = self.data.iloc[train_end:val_end]
        yval = alt.iloc[train_end:val_end]
        x = self.data.iloc[val_end:]
        y = alt.iloc[val_end:]
        return (X, Y, xval, yval, x, y)

In [11]:
class RegressionModel:
    """Linear regression on pre-built feature matrices.

    The weights can be obtained in closed form (``fit``) or with batch,
    stochastic, L1-regularised or L2-regularised gradient descent. Every
    trainer prints its progress and returns the weights it ends with.
    """

    def __init__(self, N, X, Y, x, y, xval, yval):
        """
        N :: number of weights (one per feature column)
        X :: training data                  (e.g. 304113 x 3)
        x :: testing data                   (e.g. 43786 x 3)
        Y :: training target values
        y :: testing target values
        xval :: validation data             (e.g. 86975 x 3)
        yval :: validation target values

        All data arguments are converted with ``np.array``. The targets
        are used as 1-D vectors by the methods below.
        """
        self.N = N
        self.X = np.array(X)
        self.Y = np.array(Y)
        self.x = np.array(x)
        self.y = np.array(y)
        self.xval = np.array(xval)
        self.yval = np.array(yval)

    def score(self, weights):
        """
        evaluates ``weights`` on the testing data (``self.x``, ``self.y``)
        and returns ``[R2 * 100, RMSE]``
        """
        residual = (self.x @ weights) - self.y
        ss_res = np.sum(np.square(residual))
        ss_tot = np.sum(np.square(np.mean(self.y) - self.y))
        rmse = sqrt(ss_res / len(self.x))
        r2 = 1 - (ss_res / ss_tot)
        return [r2 * 100, rmse]

    def gradient_descent(self):
        """
        batch gradient descent on the squared error; trains until the
        error changes by at most 1e-4 between two consecutive steps.

        Returns the final weights. Raises FloatingPointError if the error
        stops being finite (the loop could never terminate otherwise).
        """
        lr = 8e-7
        prev_err, count = 1e10, 0
        W = np.random.randn(self.N)
        while True:
            diff = (self.X @ W) - self.Y
            err = 0.5 * (diff @ diff)
            if not np.isfinite(err):
                raise FloatingPointError(
                    "gradient descent diverged (non-finite error)")
            grad = self.X.T @ diff
            if count % 500 == 0:
                print("epoch =", count, "| err_diff =", prev_err-err)
                print("error = ", err, "||", W)
                print("score =", self.score(W), end="\n\n")
            W -= lr * grad
            if abs(prev_err-err) <= 1e-4:
                break
            prev_err = err
            count += 1
        print(count, err)
        print(W, self.score(W), end="\n\n")
        return W

    def stocastic_gradient_descent(self, epochs):
        """
        stochastic gradient descent: performs exactly ``epochs`` updates,
        each one on a single randomly drawn training row.

        Returns the final weights.
        """
        lr = 0.05
        W = np.random.randn(self.N)
        n_rows = len(self.X)
        for epoch in range(epochs):
            log_now = epoch % 500 == 0
            if log_now:
                # The full-batch error is only reported, never used for the
                # update, so it is computed on logging epochs only. It is
                # taken before the update, as the printed value always was.
                diff = (self.X @ W) - self.Y
                err = 0.5 * (diff @ diff)
            row = np.random.randint(0, n_rows)
            W -= lr * (((self.X[row] @ W) - self.Y[row]) * self.X[row])
            if log_now:
                print("epoch =", epoch)
                print("error =", err, "||", W)
                print("score =", self.score(W), end="\n\n")
        return W

    def gradient_descent_L1_reg(self):
        """
        batch gradient descent with an L1 penalty, repeated for 11 penalty
        strengths evenly spaced in [0, 1]. The run with the smallest
        penalised error on the validation data (``self.xval``,
        ``self.yval``) wins.

        Returns the winning weights. Raises FloatingPointError if the
        error stops being finite.
        """
        W_fin = np.array([])
        lr, l1_fin = 5e-7, 0
        MVLE = np.inf
        for l1 in np.linspace(0.0, 1.0, 11):
            prev_err, count = 1e10, 0
            W = np.random.randn(self.N)
            while True:
                diff = (self.X @ W) - self.Y
                err = 0.5 * ((diff @ diff) + l1 * np.abs(W).sum())
                if not np.isfinite(err):
                    raise FloatingPointError(
                        "gradient descent diverged (non-finite error)")
                if count % 500 == 0:
                    print("L1 hyperparamter =", l1, end=", ")
                    print("epoch =", count, "| err_diff =", prev_err-err)
                    print("error = ", err, "||", W)
                    print("score =", self.score(W), end="\n\n")
                # np.sign is 0 for a zero weight; w / abs(w) would be NaN.
                W -= lr * ((self.X.T @ diff) + 0.5 * l1 * np.sign(W))
                if abs(prev_err-err) <= 0.005:
                    break
                prev_err = err
                count += 1
            VLD = (self.xval @ W) - self.yval
            # NOTE(review): the penalty is part of the validation error,
            # which favours small l1 values - confirm this is intended.
            VLE = 0.5 * ((VLD @ VLD) + l1 * np.abs(W).sum())
            if VLE < MVLE:
                W_fin = W
                l1_fin = l1
                MVLE = VLE
        print(MVLE, l1_fin, W_fin)
        return W_fin

    def gradient_descent_L2_reg(self):
        """
        batch gradient descent with an L2 penalty, repeated for 11 penalty
        strengths evenly spaced in [0, 1]. The run with the smallest
        penalised error on the validation data (``self.xval``,
        ``self.yval``) wins.

        Returns the winning weights. Raises FloatingPointError if the
        error stops being finite.
        """
        W_fin = np.array([])
        lr, l2_fin = 5e-7, 0
        MVLE = np.inf
        for l2 in np.linspace(0.0, 1.0, 11):
            prev_err, count = 1e10, 0
            W = np.random.randn(self.N)
            while True:
                diff = (self.X @ W) - self.Y
                err = 0.5 * ((diff @ diff) + l2 * (W @ W))
                if not np.isfinite(err):
                    raise FloatingPointError(
                        "gradient descent diverged (non-finite error)")
                if count % 500 == 0:
                    print("L2 hyperparamter =", l2, end=", ")
                    print("epoch =", count, "| err_diff =", prev_err-err)
                    print("error = ", err, "||", W)
                    print("score =", self.score(W), end="\n\n")
                W -= lr * ((self.X.T @ diff) + l2 * W)
                if abs(prev_err-err) <= 0.005:
                    break
                prev_err = err
                count += 1
            VLD = (self.xval @ W) - self.yval
            # NOTE(review): the penalty is part of the validation error,
            # which favours small l2 values - confirm this is intended.
            VLE = 0.5 * ((VLD @ VLD) + l2 * (W @ W))
            if VLE < MVLE:
                W_fin = W
                l2_fin = l2
                MVLE = VLE
        print(MVLE, l2_fin, W_fin)
        return W_fin

    def fit(self):
        """
        solves for optimal weights using the system of N linear
        equations A W = B with A = X^T X and B = X^T Y.

        The system is solved directly instead of forming inv(A), which is
        the numerically preferable way to obtain the same W. Prints the
        weights with their test score and returns the weights.
        """
        B = self.X.T @ self.Y
        A = self.X.T @ self.X
        W = np.linalg.solve(A, B)
        print(W, self.score(W))
        return W

In [None]:
# Load the raw file: no header row, four comma-separated columns. The first
# column is not used and is dropped right away.
columns = ["junk", "lat", "lon", "alt"]
raw_df = pd.read_csv("3D_spatial_network.txt", sep=',', header=None,
                     names=columns).drop(columns="junk")
# drop(columns=...) replaces drop("junk", 1): pandas 2.0 no longer accepts
# the axis as a positional argument.

# Degree-1 polynomial features, then the train / validation / test split.
pre_processor = UnlimitedDataWorks(deg=1)
X_train, Y_train, x_val, y_val, x_test, y_test = pre_processor.train_test_split(raw_df)

# pre_processor.count is the number of feature columns, i.e. of weights.
model = RegressionModel(N=pre_processor.count,
                        X=X_train,
                        Y=Y_train,
                        x=x_test,
                        y=y_test,
                        xval=x_val,
                        yval=y_val)

# Uncomment the trainers to compare against the closed-form solution.
model.fit()
# model.gradient_descent()
model.stocastic_gradient_descent(10**5)
# model.gradient_descent_L1_reg()
# model.gradient_descent_L2_reg()

[ 0.20678245 -0.09449779  0.09619455] [1.9635247274214174, 0.11561193419957151]
epoch = 0
error = 8481.204189929202 || [ 0.58220523 -0.02266402 -0.8307247 ]
score = [-261.5902816370181, 0.2220328315862905]

epoch = 500
error = 3743.155145946948 || [ 0.40814595  0.06471092 -0.36095417]
score = [-59.20784108398172, 0.14732995267597948]

epoch = 1000
error = 2942.0329498860415 || [ 0.32016694  0.02219271 -0.15185929]
score = [-29.331649613993484, 0.13278873426478838]

epoch = 1500
error = 2681.798872784396 || [ 0.25886919 -0.05892098 -0.05526904]
score = [0.7225150804402114, 0.11634137864796809]

epoch = 2000
error = 2610.832026113496 || [ 0.24325865 -0.02199671  0.02389749]
score = [-16.215823511927518, 0.12587557132587743]

epoch = 2500
error = 2769.1783479631263 || [ 0.22918881 -0.05471022  0.06755266]
score = [-10.558290682356187, 0.12277345582464574]

epoch = 3000
error = 2686.5017622807263 || [ 0.23502813 -0.0548831   0.06080798]
score = [-11.825622246823375, 0.12347512783982335]

e

epoch = 31000
error = 2808.8981560399443 || [ 0.22753635 -0.07847292  0.12329236]
score = [-23.523783864217584, 0.12977293177772714]

epoch = 31500
error = 2611.5947736933404 || [ 0.20167475 -0.09503092  0.06704673]
score = [4.501537944629453, 0.11410561381631422]

epoch = 32000
error = 2597.736923192754 || [ 0.23001736 -0.09111275  0.09980461]
score = [-10.107318901856166, 0.12252280098676618]

epoch = 32500
error = 2695.3588287563034 || [ 0.21135154 -0.10436186  0.12207163]
score = [-3.200524099120261, 0.11861777521214682]

epoch = 33000
error = 2608.2121057367603 || [ 0.22184019 -0.10120361  0.11962153]
score = [-8.82311537507736, 0.12180620151807475]

epoch = 33500
error = 2590.6646056163518 || [ 0.21105221 -0.06900807  0.12821878]
score = [-16.884719959091644, 0.12623729881859505]

epoch = 34000
error = 2599.8881300175817 || [ 0.20385566 -0.06999742  0.105636  ]
score = [-4.261730168855471, 0.11922608586837301]

epoch = 34500
error = 2576.336269650068 || [ 0.19252672 -0.08237102  

epoch = 62000
error = 2732.300041806355 || [ 0.23069954 -0.07100042  0.10040999]
score = [-18.52617000091563, 0.12712060416493418]

epoch = 62500
error = 2812.0211169624017 || [ 0.24575194 -0.09771942  0.10514412]
score = [-21.64168829665991, 0.12878048084768715]

epoch = 63000
error = 2601.0717624084996 || [ 0.19813416 -0.10617138  0.07554113]
score = [4.31536237536565, 0.11421678489447024]

epoch = 63500
error = 2631.4835701973207 || [ 0.23407468 -0.12172502  0.08597728]
score = [-0.21221242056803646, 0.11688779011072248]

epoch = 64000
error = 2723.0316869927574 || [ 0.23009567 -0.07713908  0.08155095]
score = [-8.380548039059565, 0.1215582653785719]

epoch = 64500
error = 2570.6872826613326 || [ 0.20444358 -0.10506844  0.11537718]
score = [1.0164882700435585, 0.11616900017614795]

epoch = 65000
error = 2581.9832439979095 || [ 0.21197419 -0.11183475  0.11821036]
score = [-0.7946005010339707, 0.11722694756724968]

epoch = 65500
error = 2692.0443919600557 || [ 0.19568236 -0.11874455  

epoch = 93000
error = 2573.7805596987423 || [ 0.20065737 -0.11503333  0.09445692]
score = [4.69183220247108, 0.11399187129994458]

epoch = 93500
error = 2590.7426826691512 || [ 0.1945691  -0.11264774  0.10465677]
score = [4.421132714850562, 0.11415363949939189]

epoch = 94000
error = 2604.3841802285815 || [ 0.19781625 -0.09492116  0.07550571]
score = [4.435859664641317, 0.11414484466990658]

epoch = 94500
error = 2576.2531566862267 || [ 0.19777484 -0.06367949  0.07783394]
score = [1.9146464050102674, 0.11564075109249704]

epoch = 95000
error = 2583.780459112977 || [ 0.22792668 -0.07236943  0.074627  ]
score = [-6.407890319912446, 0.1204469311511927]

epoch = 95500
error = 3093.480118078717 || [ 0.25562714 -0.05017979  0.07150087]
score = [-36.975522102703586, 0.13665650360922163]

epoch = 96000
error = 2611.556533828137 || [ 0.22172681 -0.07429736  0.05742678]
score = [0.7938644763627156, 0.11629956464016833]

epoch = 96500
error = 2607.780598351713 || [ 0.23038681 -0.12069058  0.06279

epoch = 124000
error = 2575.709746585203 || [ 0.22340021 -0.11577277  0.11346217]
score = [-3.577308277029556, 0.11883411414016412]

epoch = 124500
error = 2739.510652931164 || [ 0.23626508 -0.10416014  0.09505603]
score = [-8.11793581363418, 0.12141090476704022]

epoch = 125000
error = 2783.661060850567 || [ 0.21555998 -0.08318936  0.11413125]
score = [-9.004765265470516, 0.12190781989870642]

epoch = 125500
error = 2581.744824547246 || [ 0.20802082 -0.08822406  0.11074203]
score = [-2.7220305350192486, 0.11834246757434595]

epoch = 126000
error = 2828.7185943778686 || [ 0.23097109 -0.07450893  0.11286658]
score = [-23.109990227902767, 0.12955538557314372]

epoch = 126500
error = 2661.3864756357116 || [ 0.20165122 -0.0913791   0.10378539]
score = [1.5297187471958962, 0.11586744006862312]

epoch = 127000
error = 2580.4542619403587 || [ 0.22059175 -0.11781745  0.08150295]
score = [3.7654078183214534, 0.11454454933117163]

epoch = 127500
error = 2576.5253311483416 || [ 0.19154943 -0.0952

epoch = 155000
error = 2615.105999866636 || [ 0.18996701 -0.05502478  0.10218644]
score = [-1.8802224730783301, 0.11785656117504294]

epoch = 155500
error = 3024.081736677266 || [ 0.22744356 -0.05896795  0.10046492]
score = [-21.221336162777305, 0.12855777760308137]

epoch = 156000
error = 2675.1317122305722 || [ 0.19356113 -0.0686781   0.0565831 ]
score = [3.603630778495126, 0.11464078757772439]

epoch = 156500
error = 2592.9607925325904 || [ 0.20724821 -0.09119556  0.06004   ]
score = [4.546166405592745, 0.11407894871143658]

epoch = 157000
error = 2581.4292673415207 || [ 0.2309031  -0.08473866  0.04841815]
score = [1.277283359936876, 0.11601586211147401]

epoch = 157500
error = 2665.7617008934903 || [ 0.22964902 -0.09527875  0.08607959]
score = [-4.217771667542758, 0.11920094935575412]

epoch = 158000
error = 2574.761292815218 || [ 0.21922156 -0.10746072  0.08305738]
score = [2.704016246763652, 0.11517448514182417]

epoch = 158500
error = 2614.433921147883 || [ 0.22756563 -0.1107980

epoch = 186000
error = 2587.0829005691044 || [ 0.21524759 -0.1092066   0.08689377]
score = [3.3065251945236795, 0.11481732023228401]

epoch = 186500
error = 3125.5495601922403 || [ 0.22241565 -0.07316829  0.13767465]
score = [-29.108002161466583, 0.13267387176925413]

epoch = 187000
error = 2577.0939676153057 || [ 0.2021975  -0.08551143  0.10616293]
score = [-0.12113647569704789, 0.11683466242524367]

epoch = 187500
error = 2704.4844611005165 || [ 0.18645879 -0.10289266  0.07712513]
score = [2.5172312314545087, 0.11528498586420556]

epoch = 188000
error = 2578.3863620697093 || [ 0.21225574 -0.0943547   0.09771934]
score = [0.05182188340072669, 0.11673370339282252]

epoch = 188500
error = 2594.3368033934094 || [ 0.20594247 -0.11626031  0.08912203]
score = [4.751278035311202, 0.1139563161169439]

epoch = 189000
error = 2647.54693129989 || [ 0.18095953 -0.1152271   0.10257471]
score = [2.8562085059472775, 0.11508437081000297]

epoch = 189500
error = 2580.5620639678145 || [ 0.18306461 -0.0

epoch = 217000
error = 2684.5931964724346 || [ 0.18709851 -0.10866965  0.08466791]
score = [3.001856677904069, 0.11499806516216433]

epoch = 217500
error = 2582.8053477441013 || [ 0.20314057 -0.09124215  0.09935914]
score = [1.8550780730052563, 0.11567586072329102]

epoch = 218000
error = 2606.720612262845 || [ 0.18791116 -0.08945271  0.07848386]
score = [3.7013981855031908, 0.11458263717248782]

epoch = 218500
error = 2678.0318287045566 || [ 0.20781244 -0.12290022  0.06851296]
score = [4.256923871953655, 0.11425165799076442]

epoch = 219000
error = 2637.3623812918077 || [ 0.20620033 -0.1086944   0.12358514]
score = [-0.5502636649180159, 0.11708477605835557]

epoch = 219500
error = 2571.9420660880505 || [ 0.19691967 -0.1030411   0.10634684]
score = [3.563708781209518, 0.11466452402907223]

epoch = 220000
error = 2738.282106362032 || [ 0.18319781 -0.11399102  0.0848673 ]
score = [1.4082097383379177, 0.11593890628699131]

epoch = 220500
error = 3060.6881741814095 || [ 0.23994702 -0.11813

epoch = 248000
error = 2656.664930485133 || [ 0.19633143 -0.11236044  0.08565249]
score = [4.388559632077471, 0.11417308950600694]

epoch = 248500
error = 2680.218774149605 || [ 0.2204268  -0.06208332  0.07424331]
score = [-5.414190362140503, 0.11988320955087473]

epoch = 249000
error = 2588.1877947111243 || [ 0.19178271 -0.08554687  0.07593731]
score = [4.0308960719880105, 0.11438643968316697]

epoch = 249500
error = 2573.655633968429 || [ 0.19617321 -0.07256701  0.08048396]
score = [2.9922989374576714, 0.11500373070661074]

epoch = 250000
error = 3148.3340887675736 || [ 0.25780563 -0.07373656  0.15036414]
score = [-81.81243445222515, 0.15744200571196748]

epoch = 250500
error = 2569.9301946896076 || [ 0.20288935 -0.09025128  0.09305129]
score = [2.670860017364096, 0.11519410787558367]

epoch = 251000
error = 2578.348246681354 || [ 0.21417916 -0.10839757  0.10532706]
score = [0.5258918732130802, 0.1164565311409783]

epoch = 251500
error = 2585.996246658389 || [ 0.19249777 -0.06414315 

epoch = 279000
error = 2614.730795163954 || [ 0.20960478 -0.05853253  0.09408587]
score = [-6.94215753070313, 0.12074893071410463]

epoch = 279500
error = 3150.057590775233 || [ 0.23845603 -0.089577    0.12893937]
score = [-31.263363455538794, 0.13377673336581114]

epoch = 280000
error = 2577.1534779452454 || [ 0.22049183 -0.11392555  0.10910988]
score = [-1.5672391763278615, 0.11767539005203208]

epoch = 280500
error = 2573.4887568633485 || [ 0.20833835 -0.0989838   0.08760867]
score = [3.3773231753634336, 0.1147752784969873]

epoch = 281000
error = 2592.728666822442 || [ 0.22069083 -0.1221617   0.06567908]
score = [4.8674116573346415, 0.11388682334095336]

epoch = 281500
error = 2957.2425652915003 || [ 0.17506009 -0.10398672  0.06057371]
score = [-5.633625303732703, 0.12000792183577833]

epoch = 282000
error = 2574.5307296301216 || [ 0.21407939 -0.06856297  0.06704348]
score = [0.3869709993581272, 0.1165378216339198]

epoch = 282500
error = 2902.7768365236784 || [ 0.18468118 -0.05957

epoch = 310000
error = 2586.7447988086824 || [ 0.21041303 -0.10535797  0.07241833]
score = [4.70143487107687, 0.11398612859308906]

epoch = 310500
error = 2644.5008615294 || [ 0.19130529 -0.08073482  0.07882496]
score = [3.910519835069348, 0.11445815596207863]

epoch = 311000
error = 2679.4038183079892 || [ 0.20953372 -0.07452412  0.08908397]
score = [-1.2994935900258353, 0.11752018322431125]

epoch = 311500
error = 2581.406044383613 || [ 0.21372907 -0.10335311  0.06687572]
score = [4.618515105617183, 0.11403570776532573]

epoch = 312000
error = 2657.3134016772765 || [ 0.22579496 -0.0712062   0.06914565]
score = [-4.075122934905373, 0.11911934289552782]

epoch = 312500
error = 2600.244139159742 || [ 0.19384199 -0.08177517  0.06833817]
score = [4.005055010478409, 0.11440183874365675]

epoch = 313000
error = 2764.2062797115555 || [ 0.24164764 -0.0967434   0.07461108]
score = [-6.884830408201004, 0.12071656220376197]

epoch = 313500
error = 2636.2043908342353 || [ 0.23853983 -0.09692146  

epoch = 341000
error = 2595.8969159032467 || [ 0.19568022 -0.08053407  0.07208499]
score = [4.0528497026483645, 0.1143733555697561]

epoch = 341500
error = 2583.5537009049576 || [ 0.19162119 -0.0738895   0.08059886]
score = [3.479848784713069, 0.1147143687344606]

epoch = 342000
error = 2575.7051175994816 || [ 0.1998532  -0.07931175  0.084929  ]
score = [2.8016365063045945, 0.11511669146770279]

epoch = 342500
error = 2768.4666480692586 || [ 0.19313891 -0.10184712  0.05764839]
score = [1.7762223226326812, 0.11572232199259029]

epoch = 343000
error = 2925.685299079809 || [ 0.19478723 -0.11893331  0.05978303]
score = [0.19368470816323313, 0.11665083017981426]

epoch = 343500
error = 2604.106417869848 || [ 0.19780289 -0.11290525  0.08990768]
score = [4.619004110003089, 0.11403541544427002]

epoch = 344000
error = 2610.3378826738176 || [ 0.21075168 -0.09597252  0.11305949]
score = [-2.5906874049360873, 0.11826678545540342]

