In [9]:
# load features

import numpy as np

# with open('features.npy', 'rb') as f:
with open('features_short_new.npy', 'rb') as f:
    features = np.load(f)

In [2]:
import pandas as pd

# load only last column of a csv file
labels = pd.read_csv('data/train.csv', usecols=[22]).values.reshape(-1)

In [4]:
labels.shape

(3000,)

In [60]:
labels = labels.reshape(-1, 1)

In [53]:
labels.shape

torch.Size([64])

In [3]:
# import mean_squared_log_error
from sklearn.metrics import mean_squared_log_error

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [5]:
def rmsle(y_true, y_pred):
    return np.sqrt(mean_squared_log_error(y_true, y_pred))

In [13]:
# use mlp for revenue prediction

from sklearn.neural_network import MLPRegressor

In [24]:
mlp_regr = MLPRegressor(hidden_layer_sizes=(1000, 200, 200, 50, 50), max_iter=40, alpha=0.00001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=False, tol=0.00001, n_iter_no_change=5)  # best

mlp_regr.fit(X_train, y_train)

preds = mlp_regr.predict(X_test)

rmsle(y_test, preds)

Iteration 1, loss = 10959152420755202.00000000
Iteration 2, loss = 5714702516873238.00000000
Iteration 3, loss = 4324062301870460.50000000
Iteration 4, loss = 4277536412974506.50000000
Iteration 5, loss = 4406643460095309.50000000
Iteration 6, loss = 4253841160107158.00000000
Iteration 7, loss = 4224070372485407.50000000
Iteration 8, loss = 4248647569966187.50000000
Iteration 9, loss = 4251279468394391.00000000
Iteration 10, loss = 4247901426517860.50000000
Iteration 11, loss = 4333332539829557.00000000
Iteration 12, loss = 4397680134887775.50000000
Iteration 13, loss = 4241420667515744.50000000
Training loss did not improve more than tol=0.000010 for 5 consecutive epochs. Stopping.


2.595710952306413

In [19]:
# use pytorch to train the regressor
import torch
import torch.nn as nn
import torch.nn.functional as F

class RevenuePredictor(nn.Module):
    def __init__(self, input_size, drop_prob=0.2):
        super(RevenuePredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, 800)
        self.fc2 = nn.Linear(800, 200)
        # self.fc3 = nn.Linear(200, 200)
        self.fc3 = nn.Linear(200, 100)
        # self.fc4 = nn.Linear(200, 50)
        # self.fc5 = nn.Linear(200, 50)
        # self.fc6 = nn.Linear(50, 1)
        self.fc6 = nn.Linear(100, 1)
        self.dropout = nn.Dropout(p=drop_prob)

    def forward(self, x):
        x = self.dropout(F.gelu(self.fc1(x)))
        x = F.gelu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # x = F.gelu(self.fc4(x))
        # x = F.relu(self.fc5(x))
        return self.fc6(x)

    def predict(self, x):
        self.eval()
        with torch.no_grad():
            r = self.forward(x)
            self.train()
            return r
        

In [12]:
def test_torch_mode(model):
    outputs = model.predict(torch.from_numpy(X_test).float())
    try:
        return rmsle(y_test, outputs.detach().numpy())
    except:
        return None

In [7]:
def test_torch_cuda_mode(model):
    outputs = model.predict(torch.from_numpy(X_test).cuda().float())
    try:
        return rmsle(y_test, outputs.cpu().detach().numpy())
    except:
        return None

In [64]:

    
model = RevenuePredictor(X_train.shape[-1])

# define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# train the network
epochs = 100
batch_size = 64
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = torch.sqrt(criterion(outputs, label))
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))
        # break

# # test the network
# test_loss = 0
# for i in range(0, len(X_test), batch_size):
#     # get the training data
#     inputs = torch.from_numpy(X_test[i:i+batch_size]).float()
#     labels = torch.from_numpy(y_test[i:i+batch_size]).float()

#     # forward pass
#     outputs = model.forward(inputs)
#     loss = rmsle(labels, outputs.detach().numpy())
#     test_loss += loss.item()
# print(f"Test loss: {test_loss/len(X_test)}")



Training loss: 2318762.64 0 7.889089683500408
Training loss: 2306291.7066666665 1 5.533100494291208
Training loss: 2174264.6533333333 2 3.8263469484230472
Training loss: 1605120.295 3 3.2643268913894246
Training loss: 1439742.1183333334 4 3.2150177392585078
Training loss: 1429873.835 5 3.160712108603875
Training loss: 1446965.025 6 3.129914945356873
Training loss: 1423235.7183333333 7 3.0924996165623155
Training loss: 1418685.35 8 3.0632722457715826
Training loss: 1417076.2666666666 9 3.0405920514041
Training loss: 1430459.3133333332 10 3.0380082791980456
Training loss: 1456446.89 11 3.0069040966553287
Training loss: 1401322.1866666668 12 2.975067575133062
Training loss: 1400642.6733333333 13 2.9458872570163157
Training loss: 1411213.3 14 2.9467980277662313
Training loss: 1429121.39 15 2.9315219684653857
Training loss: 1440432.5066666666 16 2.9029898706918655
Training loss: 1455684.44 17 2.8968152778574274
Training loss: 1420012.5766666667 18 2.8738651548155874
Training loss: 1411842.8

In [95]:

    
model = RevenuePredictor(X_train.shape[-1])

# define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

epochs = 200
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 29668317852248.746 0 16.09964275330125
Training loss: 29668317852248.746 1 15.893583973974103
Training loss: 29668317852248.746 2 15.671121284027993
Training loss: 29668317852248.746 3 15.442167837167407
Training loss: 29668317852248.746 4 15.220846952464731
Training loss: 29668317852248.746 5 15.012378512734724
Training loss: 29668317852248.746 6 14.811386058829523
Training loss: 29668317852248.746 7 14.6119695861421
Training loss: 29668317852248.746 8 14.41223457310922
Training loss: 29668316957463.895 9 14.211040849886864
Training loss: 29668315167894.188 10 14.008978196120253
Training loss: 29668315167894.188 11 13.806926575093085
Training loss: 29668313378324.48 12 13.606269581302962
Training loss: 29668313378324.48 13 13.406191633670215
Training loss: 29668313378324.48 14 13.206753994119563
Training loss: 29668312483539.625 15 13.008300456689593
Training loss: 29668308904400.215 16 12.811181415789958
Training loss: 29668308904400.215 17 12.615882632209077
Training 

In [97]:
epochs = 400
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 18215725369917.44 0 3.068542724929428
Training loss: 18172040183808.0 1 3.06846294670026
Training loss: 18128034217328.64 2 3.068413787114408
Training loss: 18083717313112.746 3 3.066577605308065
Training loss: 18039092602907.31 4 3.065083168326568
Training loss: 17994173955877.547 5 3.062306394267033
Training loss: 17948956003314.348 6 3.0584116419270133
Training loss: 17903449930028.375 7 3.0548575567048815
Training loss: 17857660657336.32 8 3.0518029343383115
Training loss: 17811592211770.027 9 3.0483435149910143
Training loss: 17765254435962.88 10 3.0444855518996867
Training loss: 17718643303383.04 11 3.0404458047800658
Training loss: 17671769551448.746 12 3.0364399175651196
Training loss: 17624640785831.254 13 3.0328092364042583
Training loss: 17577261480454.826 14 3.0284761015754325
Training loss: 17529643267522.56 15 3.0229887371179243
Training loss: 17481792857920.854 16 3.0172808366730366
Training loss: 17433709356864.854 17 3.0126988096285614
Training loss: 173

In [109]:
 model = RevenuePredictor(X_train.shape[-1])

# define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.0001)

epochs = 200
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 29668031521095.68 0 10.682109533116172
Training loss: 29665692553489.066 1 10.222006808833106
Training loss: 29663943249100.8 2 9.878126898481188
Training loss: 29661785922819.414 3 9.585090572420803
Training loss: 29658893083388.586 4 9.325051769834495
Training loss: 29655482163527.68 5 9.096994946321598
Training loss: 29651522740551.68 6 8.885396035811917
Training loss: 29646726693737.812 7 8.682952515777632
Training loss: 29640970542776.32 8 8.48642949462596
Training loss: 29634018064465.92 9 8.293839412303566
Training loss: 29625505976156.16 10 8.1000147531425
Training loss: 29614926040050.348 11 7.906502690864819
Training loss: 29601846075064.32 12 7.713492088680184
Training loss: 29585671049270.613 13 7.519875086743211
Training loss: 29565655606886.4 14 7.326184799415659
Training loss: 29540914805691.734 15 7.1336038069036665
Training loss: 29510470645841.92 16 6.942561219092635
Training loss: 29472993477044.906 17 6.753206115929608
Training loss: 29426860159576.74

In [110]:

epochs = 1000
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 10528609009664.0 0 3.134368727176203
Training loss: 10528611022929.92 1 3.1332326704857514
Training loss: 10528611470322.346 2 3.132098764570639
Training loss: 10528612588803.414 3 3.1309667965219425
Training loss: 10528612812499.627 4 3.1298369018768613
Training loss: 10528613259892.053 5 3.1287089535259938
Training loss: 10528613259892.053 6 3.1275830245621137
Training loss: 10528613707284.48 7 3.126458931758273
Training loss: 10528613930980.693 8 3.1253368314462815
Training loss: 10528615049461.76 9 3.1242167973055848
Training loss: 10528615049461.76 10 3.1230987552369935
Training loss: 10528614378373.12 11 3.121982822358223
Training loss: 10528615049461.76 12 3.120868948544418
Training loss: 10528615273157.973 13 3.1197571870792213
Training loss: 10528616615335.254 14 3.1186473925255283
Training loss: 10528616615335.254 15 3.1175397432645706
Training loss: 10528617510120.107 16 3.1164340861757633
Training loss: 10528617957512.533 17 3.115330683443122
Training loss: 1

In [111]:

epochs = 1000
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 10527694539543.893 0 2.6364326649845373
Training loss: 10527708856101.547 1 2.6362392139518236
Training loss: 10527718698734.934 2 2.6360450043945414
Training loss: 10527720264608.426 3 2.6358506788393896
Training loss: 10527716014380.373 4 2.635656698106159
Training loss: 10527707961316.693 5 2.635463574871557
Training loss: 10527697671290.88 6 2.635271091757364
Training loss: 10527687381265.066 7 2.6350793595633824
Training loss: 10527680894074.88 8 2.6348882553193755
Training loss: 10527675972758.188 9 2.6346974741763405
Training loss: 10527673735796.053 10 2.6345068245662513
Training loss: 10527673735796.053 11 2.634316181410067
Training loss: 10527673959492.268 12 2.634125450856341
Training loss: 10527674630580.906 13 2.633934723810274
Training loss: 10527674630580.906 14 2.6337440334724334
Training loss: 10527673288403.627 15 2.633553415029524
Training loss: 10527671722530.133 16 2.633363014321915
Training loss: 10527668814479.36 17 2.6331728880416065
Training loss

In [112]:

epochs = 1000
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 10526276529247.574 0 2.5235789832403137
Training loss: 10526275634462.72 1 2.5235097157910773
Training loss: 10526273844893.014 2 2.523440577605568
Training loss: 10526272279019.52 3 2.5233713101336757
Training loss: 10526272502715.732 4 2.5233020192330238
Training loss: 10526269818361.174 5 2.5232330050939304
Training loss: 10526268476183.893 6 2.523163996089291
Training loss: 10526266686614.188 7 2.5230948745339568
Training loss: 10526265120740.693 8 2.523025899322123
Training loss: 10526265344436.906 9 2.5229570857500287
Training loss: 10526262660082.346 10 2.5228881885248833
Training loss: 10526261541601.28 11 2.5228192265894496
Training loss: 10526259752031.574 12 2.522750468868988
Training loss: 10526258633550.506 13 2.5226818020755077
Training loss: 10526257291373.227 14 2.5226129955438523
Training loss: 10526255725499.732 15 2.522544325276475
Training loss: 10526253935930.027 16 2.522475874203917
Training loss: 10526252593752.746 17 2.5224072516458564
Training lo

In [113]:

epochs = 1000
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 10524761211098.453 0 2.4704294441150134
Training loss: 10524760092617.387 1 2.4703893521733344
Training loss: 10524757408262.826 2 2.470349358173749
Training loss: 10524756289781.76 3 2.4703090188032837
Training loss: 10524754947604.48 4 2.4702690109999645
Training loss: 10524752039553.707 5 2.4702293329838794
Training loss: 10524751368465.066 6 2.470189332891876
Training loss: 10524749131502.934 7 2.4701490810927416
Training loss: 10524747565629.44 8 2.470108938688637
Training loss: 10524745776059.732 9 2.4700689826889373
Training loss: 10524744433882.453 10 2.47002887255016
Training loss: 10524741749527.893 11 2.4699889624169127
Training loss: 10524740183654.4 12 2.4699491277192127
Training loss: 10524738841477.12 13 2.4699088790166472
Training loss: 10524737275603.627 14 2.469868999296713
Training loss: 10524735486033.92 15 2.4698294172743003
Training loss: 10524733025375.574 16 2.4697893524252397
Training loss: 10524731459502.08 17 2.469749396371865
Training loss: 10

In [24]:
def training_loop(epochs, optimizer, model, criterion, X_train, y_train):
    for e in range(epochs):
        running_loss = 0
        # get the training data
        inputs = torch.from_numpy(X_train).cuda().float()
        label = torch.from_numpy(y_train).reshape(-1, 1).cuda().float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_cuda_mode(model))

In [23]:
model = RevenuePredictor(X_train.shape[-1]).cuda()

# define loss function and optimizer
criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.0001)
# use adamW instead of adam
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.0001)

epochs = 800


Training loss: 9891075249056.426 0 10.7251509121513
Training loss: 9888763124995.414 1 9.513110968898115
Training loss: 9886638905753.6 2 9.08994541346975
Training loss: 9884793859386.027 3 8.814381308961623
Training loss: 9882536317201.066 4 8.60472078740365
Training loss: 9880423730162.346 5 8.435887519064742
Training loss: 9877931754345.812 6 8.292235898539763
Training loss: 9876116235878.4 7 8.17029190653509
Training loss: 9873733423813.973 8 8.060406268737918
Training loss: 9871624415914.666 9 7.958772197344482
Training loss: 9869264868256.426 10 7.859187669851263
Training loss: 9867055644453.547 11 7.764718278834937
Training loss: 9864363236829.867 12 7.6751744298854225
Training loss: 9861752254627.84 13 7.591147302621273
Training loss: 9858782463699.627 14 7.510490638237467
Training loss: 9856116004836.693 15 7.4324440258276265
Training loss: 9852982468280.32 16 7.357828707437794
Training loss: 9849639552068.268 17 7.285972121426317
Training loss: 9846169576407.04 18 7.216968998

In [14]:
# save full model
torch.save(model, 'model.pt')

In [40]:
training_loop(1000, optimizer, model, criterion, X_train, y_train)

Training loss: 3502276052254.72 0 2.4097179241738953
Training loss: 3502152348248.7466 1 2.4098651490680423
Training loss: 3513130239918.08 2 2.4101107414188405
Training loss: 3473812498677.76 3 2.4104195690736194
Training loss: 3513595304345.6 4 2.41064640199983
Training loss: 3513284814001.493 5 2.410488445276835
Training loss: 3514902808712.533 6 2.4101992450703014
Training loss: 3503073081862.8267 7 2.4098319369060146
Training loss: 3500732772078.933 8 2.4095701312111077
Training loss: 3513320381699.4136 9 2.40940408995175
Training loss: 3506637681022.2935 10 2.40936356703281
Training loss: 3501745221140.48 11 2.409459899281883
Training loss: 3521719056029.013 12 2.4096824737157823
Training loss: 3487237850617.1733 13 2.409883247174646
Training loss: 3524616593080.32 14 2.4099206288426682
Training loss: 3502709799212.3735 15 2.4098807018300716
Training loss: 3497800785810.7734 16 2.4098864021866415
Training loss: 3533284597650.7734 17 2.409846074916649
Training loss: 3496448542201.

In [22]:
epochs = 500
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).cuda().float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).cuda().float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_cuda_mode(model))

Training loss: 10573585594013.014 0 3.1397525732674727
Training loss: 10512898153512.96 1 3.1388177355547544
Training loss: 10543762862243.84 2 3.1377827375897898
Training loss: 10515998135637.334 3 3.13667740496998
Training loss: 10510647769606.826 4 3.1355529359430303
Training loss: 10513249803960.32 5 3.1343725817679324
Training loss: 10503932409282.56 6 3.133241553355136
Training loss: 10561778907873.28 7 3.1321661875337226
Training loss: 10481497916047.36 8 3.13107002413294
Training loss: 10487322741746.346 9 3.1300028084628617
Training loss: 10568049783821.654 10 3.128910874232627
Training loss: 10490753346874.027 11 3.127830488030291
Training loss: 10495931466820.268 12 3.126721356909152
Training loss: 10546181465702.4 13 3.125651046042983
Training loss: 10545868291003.732 14 3.124601692893663
Training loss: 10584780023616.854 15 3.1235416795716713
Training loss: 10526557939083.947 16 3.122504329703179
Training loss: 10551499395781.973 17 3.1214965065540574
Training loss: 104780

KeyboardInterrupt: 

In [37]:
epochs = 1000
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).cuda().float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).cuda().float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_cuda_mode(model))

Training loss: 10474209893416.96 0 2.4051506778807608
Training loss: 10502994898452.48 1 2.4053545278953536
Training loss: 10525501421868.373 2 2.4054608494733123
Training loss: 10503175868689.066 3 2.4055277030230084
Training loss: 10518290798127.787 4 2.4054672985799095
Training loss: 10532129988061.867 5 2.405295121247068
Training loss: 10554358680780.8 6 2.4051941556168113
Training loss: 10585958231572.48 7 2.4050582729953316
Training loss: 10617272345995.947 8 2.4049150096052667
Training loss: 10553085178238.293 9 2.4049163651565526
Training loss: 10527354744995.84 10 2.4049523798858643
Training loss: 10552050806947.84 11 2.405048814551919
Training loss: 10498707536827.732 12 2.4051097878191046
Training loss: 10541238226780.16 13 2.4052076477480764
Training loss: 10521949126000.64 14 2.4052302620163726
Training loss: 10512537331520.854 15 2.405202078411037
Training loss: 10624627030097.92 16 2.4051851570179217
Training loss: 10475470645275.307 17 2.4050452220475402
Training loss: 

KeyboardInterrupt: 

There may be hash collision when using FeatureHasher, some information is lost and may affect the performance.

In [101]:
epochs = 400
batch_size = 800
for e in range(epochs):
    running_loss = 0
    for i in range(0, len(X_train), batch_size):
        # get the training data
        inputs = torch.from_numpy(X_train[i:i+batch_size]).float()
        label = torch.from_numpy(y_train[i:i+batch_size]).reshape(-1, 1).float()
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model.forward(inputs)
        loss = criterion(outputs, label)
        
        # backward pass
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(X_train)}", e, test_torch_mode(model))

Training loss: 1209718335515.3066 0 2.7641163156271342
Training loss: 1208368468677.9734 1 2.7641753325579703
Training loss: 1206982279168.0 2 2.764016375929691
Training loss: 1205637529381.5466 3 2.764121026263768
Training loss: 1204250724706.9866 4 2.763975926006395
Training loss: 1202902339857.0667 5 2.76418264201457
Training loss: 1201516961245.8667 6 2.763947618178719
Training loss: 1200145172179.6267 7 2.764365238948186
Training loss: 1198716787971.4133 8 2.7640209004675027
Training loss: 1197241791064.7466 9 2.7641675935415795
Training loss: 1195720433117.8667 10 2.764161095294574
Training loss: 1194124276749.6533 11 2.7635162498190837
Training loss: 1192529350710.6133 12 2.7635814133478656
Training loss: 1190931209038.5066 13 2.7633591880700314
Training loss: 1189286426705.92 14 2.76353917319399
Training loss: 1187626013600.4268 15 2.763390910472604
Training loss: 1185944181582.5066 16 2.7632072533217253
Training loss: 1184270989830.8267 17 2.7631213953333535
Training loss: 118

In [8]:
mlp_regr = MLPRegressor(hidden_layer_sizes=(800, 200, 200, 500), max_iter=40, alpha=0.00001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=False, tol=0.00001, n_iter_no_change=5)  # best

mlp_regr.fit(X_train, y_train)

preds = mlp_regr.predict(X_test)

rmsle(y_test, preds)

Iteration 1, loss = 9852636169898046.00000000
Iteration 2, loss = 4782843223819727.00000000
Iteration 3, loss = 4464188199836452.00000000
Iteration 4, loss = 4733131575039396.00000000
Iteration 5, loss = 4472290547449394.50000000
Iteration 6, loss = 4269837520448216.50000000
Iteration 7, loss = 4252351230490286.00000000
Iteration 8, loss = 4281510894202930.50000000
Iteration 9, loss = 4214104031035255.50000000
Iteration 10, loss = 4258927805230352.00000000
Iteration 11, loss = 4248635872957139.00000000
Iteration 12, loss = 4312872056249307.50000000
Iteration 13, loss = 4283809214964793.00000000
Iteration 14, loss = 4302902660721231.50000000
Iteration 15, loss = 4290204102481290.00000000
Training loss did not improve more than tol=0.000010 for 5 consecutive epochs. Stopping.


2.402850552228329

In [57]:
mlp_regr = MLPRegressor(hidden_layer_sizes=(1000, 200, 200, 500), max_iter=40, alpha=0.00001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=False, tol=0.00001, n_iter_no_change=5)  # best

mlp_regr.fit(X_train, y_train)

preds = mlp_regr.predict(X_test)

rmsle(y_test, preds)

Iteration 1, loss = 9854516901168444.00000000
Iteration 2, loss = 4595335552527831.00000000
Iteration 3, loss = 4469739844047478.50000000
Iteration 4, loss = 4260818295737205.00000000
Iteration 5, loss = 4339022870742403.50000000
Iteration 6, loss = 4250233669606186.50000000
Iteration 7, loss = 4242800917244683.00000000
Iteration 8, loss = 4322731268131541.50000000
Iteration 9, loss = 4260251642815720.00000000
Iteration 10, loss = 4294661196346540.50000000
Iteration 11, loss = 4227379049223653.50000000
Iteration 12, loss = 4391905475063076.50000000
Iteration 13, loss = 4374447492365332.50000000
Iteration 14, loss = 4246667755614483.00000000
Iteration 15, loss = 4341612288546292.00000000
Iteration 16, loss = 4390742857691477.50000000
Iteration 17, loss = 4422849052051387.00000000
Training loss did not improve more than tol=0.000010 for 5 consecutive epochs. Stopping.


2.3882717886877685

In [15]:
preds = mlp_regr.predict(X_test)

rmsle(y_test, preds)

2.3727823519272846

In [68]:
mlp_regr = MLPRegressor(hidden_layer_sizes=(1000, 200, ), max_iter=40, alpha=0.00001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=False, tol=0.00001, n_iter_no_change=3)

mlp_regr.fit(X_train, y_train/1e5)

preds = mlp_regr.predict(X_test)

rmsle(y_test, preds*1e5)

Iteration 1, loss = 1070578721402.98205566
Iteration 2, loss = 169154981609.45535278
Iteration 3, loss = 40208648798.18979645
Iteration 4, loss = 20221177680.94396591
Iteration 5, loss = 4665536178.37637901
Iteration 6, loss = 1700233403.78227735
Iteration 7, loss = 592078433.94917178
Iteration 8, loss = 106634006.82095124
Iteration 9, loss = 35144780.76744615
Iteration 10, loss = 12643713.28618718
Iteration 11, loss = 17373984.17633588
Iteration 12, loss = 169458137.15847519
Iteration 13, loss = 105565254.15546705
Iteration 14, loss = 67669582.42899403
Training loss did not improve more than tol=0.000010 for 3 consecutive epochs. Stopping.


2.5734036178410284

In [94]:
mlp_regr = MLPRegressor(hidden_layer_sizes=(1500, 200, 200, 500), max_iter=1000, alpha=0.00001,
                    solver='adam', verbose=10,  random_state=36, early_stopping=False, tol=0.00001, n_iter_no_change=3)

mlp_regr.fit(X_train, y_train)

preds = mlp_regr.predict(X_test)

rmsle(y_test, preds)

Iteration 1, loss = 9180644562707392.00000000
Iteration 2, loss = 4774383633193965.00000000
Iteration 3, loss = 4541828974319462.00000000
Iteration 4, loss = 4263188750547031.00000000
Iteration 5, loss = 4239700343049356.00000000
Iteration 6, loss = 4328264131995082.00000000
Iteration 7, loss = 4300430317325314.50000000
Iteration 8, loss = 4255010981248534.00000000
Iteration 9, loss = 4270247582459696.50000000
Training loss did not improve more than tol=0.000010 for 3 consecutive epochs. Stopping.


2.4818402431593323

In [1]:
import pickle

In [8]:
# save the model to disk
filename = 'finalized_model_mlp.sav'
pickle.dump(mlp_regr, open(filename, 'wb'))

NameError: name 'mlp_regr' is not defined

In [2]:
# load the model from disk
mlp_regr = pickle.load(open('finalized_model_mlp.sav', 'rb'))

In [54]:
mlp_regr = MLPRegressor(hidden_layer_sizes=(1000, 200, 200), max_iter=40, alpha=0.00001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=False, tol=0.00001, n_iter_no_change=3)

mlp_regr.fit(X_train, y_train)

preds = mlp_regr.predict(X_test)

rmsle(y_test, preds)

Iteration 1, loss = 9613999578834082.00000000
Iteration 2, loss = 5274506705902056.00000000
Iteration 3, loss = 4375524773283108.50000000
Iteration 4, loss = 4374893037198315.50000000
Iteration 5, loss = 4297481899681947.50000000
Iteration 6, loss = 4297198571356853.00000000
Iteration 7, loss = 4350095746821185.00000000
Iteration 8, loss = 4327564641121450.00000000
Iteration 9, loss = 4283455694872205.00000000
Iteration 10, loss = 4245628883181325.00000000
Iteration 11, loss = 4262620671572127.50000000
Iteration 12, loss = 4242173005254726.00000000
Iteration 13, loss = 4244069790362055.50000000
Iteration 14, loss = 4229229553737124.00000000
Iteration 15, loss = 4231335361051601.00000000
Iteration 16, loss = 4308477256085541.50000000
Iteration 17, loss = 4284650450555558.50000000
Iteration 18, loss = 4302296566850923.50000000
Training loss did not improve more than tol=0.000010 for 3 consecutive epochs. Stopping.


2.466465940541211

In [29]:
regr = MLPRegressor(hidden_layer_sizes=(700, 700), max_iter=1000, alpha=0.0001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=True, tol=0.00001, n_iter_no_change=5)

regr.fit(X_train_scaled, y_train)

preds = regr.predict(X_test_scaled)

rmsle(y_test, preds)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 12413326378239124.00000000
Validation score: -0.232501
Iteration 2, loss = 12413325714564238.00000000
Validation score: -0.232501
Iteration 3, loss = 12413324159517366.00000000
Validation score: -0.232500
Iteration 4, loss = 12413321346996724.00000000
Validation score: -0.232500
Iteration 5, loss = 12413316111855992.00000000
Validation score: -0.232499
Iteration 6, loss = 12413307770656950.00000000
Validation score: -0.232498
Iteration 7, loss = 12413295316480594.00000000
Validation score: -0.232497
Validation score did not improve more than tol=0.000010 for 5 consecutive epochs. Stopping.


10.903050181562582

In [10]:
regr = MLPRegressor(hidden_layer_sizes=(700, 700), max_iter=1000, alpha=0.0001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=True, tol=0.0001, n_iter_no_change=2)

regr.fit(X_train, y_train)

preds = regr.predict(X_test)

rmsle(y_test, preds)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 9467108744254146.00000000
Validation score: 0.349119
Iteration 2, loss = 5108539062161769.00000000
Validation score: 0.581433
Iteration 3, loss = 4471654366310205.50000000
Validation score: 0.635126
Iteration 4, loss = 4166449026078603.50000000
Validation score: 0.634182
Iteration 5, loss = 4142180488720378.50000000
Validation score: 0.633509
Iteration 6, loss = 4167795321421011.00000000
Validation score: 0.634597
Validation score did not improve more than tol=0.000100 for 2 consecutive epochs. Stopping.


3.194647771455093

In [13]:
regr = MLPRegressor(hidden_layer_sizes=(700, 700), max_iter=1000, alpha=0.0001,
                    solver='adam', verbose=10,  random_state=21, early_stopping=True, tol=0.0001, n_iter_no_change=2)

In [15]:
rmsle(y_test, y_pred)

3.2299880113177872

In [8]:
# use Basyian Optimization for hyperparameter tuning mlp

from bayes_opt import BayesianOptimization

def mlp_cv(hidden_layer_sizes, hidden_layer_sizes1):
    regr = MLPRegressor(hidden_layer_sizes=(int(hidden_layer_sizes), int(hidden_layer_sizes1)), max_iter=1000, alpha=0.01,
                    solver='adam', verbose=10,  random_state=21, early_stopping=True, tol=0.0006, n_iter_no_change=3)
    regr.fit(X_train_scaled, y_train)
    preds = regr.predict(X_train_scaled)
    return -rmsle(y_test, preds)

mlp_bo = BayesianOptimization(
        mlp_cv,
        {
            'hidden_layer_sizes': (100, 1000),
            'hidden_layer_sizes1': (100, 1000),
        }
    )

mlp_bo.maximize(n_iter=10, init_points=10)


|   iter    |  target   | hidden... | hidden... |
-------------------------------------------------


  y = column_or_1d(y, warn=True)


Iteration 1, loss = 11375587325816614.00000000
Validation score: -0.034541
Iteration 2, loss = 8428575149673836.00000000
Validation score: 0.349458
Iteration 3, loss = 4987947446972370.00000000
Validation score: 0.284422
Iteration 4, loss = 4622814866719912.00000000
Validation score: 0.337244
Iteration 5, loss = 4305961583378769.50000000
Validation score: 0.397142
Iteration 6, loss = 4330318488294520.50000000
Validation score: 0.399170
Iteration 7, loss = 4362668666187466.00000000
Validation score: 0.408670
Iteration 8, loss = 4374220486846312.50000000
Validation score: 0.369903
Iteration 9, loss = 4344932282600223.50000000
Validation score: 0.388153
Iteration 10, loss = 4334746168919979.50000000
Validation score: 0.398735
Iteration 11, loss = 4316292373711605.50000000
Validation score: 0.391287
Validation score did not improve more than tol=0.000600 for 3 consecutive epochs. Stopping.
| [0m1        [0m | [0m-3.087   [0m | [0m342.0    [0m | [0m395.1    [0m |


  y = column_or_1d(y, warn=True)


Iteration 1, loss = 11136908684676256.00000000
Validation score: -0.001707
Iteration 2, loss = 7864117027034809.00000000
Validation score: 0.444219
Iteration 3, loss = 4788909047674117.00000000
Validation score: 0.637308
Iteration 4, loss = 4648298797922020.00000000
Validation score: 0.638748
Iteration 5, loss = 4456929699388795.50000000
Validation score: 0.608838
Iteration 6, loss = 4452262832403810.50000000
Validation score: 0.637822
Iteration 7, loss = 4390110221490623.50000000
Validation score: 0.631820
Iteration 8, loss = 4403220870927273.00000000
Validation score: 0.632284
Validation score did not improve more than tol=0.000600 for 3 consecutive epochs. Stopping.
| [0m2        [0m | [0m-3.211   [0m | [0m275.1    [0m | [0m557.2    [0m |


  y = column_or_1d(y, warn=True)


Iteration 1, loss = 8693071227611712.00000000
Validation score: 0.421237
Iteration 2, loss = 4654546067858742.00000000
Validation score: 0.516407
Iteration 3, loss = 4120482875237545.00000000
Validation score: 0.559091
Iteration 4, loss = 4117058269346890.00000000
Validation score: 0.580572
Iteration 5, loss = 4100515385158389.50000000
Validation score: 0.580793
Iteration 6, loss = 4148582334270706.00000000
Validation score: 0.578222
Iteration 7, loss = 4015564436968071.50000000
Validation score: 0.579254
Iteration 8, loss = 4027123699335781.50000000
Validation score: 0.579672
Validation score did not improve more than tol=0.000600 for 3 consecutive epochs. Stopping.
| [95m3        [0m | [95m-2.886   [0m | [95m994.8    [0m | [95m752.0    [0m |


  y = column_or_1d(y, warn=True)


Iteration 1, loss = 9247440551976690.00000000
Validation score: 0.162022
Iteration 2, loss = 5482543490688441.00000000
Validation score: 0.539323
Iteration 3, loss = 4150719401376204.00000000
Validation score: 0.565134
Iteration 4, loss = 4024333395375183.00000000
Validation score: 0.522289
Iteration 5, loss = 3972513447051934.50000000
Validation score: 0.551838
Iteration 6, loss = 3986869655771956.00000000
Validation score: 0.547437
Iteration 7, loss = 4025876117393433.00000000
Validation score: 0.523987
Validation score did not improve more than tol=0.000600 for 3 consecutive epochs. Stopping.
| [0m4        [0m | [0m-3.248   [0m | [0m781.5    [0m | [0m208.6    [0m |


  y = column_or_1d(y, warn=True)


Iteration 1, loss = 11311973190775058.00000000
Validation score: 0.133429
Iteration 2, loss = 7646858506136882.00000000
Validation score: 0.686175
Iteration 3, loss = 4789964921571001.00000000
Validation score: 0.534328
Iteration 4, loss = 4621377652554061.00000000
Validation score: 0.709341
Iteration 5, loss = 4511785158984577.00000000
Validation score: 0.718222
Iteration 6, loss = 4571266070836014.00000000
Validation score: 0.648982
Iteration 7, loss = 4502132786099089.00000000
Validation score: 0.712301
Iteration 8, loss = 4477069476825215.00000000
Validation score: 0.695501
Iteration 9, loss = 4459296201690094.00000000
Validation score: 0.688235
Validation score did not improve more than tol=0.000600 for 3 consecutive epochs. Stopping.
| [0m5        [0m | [0m-3.152   [0m | [0m266.9    [0m | [0m955.1    [0m |


  y = column_or_1d(y, warn=True)


Iteration 1, loss = 11835810351496078.00000000
Validation score: -0.103667
Iteration 2, loss = 9038982762552266.00000000
Validation score: 0.333247
Iteration 3, loss = 5175900751564645.00000000
Validation score: 0.297662
Iteration 4, loss = 4957631243115706.00000000
Validation score: 0.209818
Iteration 5, loss = 4455529128173025.50000000
Validation score: 0.423213
Iteration 6, loss = 4542692053312009.00000000
Validation score: 0.379150
Iteration 7, loss = 4496105520335060.50000000
Validation score: 0.347333
Iteration 8, loss = 4485395506347235.50000000
Validation score: 0.357777
Iteration 9, loss = 4520350692311571.00000000
Validation score: 0.402280
Validation score did not improve more than tol=0.000600 for 3 consecutive epochs. Stopping.
| [0m6        [0m | [0m-3.158   [0m | [0m270.8    [0m | [0m587.7    [0m |


  y = column_or_1d(y, warn=True)


Iteration 1, loss = 9136602400278264.00000000
Validation score: 0.260112


  y = column_or_1d(y, warn=True)


| [0m7        [0m | [0m-3.59    [0m | [0m786.9    [0m | [0m821.3    [0m |
Iteration 1, loss = 10518599238916330.00000000
Validation score: 0.322232
Iteration 2, loss = 5007060162897217.00000000
Validation score: 0.472295
Iteration 3, loss = 5091064493678721.00000000
Validation score: 0.598908
Iteration 4, loss = 4662544059594209.00000000
Validation score: 0.598116
Iteration 5, loss = 4623396090526691.00000000
Validation score: 0.576150
Iteration 6, loss = 4462688179875994.50000000
Validation score: 0.598359


  y = column_or_1d(y, warn=True)


| [0m8        [0m | [0m-3.207   [0m | [0m659.9    [0m | [0m641.2    [0m |
Iteration 1, loss = 10251500311837104.00000000
Validation score: -0.000713
Iteration 2, loss = 6963454146581432.00000000
Validation score: 0.405383
Iteration 3, loss = 4078656998781455.50000000
Validation score: 0.537691
Iteration 4, loss = 4204342291245517.50000000
Validation score: 0.539390
Iteration 5, loss = 4008674322957336.50000000
Validation score: 0.522439
Iteration 6, loss = 3957388536890302.50000000
Validation score: 0.539712
Iteration 7, loss = 3970471676513728.50000000
Validation score: 0.539092


In [55]:
regr = MLPRegressor(hidden_layer_sizes=(800, 500, 100), max_iter=40, alpha=0.0001,
                    solver='adam', verbose=10,  random_state=21,tol=0.000000001)

regr.fit(X_train_scaled, y_train)

preds = regr.predict(X_test_scaled)

rmsle(y_test, preds)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 11867311510283992.00000000
Iteration 2, loss = 11867007958852102.00000000
Iteration 3, loss = 11865243499438610.00000000
Iteration 4, loss = 11859052896370234.00000000
Iteration 5, loss = 11843172897697096.00000000
Iteration 6, loss = 11807633379559452.00000000
Iteration 7, loss = 11741986562520302.00000000
Iteration 8, loss = 11613994102165054.00000000
Iteration 9, loss = 11415648492407526.00000000
Iteration 10, loss = 11114399338167910.00000000
Iteration 11, loss = 10699740333967104.00000000
Iteration 12, loss = 10129325436427210.00000000
Iteration 13, loss = 9487662338565064.00000000
Iteration 14, loss = 8779340193346599.00000000
Iteration 15, loss = 8161701825609054.00000000
Iteration 16, loss = 7632102388757609.00000000
Iteration 17, loss = 7298021646151120.00000000
Iteration 18, loss = 7037393193655040.00000000
Iteration 19, loss = 6864654174271565.00000000
Iteration 20, loss = 6697552651973083.00000000
Iteration 21, loss = 6549667340940541.00000000
Iteration 



3.213741745552174

In [43]:
regr = MLPRegressor(hidden_layer_sizes=(800, 500, 100), max_iter=1000, alpha=0.0001,
                    solver='adam', verbose=10,  random_state=21,tol=0.000000001)

regr.fit(X_train_scaled, y_train)

preds = regr.predict(X_test_scaled)

rmsle(y_test, preds)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 11867311510283992.00000000
Iteration 2, loss = 11867007958852102.00000000
Iteration 3, loss = 11865243499438610.00000000
Iteration 4, loss = 11859052896370234.00000000
Iteration 5, loss = 11843172897697096.00000000
Iteration 6, loss = 11807633379559452.00000000
Iteration 7, loss = 11741986562520302.00000000
Iteration 8, loss = 11613994102165054.00000000
Iteration 9, loss = 11415648492407526.00000000
Iteration 10, loss = 11114399338167910.00000000
Iteration 11, loss = 10699740333967104.00000000
Iteration 12, loss = 10129325436427210.00000000
Iteration 13, loss = 9487662338565064.00000000
Iteration 14, loss = 8779340193346599.00000000
Iteration 15, loss = 8161701825609054.00000000
Iteration 16, loss = 7632102388757609.00000000
Iteration 17, loss = 7298021646151120.00000000
Iteration 18, loss = 7037393193655040.00000000
Iteration 19, loss = 6864654174271565.00000000
Iteration 20, loss = 6697552651973083.00000000
Iteration 21, loss = 6549667340940541.00000000
Iteration 

2.9853199639609076

In [28]:
regr.fit(features, labels)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 9658812395500416.00000000
Iteration 2, loss = 4880336480572720.00000000
Iteration 3, loss = 4332360837154315.00000000
Iteration 4, loss = 4194391133010503.00000000
Iteration 5, loss = 4168126972030673.50000000
Iteration 6, loss = 4134115957056925.00000000
Iteration 7, loss = 4171014934373055.00000000
Iteration 8, loss = 4133475728046373.00000000
Iteration 9, loss = 4162364366452409.00000000
Iteration 10, loss = 4101856638924171.50000000
Iteration 11, loss = 4104163450234917.50000000
Iteration 12, loss = 4117484279157487.00000000
Iteration 13, loss = 4105489633723290.50000000
Iteration 14, loss = 4116760156048518.00000000
Iteration 15, loss = 4118411532866270.50000000
Iteration 16, loss = 4136889469632305.00000000
Iteration 17, loss = 4121678957485150.00000000
Iteration 18, loss = 4110214517932114.50000000
Iteration 19, loss = 4114349507400780.00000000
Iteration 20, loss = 4104246014542306.50000000
Iteration 21, loss = 4107820770373103.50000000
Training loss did not 

In [6]:
# load test data from evaluation_features.npy
with open('evaluation_features.npy', 'rb') as f:
    evaluation_features = np.load(f)

In [5]:
import numpy as np
import pandas as pd

In [7]:
# load test data from evaluation_features.npy
with open('evaluation_features_short.npy', 'rb') as f:
    evaluation_features = np.load(f)

In [8]:
test_ids = pd.read_csv('data/test.csv', usecols=[0]).values

In [14]:
test_ids.shape

(4398, 1)

In [25]:
preds.shape

(4398,)

In [31]:
# predict revenue for test data
preds = regr.predict(evaluation_features)
# save predictions to a csv file
np.savetxt('result_mlp3.csv', np.concatenate((test_ids, preds.reshape(-1, 1)), axis=1), delimiter=',', header='id,revenue', comments='', fmt='%d')

In [56]:

preds = regr.predict(scaler.transform(evaluation_features))

np.savetxt('result_mlp5.csv', np.concatenate((test_ids, preds.reshape(-1, 1)), axis=1), delimiter=',', header='id,revenue', comments='', fmt='%d')


In [9]:
# predict revenue for test data
preds = mlp_regr.predict(evaluation_features)
# save predictions to a csv file
np.savetxt('result_mlp-f.csv', np.concatenate((test_ids, preds.reshape(-1, 1)), axis=1), delimiter=',', header='id,revenue', comments='', fmt='%d')

In [10]:


regr = MLPRegressor(hidden_layer_sizes=(500, 200), max_iter=1000, alpha=0.0001,
                    solver='adam', verbose=10,  random_state=21,tol=0.000000001)

regr.fit(X_train, y_train)

preds = regr.predict(X_test)

rmsle(y_test, preds)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 10649169009674728.00000000
Iteration 2, loss = 7423416665176381.00000000
Iteration 3, loss = 4387045960520514.50000000
Iteration 4, loss = 4478326826968907.00000000
Iteration 5, loss = 4245883173492859.00000000
Iteration 6, loss = 4256421994430762.00000000
Iteration 7, loss = 4224290369340590.00000000
Iteration 8, loss = 4220636750145438.00000000
Iteration 9, loss = 4220286952294331.50000000
Iteration 10, loss = 4219868623491017.50000000
Iteration 11, loss = 4222761196760835.50000000
Iteration 12, loss = 4226513886461231.00000000
Iteration 13, loss = 4234880087621019.50000000
Iteration 14, loss = 4209723827266092.50000000
Iteration 15, loss = 4217978707976180.00000000
Iteration 16, loss = 4215376911955309.00000000
Iteration 17, loss = 4249808424897363.50000000
Iteration 18, loss = 4251845922937924.50000000
Iteration 19, loss = 4234977163333819.50000000
Iteration 20, loss = 4332773937400825.00000000
Iteration 21, loss = 4287544090011541.50000000
Iteration 22, loss = 

2.596870894442328

In [77]:
from sklearn.preprocessing import StandardScaler

In [42]:


# create a StandardScaler object
scaler = StandardScaler()

# fit the scaler to the training data and transform the data
X_train_scaled = scaler.fit_transform(X_train)

# transform the test data using the fitted scaler
X_test_scaled = scaler.transform(X_test)

In [42]:
# get the mean and standard deviation for each column of the training data
means = X_train.mean(axis=0)

In [43]:
# get the index where the mean is > 10000
idx = np.where(means > 10000)

In [44]:
means[idx]

array([21932807.30041667])

In [45]:
# apply log transformation to idx column
X_train_log = np.log(X_train[:, idx] + 1)

In [47]:
# save back to X_train
X_train[:, idx] = X_train_log

In [48]:
# same for test data
X_test_log = np.log(X_test[:, idx] + 1)
X_test[:, idx] = X_test_log

In [76]:
idx

(array([768]),)

In [78]:
# standardize only some columns
scaler = StandardScaler()

# fit the scaler to the training data and transform the data
X_train_scaled = scaler.fit_transform(X_train[:, 768:769])

# transform the test data using the fitted scaler
X_test_scaled = scaler.transform(X_test[:, 768:769])

# replace
X_train[:, 768:769] = X_train_scaled
X_test[:, 768:769] = X_test_scaled

StandardScaler cannot improve performance and makes the training taking a longer time to converge.

In [10]:
# use linear regression for revenue prediction
from sklearn.linear_model import LinearRegression

regr = LinearRegression()

regr.fit(X_train_scaled, y_train)

preds = regr.predict(X_test_scaled)

rmsle(y_test, preds)

ValueError: Mean Squared Logarithmic Error cannot be used when targets contain negative values.

In [19]:
# save the model to disk
import pickle
filename = 'finalized_model.sav'
pickle.dump(regr, open(filename, 'wb'))