In [1]:
import torch
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
device = torch.device('cpu')
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split

Preprocess the data: convert categorical columns to numerical (one-hot) values, fill missing values with the column median, scale the features, and then split the data into train and test sets.

In [2]:
# Load the training data and turn it into model-ready tensors.
SEED = 42          # fixed seed so the train/test split is the same on every run
NUM_BATCHES = 16   # number of mini-batches the training split is cut into

train = pd.read_csv('train.csv')
train = train.drop('Id', axis=1)
# One-hot encode categorical columns; dummy_na=True adds an indicator column for missing categories.
train = pd.get_dummies(train, dummy_na=True, drop_first=True)
# Fill remaining missing values with each column's median.
train = train.fillna(train.median())
trainLabels = train['SalePrice']
train = train.drop('SalePrice', axis=1)

normalizer = MinMaxScaler()
# NOTE(review): the scaler is fitted on every row before the split, so the held-out
# rows influence the min/max used for scaling. Fitting on XTrain only would avoid
# this leakage; left unchanged here to keep `train` as the fully scaled feature matrix.
train = normalizer.fit_transform(train)
XTrain, XTest, YTrain, YTest = train_test_split(
    train, trainLabels, test_size=0.2, random_state=SEED
)

# np.array_split (unlike np.split) does not raise when the number of training rows
# is not an exact multiple of NUM_BATCHES; the batches then differ in size by at most one row.
# The labels are copied into a plain ndarray first so the split operates on numpy data only.
labelBatches = np.array_split(np.array(YTrain), NUM_BATCHES)
trainBatches = np.array_split(XTrain, NUM_BATCHES)
# Labels become float column vectors (rows, 1); features become float tensors.
labelBatches = [torch.from_numpy(batch).float().view(-1, 1) for batch in labelBatches]
trainBatches = [torch.from_numpy(batch).float() for batch in trainBatches]

In [3]:
def trainModel(model,trainBatches, labelBatches, epochs, lossCriterion, optimizer=None):
    """Train ``model`` on pre-built mini-batches, with the loss taken on log values.

    For every epoch the model is put in training mode and one optimizer step is
    taken per batch. The loss is ``lossCriterion(log(prediction), log(label))``,
    so both the model outputs and the labels must be strictly positive; a zero
    or negative output makes ``torch.log`` return ``-inf``/``nan``.

    Args:
        model: callable torch module mapping a feature batch to predictions.
        trainBatches: sequence of feature tensors, one per batch.
        labelBatches: sequence of label tensors, indexed in step with ``trainBatches``.
        epochs: number of passes over all batches.
        lossCriterion: callable taking ``(input, target)`` and returning a scalar tensor.
        optimizer: optimizer that updates ``model``'s parameters. When omitted,
            the module-level variable named ``optimizer`` is used, which is how
            this function originally obtained it.

    Raises:
        NameError: if ``optimizer`` is not given and no module-level
            ``optimizer`` exists.

    Prints the number of batches once, then the mean per-batch loss every 25th epoch.
    """
    if optimizer is None:
        # Backward-compatible fallback to the notebook-global optimizer. Passing it
        # explicitly is safer: the global may belong to a previously trained model.
        try:
            optimizer = globals()['optimizer']
        except KeyError:
            raise NameError("name 'optimizer' is not defined") from None

    print(len(trainBatches))
    for e in range(1, epochs + 1):
        trainLoss = 0
        model.train()
        for i, batch in enumerate(trainBatches):
            output = model(batch)
            # Conventional (input, target) order; gradients flow through log(output).
            loss = lossCriterion(torch.log(output), torch.log(labelBatches[i]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            trainLoss += loss.item()
        if e % 25 == 0:
            # Report the average of the per-batch losses for this epoch.
            trainLoss = trainLoss/len(trainBatches)
            print("Epoch: {}".format(e), "Training Loss: {:.5f}.. ".format(trainLoss))

In [4]:
def testModel(model, XTest, YTest, criterion):
    testLabels=torch.from_numpy(np.array(YTest)).float().view(-1, 1)
    test = torch.from_numpy(XTest).float()
    with torch.no_grad():
        model.eval()
        output = model.forward(test)
    loss=torch.sqrt(criterion(torch.log(testLabels), torch.log(output)))
    print("RMSE Error={}".format(loss.item()))

All the top-performing models share the same architecture; this pyramid architecture performed better than the other, arbitrary ones I tried. Since this is a regression problem, the outputs need to be merged into one at some point, and reducing the layer widths gradually works better than reducing them drastically. ReLU activation worked better than sigmoid and tanh activations: ReLU does not suffer from saturating gradients, which generally accelerates convergence, and it also introduces sparsity in the network.<br />
model1(error = 0.162) > model3(error = 0.183) > model2(0.344)<br />
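model1 (Adam) is better than model3 (RMSProp): RMSProp only scales its updates by a running average of the squared gradients (the second moment), whereas Adam additionally keeps a running average of the gradients themselves (the first moment). Adam has also been shown empirically to work well compared with other optimizers. Note that model1 also uses a 10x larger learning rate (1e-3 vs 1e-4), so this comparison is not purely about the optimizer.<br />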
model3 is better than model2. Adam and RMSProp are very close, but in model2 I introduced a weight_decay to Adam, which deteriorates its performance. Weight decay is useful when there is a lot of training data, but that is not the case here: weight decay prevents overfitting, but since there is not a lot of data it hampers learning. It penalizes model complexity, which is not required here.

In [5]:
# Pyramid MLP: every hidden layer halves the width of the previous one, down to one output.
model1 = torch.nn.Sequential(
          # Input width follows the preprocessed feature matrix instead of a hard-coded 288,
          # so the cell keeps working if the one-hot encoding yields a different column count.
          torch.nn.Linear(XTrain.shape[1], 144),
          torch.nn.ReLU(),
          torch.nn.Linear(144, 72),
          torch.nn.ReLU(),
          torch.nn.Linear(72, 36),
          torch.nn.ReLU(),
          torch.nn.Linear(36, 18),
          torch.nn.ReLU(),
          torch.nn.Linear(18,9),
          torch.nn.ReLU(),
          torch.nn.Linear(9,1),
          # Final ReLU keeps predictions non-negative; trainModel/testModel take torch.log
          # of them, so an exactly-zero output would yield log(0) = -inf.
          torch.nn.ReLU(),
        )
lossCriterion = torch.nn.MSELoss()
# trainModel reads this module-level `optimizer`, so it must be rebound before each training run.
optimizer = torch.optim.Adam(model1.parameters(), lr=0.001)
trainModel(model1, trainBatches, labelBatches, 700, lossCriterion)
testModel(model1, XTest, YTest, lossCriterion)

16
Epoch: 25 Training Loss: 13.47132.. 
Epoch: 50 Training Loss: 1.86203.. 
Epoch: 75 Training Loss: 0.38459.. 
Epoch: 100 Training Loss: 0.13721.. 
Epoch: 125 Training Loss: 0.11786.. 
Epoch: 150 Training Loss: 0.10926.. 
Epoch: 175 Training Loss: 0.09996.. 
Epoch: 200 Training Loss: 0.09007.. 
Epoch: 225 Training Loss: 0.07970.. 
Epoch: 250 Training Loss: 0.06924.. 
Epoch: 275 Training Loss: 0.05939.. 
Epoch: 300 Training Loss: 0.05076.. 
Epoch: 325 Training Loss: 0.04370.. 
Epoch: 350 Training Loss: 0.03834.. 
Epoch: 375 Training Loss: 0.03440.. 
Epoch: 400 Training Loss: 0.03135.. 
Epoch: 425 Training Loss: 0.02881.. 
Epoch: 450 Training Loss: 0.02663.. 
Epoch: 475 Training Loss: 0.02471.. 
Epoch: 500 Training Loss: 0.02298.. 
Epoch: 525 Training Loss: 0.02138.. 
Epoch: 550 Training Loss: 0.01966.. 
Epoch: 575 Training Loss: 0.01798.. 
Epoch: 600 Training Loss: 0.01636.. 
Epoch: 625 Training Loss: 0.01482.. 
Epoch: 650 Training Loss: 0.01340.. 
Epoch: 675 Training Loss: 0.01205.. 


In [16]:
# Same pyramid architecture as model1; differs in the optimizer settings
# (Adam with lr=1e-4 and weight_decay=1e-4).
model2 = torch.nn.Sequential(
          # Input width follows the preprocessed feature matrix instead of a hard-coded 288,
          # so the cell keeps working if the one-hot encoding yields a different column count.
          torch.nn.Linear(XTrain.shape[1], 144),
          torch.nn.ReLU(),
          torch.nn.Linear(144, 72),
          torch.nn.ReLU(),
          torch.nn.Linear(72, 36),
          torch.nn.ReLU(),
          torch.nn.Linear(36, 18),
          torch.nn.ReLU(),
          torch.nn.Linear(18,9),
          torch.nn.ReLU(),
          torch.nn.Linear(9,1),
          # Final ReLU keeps predictions non-negative; trainModel/testModel take torch.log
          # of them, so an exactly-zero output would yield log(0) = -inf.
          torch.nn.ReLU(),
        )
lossCriterion = torch.nn.MSELoss()
# trainModel reads this module-level `optimizer`, so it must be rebound before each training run.
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-4, weight_decay=1e-4)
trainModel(model2, trainBatches, labelBatches, 700, lossCriterion)
testModel(model2, XTest, YTest, lossCriterion)

16
Epoch: 25 Training Loss: 103.25434.. 
Epoch: 50 Training Loss: 64.33721.. 
Epoch: 75 Training Loss: 45.42891.. 
Epoch: 100 Training Loss: 33.81724.. 
Epoch: 125 Training Loss: 25.73164.. 
Epoch: 150 Training Loss: 19.52944.. 
Epoch: 175 Training Loss: 14.49879.. 
Epoch: 200 Training Loss: 10.68226.. 
Epoch: 225 Training Loss: 7.89629.. 
Epoch: 250 Training Loss: 5.77809.. 
Epoch: 275 Training Loss: 4.11671.. 
Epoch: 300 Training Loss: 2.85576.. 
Epoch: 325 Training Loss: 1.92507.. 
Epoch: 350 Training Loss: 1.25678.. 
Epoch: 375 Training Loss: 0.80480.. 
Epoch: 400 Training Loss: 0.50424.. 
Epoch: 425 Training Loss: 0.31141.. 
Epoch: 450 Training Loss: 0.19335.. 
Epoch: 475 Training Loss: 0.14009.. 
Epoch: 500 Training Loss: 0.12089.. 
Epoch: 525 Training Loss: 0.11488.. 
Epoch: 550 Training Loss: 0.11150.. 
Epoch: 575 Training Loss: 0.10809.. 
Epoch: 600 Training Loss: 0.10428.. 
Epoch: 625 Training Loss: 0.10002.. 
Epoch: 650 Training Loss: 0.09531.. 
Epoch: 675 Training Loss: 0.0

In [15]:
# Same pyramid architecture as model1; trained with RMSprop (lr=1e-4, alpha=0.98) instead of Adam.
model3 = torch.nn.Sequential(
          # Input width follows the preprocessed feature matrix instead of a hard-coded 288,
          # so the cell keeps working if the one-hot encoding yields a different column count.
          torch.nn.Linear(XTrain.shape[1], 144),
          torch.nn.ReLU(),
          torch.nn.Linear(144, 72),
          torch.nn.ReLU(),
          torch.nn.Linear(72, 36),
          torch.nn.ReLU(),
          torch.nn.Linear(36, 18),
          torch.nn.ReLU(),
          torch.nn.Linear(18,9),
          torch.nn.ReLU(),
          torch.nn.Linear(9,1),
          # Final ReLU keeps predictions non-negative; trainModel/testModel take torch.log
          # of them, so an exactly-zero output would yield log(0) = -inf.
          torch.nn.ReLU(),
        )
# trainModel reads this module-level `optimizer`, so it must be rebound before each training run.
optimizer = torch.optim.RMSprop(model3.parameters(),lr=1e-4,alpha = 0.98)
lossCriterion = torch.nn.MSELoss()
trainModel(model3, trainBatches, labelBatches, 700, lossCriterion)
testModel(model3, XTest, YTest, lossCriterion)

16
Epoch: 25 Training Loss: 72.20291.. 
Epoch: 50 Training Loss: 37.09581.. 
Epoch: 75 Training Loss: 19.40721.. 
Epoch: 100 Training Loss: 9.68485.. 
Epoch: 125 Training Loss: 4.28616.. 
Epoch: 150 Training Loss: 1.48195.. 
Epoch: 175 Training Loss: 0.32120.. 
Epoch: 200 Training Loss: 0.11842.. 
Epoch: 225 Training Loss: 0.10228.. 
Epoch: 250 Training Loss: 0.08762.. 
Epoch: 275 Training Loss: 0.07437.. 
Epoch: 300 Training Loss: 0.06261.. 
Epoch: 325 Training Loss: 0.05270.. 
Epoch: 350 Training Loss: 0.04507.. 
Epoch: 375 Training Loss: 0.03989.. 
Epoch: 400 Training Loss: 0.03653.. 
Epoch: 425 Training Loss: 0.03404.. 
Epoch: 450 Training Loss: 0.03194.. 
Epoch: 475 Training Loss: 0.03011.. 
Epoch: 500 Training Loss: 0.02851.. 
Epoch: 525 Training Loss: 0.02708.. 
Epoch: 550 Training Loss: 0.02582.. 
Epoch: 575 Training Loss: 0.02468.. 
Epoch: 600 Training Loss: 0.02367.. 
Epoch: 625 Training Loss: 0.02275.. 
Epoch: 650 Training Loss: 0.02193.. 
Epoch: 675 Training Loss: 0.02118..

# Some of the other stuff I tried

sigmoid activation

In [8]:
# Pyramid MLP with Sigmoid after every layer, including the output layer.
# NOTE(review): a Sigmoid output lies in (0, 1), so log(output) is never positive, while
# the targets are log(SalePrice). The recorded loss plateau near 144.7 is consistent with
# the output saturating at 1, so this run likely measures the bounded *output* activation
# rather than the hidden-layer activations -- confirm before drawing conclusions.
model = torch.nn.Sequential(
          # Input width follows the preprocessed feature matrix instead of a hard-coded 288,
          # so the cell keeps working if the one-hot encoding yields a different column count.
          torch.nn.Linear(XTrain.shape[1], 144),
          torch.nn.Sigmoid(),
          torch.nn.Linear(144, 72),
          torch.nn.Sigmoid(),
          torch.nn.Linear(72, 36),
          torch.nn.Sigmoid(),
          torch.nn.Linear(36, 18),
          torch.nn.Sigmoid(),
          torch.nn.Linear(18,9),
          torch.nn.Sigmoid(),
          torch.nn.Linear(9,1),
          torch.nn.Sigmoid(),
        )
lossCriterion = torch.nn.MSELoss()
# trainModel reads this module-level `optimizer`, so it must be rebound before each training run.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
trainModel(model, trainBatches, labelBatches, 700, lossCriterion)
testModel(model, XTest, YTest, lossCriterion)

16
Epoch: 25 Training Loss: 148.05813.. 
Epoch: 50 Training Loss: 145.94094.. 
Epoch: 75 Training Loss: 145.34897.. 
Epoch: 100 Training Loss: 145.07745.. 
Epoch: 125 Training Loss: 144.93084.. 
Epoch: 150 Training Loss: 144.85369.. 
Epoch: 175 Training Loss: 144.80767.. 
Epoch: 200 Training Loss: 144.77781.. 
Epoch: 225 Training Loss: 144.75728.. 
Epoch: 250 Training Loss: 144.74262.. 
Epoch: 275 Training Loss: 144.73182.. 
Epoch: 300 Training Loss: 144.72371.. 
Epoch: 325 Training Loss: 144.71747.. 
Epoch: 350 Training Loss: 144.71265.. 
Epoch: 375 Training Loss: 144.70886.. 
Epoch: 400 Training Loss: 144.70586.. 
Epoch: 425 Training Loss: 144.70348.. 
Epoch: 450 Training Loss: 144.70157.. 
Epoch: 475 Training Loss: 144.70002.. 
Epoch: 500 Training Loss: 144.69879.. 
Epoch: 525 Training Loss: 144.69778.. 
Epoch: 550 Training Loss: 144.69697.. 
Epoch: 575 Training Loss: 144.69631.. 
Epoch: 600 Training Loss: 144.69577.. 
Epoch: 625 Training Loss: 144.69533.. 
Epoch: 650 Training Loss:

Tanh activation

In [14]:
# Pyramid MLP with Tanh after every layer, including the output layer.
# NOTE(review): a Tanh output lies in (-1, 1), so log(output) is never positive (and is
# NaN for negative outputs), while the targets are log(SalePrice). The recorded loss
# plateau near 144.7 is consistent with the output saturating at 1, so this run likely
# measures the bounded *output* activation rather than the hidden-layer activations --
# confirm before drawing conclusions.
model = torch.nn.Sequential(
          # Input width follows the preprocessed feature matrix instead of a hard-coded 288,
          # so the cell keeps working if the one-hot encoding yields a different column count.
          torch.nn.Linear(XTrain.shape[1], 144),
          torch.nn.Tanh(),
          torch.nn.Linear(144, 72),
          torch.nn.Tanh(),
          torch.nn.Linear(72, 36),
          torch.nn.Tanh(),
          torch.nn.Linear(36, 18),
          torch.nn.Tanh(),
          torch.nn.Linear(18,9),
          torch.nn.Tanh(),
          torch.nn.Linear(9,1),
          torch.nn.Tanh(),
        )
lossCriterion = torch.nn.MSELoss()
# trainModel reads this module-level `optimizer`, so it must be rebound before each training run.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
trainModel(model, trainBatches, labelBatches, 700, lossCriterion)
testModel(model, XTest, YTest, lossCriterion)

16
Epoch: 25 Training Loss: 145.06855.. 
Epoch: 50 Training Loss: 144.83309.. 
Epoch: 75 Training Loss: 144.76731.. 
Epoch: 100 Training Loss: 144.73894.. 
Epoch: 125 Training Loss: 144.72393.. 
Epoch: 150 Training Loss: 144.71498.. 
Epoch: 175 Training Loss: 144.70921.. 
Epoch: 200 Training Loss: 144.70528.. 
Epoch: 225 Training Loss: 144.70249.. 
Epoch: 250 Training Loss: 144.70046.. 
Epoch: 275 Training Loss: 144.69893.. 
Epoch: 300 Training Loss: 144.69777.. 
Epoch: 325 Training Loss: 144.69688.. 
Epoch: 350 Training Loss: 144.69618.. 
Epoch: 375 Training Loss: 144.69562.. 
Epoch: 400 Training Loss: 144.69519.. 
Epoch: 425 Training Loss: 144.69484.. 
Epoch: 450 Training Loss: 144.69456.. 
Epoch: 475 Training Loss: 144.69434.. 
Epoch: 500 Training Loss: 144.69415.. 
Epoch: 525 Training Loss: 144.69400.. 
Epoch: 550 Training Loss: 144.69388.. 
Epoch: 575 Training Loss: 144.69379.. 
Epoch: 600 Training Loss: 144.69370.. 
Epoch: 625 Training Loss: 144.69365.. 
Epoch: 650 Training Loss: