In [None]:
import random
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import torch,torchvision
from torch.nn import *
from tqdm import tqdm
from torch.optim import *
# Preprocessing
from sklearn.preprocessing import (
    StandardScaler,
    RobustScaler,
    MinMaxScaler,
    MaxAbsScaler,
    OneHotEncoder,
    Normalizer,
    Binarizer
)
# Decomposition
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
# Feature Selection
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import SelectFromModel
# Model Eval
from sklearn.compose import make_column_transformer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score,train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error
# Other
import pickle
import wandb

# Weights & Biases project that every training run in this notebook logs to.
PROJECT_NAME = 'House-Prices-Advanced-Regression-Techniques-V10'
# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Seed the NumPy, Python and PyTorch random number generators.
np.random.seed(21)
random.seed(21)
torch.manual_seed(21)

In [None]:
# data = pd.read_csv('./data/train.csv')
# data = data.sample(frac=1)

In [None]:
def object_to_int(data,col):
    """Replace the values of ``data[col]`` with integer codes, in place.

    Codes are handed out in order of first appearance: the first distinct
    value becomes 0, the next new value 1, and so on.

    Args:
        data: DataFrame holding the column; it is modified in place.
        col: label of the column to encode.

    Returns:
        Tuple ``(data, idx, labels_and_int_index, new_data)`` where ``idx`` is
        the largest code assigned (-1 if the column is empty),
        ``labels_and_int_index`` maps each original value to its code, and
        ``new_data`` is the list of codes written to the column.
    """
    values = list(data[col].to_dict().values())
    labels_and_int_index = {}
    for value in values:
        if value not in labels_and_int_index:
            # The next free code is simply the number of labels seen so far.
            labels_and_int_index[value] = len(labels_and_int_index)
    new_data = [labels_and_int_index[value] for value in values]
    data[col] = new_data
    idx = len(labels_and_int_index) - 1
    return data,idx,labels_and_int_index,new_data

In [None]:
# str_cols = []
# int_cols = []
# for col_name,num_of_missing_rows,dtype in zip(list(data.columns),data.isna().sum(),data.dtypes):
#     if dtype == object:
#         str_cols.append(col_name)
#     else:
#         int_cols.append(col_name)
# for str_col in str_cols:
#     data,idx,labels_and_int_index,new_data = object_to_int(data,str_col)
# nan_cols = []
# for col_name,num_of_missing_rows,dtype in zip(list(data.columns),data.isna().sum(),data.dtypes):
#     if num_of_missing_rows > 0:
#         nan_cols.append(col_name)
# for nan_col in nan_cols:
#     data[nan_col].fillna(data[nan_col].median(),inplace=True)

In [None]:
# data.to_csv('./data/cleaned-data.csv',index=False)

In [None]:
# Load the cleaned training table (the file the commented-out clean-up cells
# above write to ./data/cleaned-data.csv).
data = pd.read_csv('./data/cleaned-data.csv')

In [None]:
def valid(model,X,y,valid=False):
    """Compute the mean absolute and mean squared error of ``model`` on ``(X, y)``.

    The forward pass runs in evaluation mode with gradient tracking disabled,
    so measuring the model neither updates BatchNorm running statistics nor
    builds an autograd graph. The train/eval mode the model had on entry is
    restored before returning.

    Args:
        model: callable mapping ``X`` to a tensor of predictions.
        X: inputs, handed to ``model`` unchanged.
        y: tensor of target values.
        valid: when true, the metric names are prefixed with ``'val '``.

    Returns:
        dict with the keys ``'mean_absolute_error'`` and
        ``'mean_squared_error'`` (or their ``'val '``-prefixed variants).
    """
    # Objects without a ``training`` flag (plain callables) are called as-is.
    was_training = getattr(model,'training',False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            preds = model(X)
    finally:
        if was_training:
            model.train()
    preds = preds.cpu().detach().numpy()
    y = y.cpu().detach().numpy()
    prefix = 'val ' if valid else ''
    return {
        f'{prefix}mean_absolute_error':mean_absolute_error(y_true=y,y_pred=preds),
        f'{prefix}mean_squared_error':mean_squared_error(y_true=y,y_pred=preds),
    }

In [None]:
def make_submission(model,name):
    """Predict on ``./data/test.csv`` and write ``./submissions/{name}.csv``.

    The test table is prepared like the training table was: object columns
    are integer-encoded with ``object_to_int`` and any remaining missing
    values are replaced by the column median. Predictions are made in
    evaluation mode with gradient tracking disabled; the model's previous
    train/eval mode is restored afterwards.

    NOTE(review): ``object_to_int`` assigns codes in order of first
    appearance in the test file, so a label is not guaranteed to receive the
    same code it had in the training data. Fixing this needs the training
    mapping, which is not available here.

    Args:
        model: callable taking the prepared DataFrame and returning a tensor
            with one prediction per row.
        name: file stem of the submission CSV.
    """
    import os  # local import: only needed to create the output directory

    data = pd.read_csv('./data/test.csv')
    ids = data['Id']
    # Integer-encode every object (string) column.
    str_cols = [col_name for col_name,dtype in zip(data.columns,data.dtypes) if dtype == object]
    for str_col in str_cols:
        data,_,_,_ = object_to_int(data,str_col)
    # Median-impute whatever is still missing. The result is assigned back
    # because an in-place fillna on ``data[col]`` may act on a temporary copy.
    nan_counts = data.isna().sum()
    for nan_col in nan_counts[nan_counts > 0].index:
        data[nan_col] = data[nan_col].fillna(data[nan_col].median())
    # Objects without a ``training`` flag (plain callables) are called as-is.
    was_training = getattr(model,'training',False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            preds = model(data)
    finally:
        if was_training:
            model.train()
    df = pd.DataFrame({'Id':ids,'SalePrice':preds.view(-1).cpu().detach().numpy()})
    out_path = f'./submissions/{name}.csv'
    # Create the target directory so a finished run cannot fail on the write.
    os.makedirs(os.path.dirname(out_path),exist_ok=True)
    df.to_csv(out_path,index=False)

In [None]:
def train(model,X_train,X_test,y_train,y_test,name,epochs,batch_size,criterion,optimizer):
    """Train ``model`` with mini-batches, log to Weights & Biases, then write a submission.

    Each epoch walks ``X_train`` / ``y_train`` in order in slices of
    ``batch_size``, then evaluates in eval mode without gradient tracking.
    All metrics of an epoch are sent in a single ``wandb.log`` call so they
    share one step:

    * ``'Loss'``: loss of the last mini-batch of the epoch,
    * ``'Val Loss'``: ``criterion`` on ``(X_test, y_test)``,
    * the dictionaries returned by ``valid`` for the train and test data.

    After the last epoch ``make_submission(model, name)`` is called.

    Args:
        model: torch module to train; it is moved to ``device``.
        X_train, y_train: training tensors.
        X_test, y_test: validation tensors.
        name: W&B run name, also passed to ``make_submission``.
        epochs: number of passes over the training data.
        batch_size: number of rows per mini-batch.
        criterion: loss taking ``(predictions, targets)`` as flat tensors.
        optimizer: optimizer already bound to ``model``'s parameters.

    Returns:
        The trained ``model``, left in evaluation mode.
    """
    wandb.init(project=PROJECT_NAME,name=name)
    # Loop-invariant work: move the model and the validation targets once.
    model.to(device)
    y_val = y_test.float().to(device)
    for _ in tqdm(range(epochs)):
        # Evaluation below switches to eval mode, so re-enable training here.
        model.train()
        for idx in range(0,len(X_train),batch_size):
            X_batch = X_train[idx:idx+batch_size].float().to(device)
            y_batch = y_train[idx:idx+batch_size].float().to(device)
            preds = model(X_batch)
            loss = criterion(preds.view(-1),y_batch.view(-1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        metrics = {'Loss':loss.item()}
        # Evaluate with BatchNorm running statistics and without building an
        # autograd graph.
        model.eval()
        with torch.no_grad():
            val_preds = model(X_test)
            metrics['Val Loss'] = criterion(val_preds.view(-1),y_val.view(-1)).item()
        metrics.update(valid(model,X_train,y_train))
        metrics.update(valid(model,X_test,y_test,True))
        wandb.log(metrics)
    # Make sure the submission is predicted in eval mode even when epochs == 0.
    model.eval()
    make_submission(model,name)
    return model

In [None]:
# Separate the regression target from the feature columns.
y = data['SalePrice']
X = data.drop(columns='SalePrice')

In [None]:
# Hold out 6.25% of the rows for validation. random_state pins the split so
# re-running this cell on its own yields the same partition instead of
# depending on how much of the global NumPy RNG has been consumed so far.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.0625,random_state=21)

In [None]:
# Training features as a float tensor; train() moves each mini-batch to
# `device` itself.
X_train = torch.from_numpy(np.array(X_train)).float()

In [None]:
# Validation features as a float tensor; Model.forward moves its input to the
# compute device itself.
X_test = torch.from_numpy(np.array(X_test)).float()

In [None]:
# Training targets as a float tensor; train() moves each mini-batch to
# `device` itself.
y_train = torch.from_numpy(np.array(y_train)).float()

In [None]:
# Unlike the other three tensors, y_test is placed on `device` right away:
# the validation loss in train() compares it with predictions that live on
# `device`.
y_test = torch.from_numpy(np.array(y_test)).float().to(device)

In [None]:
# Sanity check: the second dimension must equal the input width of Model's
# first Linear layer (80).
X_train.shape

In [None]:
class Model(Module):
    """Fully connected regressor with a single output value.

    The network is nine ``Linear -> BatchNorm1d -> activation`` stages
    followed by one ``Linear`` output layer. Stage widths are multiples of
    ``hidden``: 1, 2, 3, 4, 8, 16, 32, 64 and 32.

    Args:
        hidden: base width; every stage width is a multiple of it.
        activation: callable applied after every BatchNorm layer (for
            example ``ReLU()``). If it is a module with parameters it is
            registered and moved together with the rest of the model.
        in_features: number of input columns (default 80).
        target_device: device to place the model on; ``None`` uses the
            notebook-level ``device``.
    """

    # Width of each hidden stage as a multiple of ``hidden``.
    _WIDTH_MULTIPLIERS = (1,2,3,4,8,16,32,64,32)

    def __init__(self,hidden,activation,in_features=80,target_device=None):
        super().__init__()
        self.activation = activation
        fan_in = in_features
        for stage,multiplier in enumerate(self._WIDTH_MULTIPLIERS,start=1):
            width = hidden*multiplier
            # Attribute names (linear1, linear1batchnorm, ...) are kept so the
            # state_dict keys stay the same as with hand-written layers.
            setattr(self,f'linear{stage}',Linear(fan_in,width))
            setattr(self,f'linear{stage}batchnorm',BatchNorm1d(width))
            fan_in = width
        self.output = Linear(fan_in,1)
        if target_device is None:
            target_device = device
        # Move everything at once, including a parametrised activation.
        self.to(target_device)

    def forward(self,X):
        """Return predictions of shape ``(rows, 1)`` for ``X``.

        ``X`` may be a tensor on any device or anything ``np.array`` can
        convert (DataFrame, ndarray, nested list). It is cast to float and
        moved to the device the model's parameters live on.
        """
        param_device = self.output.weight.device
        if torch.is_tensor(X):
            # Tensors are used directly: no CPU/NumPy round trip per batch.
            preds = X
        else:
            preds = torch.from_numpy(np.array(X))
        preds = preds.float().to(param_device)
        for stage in range(1,len(self._WIDTH_MULTIPLIERS)+1):
            linear = getattr(self,f'linear{stage}')
            batchnorm = getattr(self,f'linear{stage}batchnorm')
            preds = self.activation(batchnorm(linear(preds)))
        return self.output(preds)

In [None]:
# Baseline configuration: base width 128 with ReLU, trained with Adam on MSE.
epochs,batch_size = 100,32
criterion = MSELoss()
model = Model(128,ReLU())
optimizer = Adam(model.parameters(),lr=0.001)

In [None]:
# Train the baseline; the run is logged (and the submission saved) as 'baseline'.
train(
    model,X_train,X_test,y_train,y_test,
    name='baseline',
    epochs=epochs,
    batch_size=batch_size,
    criterion=criterion,
    optimizer=optimizer,
)

In [None]:
# Candidate values for the one-at-a-time hyper-parameter sweeps below.
# Base layer widths: powers of two from 8 to 2048.
hiddens = [2**exponent for exponent in range(3,12)]
# Activation classes (not instances).
activations = [
    ELU,LeakyReLU,PReLU,ReLU,ReLU6,RReLU,
    SELU,CELU,GELU,SiLU,Tanh,
]
# Loss functions, already instantiated.
criterions = [MSELoss(),L1Loss()]
# Mini-batch sizes: powers of two from 8 to 512.
batch_sizes = [2**exponent for exponent in range(3,10)]
# Optimizer classes (not instances).
optimizers = [Adam,AdamW,Adamax,RMSprop,Rprop]
# Learning rates, one decade apart.
lrs = [1e-1,1e-2,1e-3,1e-4,1e-5]

In [None]:
# Sweep the base layer width; everything else stays at the baseline values.
# Each width gets a fresh model and optimizer and its own run 'hidden-<width>'.
for hidden in hiddens:
    epochs,batch_size = 100,32
    criterion = MSELoss()
    model = Model(hidden,ReLU())
    optimizer = Adam(model.parameters(),lr=0.001)
    train(
        model,X_train,X_test,y_train,y_test,
        name=f'hidden-{hidden}',
        epochs=epochs,
        batch_size=batch_size,
        criterion=criterion,
        optimizer=optimizer,
    )

In [None]:
# for activation in activations:
#     model = Model(,activation())
#     optimizer = Adam(model.parameters(),lr=0.001)
#     criterion = MSELoss()
#     epochs = 100
#     batch_size = 32
#     train(model,X_train,X_test,y_train,y_test,name=f'activation-{activation}',epochs=epochs,batch_size=batch_size,criterion=criterion,optimizer=optimizer)

In [None]:
# for criterion in criterions:
#     model = Model(,())
#     optimizer = Adam(model.parameters(),lr=0.001)
#     criterion = criterion()
#     epochs = 100
#     batch_size = 32
#     train(model,X_train,X_test,y_train,y_test,name=f'criterion-{criterion}',epochs=epochs,batch_size=batch_size,criterion=criterion,optimizer=optimizer)

In [None]:
# for batch_size in batch_sizes:
#     model = Model(,())
#     optimizer = Adam(model.parameters(),lr=0.001)
#     criterion = ()
#     epochs = 100
#     batch_size = batch_size
#     train(model,X_train,X_test,y_train,y_test,name=f'batch_size-{batch_size}',epochs=epochs,batch_size=batch_size,criterion=criterion,optimizer=optimizer)

In [None]:
# for optimizer in optimizers:
#     model = Model(,())
#     optimizer = optimizer(model.parameters(),lr=0.001)
#     criterion = ()
#     epochs = 100
#     batch_size = batch_size
#     train(model,X_train,X_test,y_train,y_test,name=f'optimizer-{optimizer}',epochs=epochs,batch_size=batch_size,criterion=criterion,optimizer=optimizer)

In [None]:
# for lr in lrs:
#     model = Model(,())
#     optimizer = (model.parameters(),lr=lr)
#     criterion = ()
#     epochs = 100
#     batch_size = batch_size
#     train(model,X_train,X_test,y_train,y_test,name=f'lr-{lr}',epochs=epochs,batch_size=batch_size,criterion=criterion,optimizer=optimizer)