In [None]:
import torch
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

import torch.nn as nn
import torch.nn.functional as F
import copy

In [None]:
# Reproducibility and device configuration.
SEED = 123456
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed every RNG this notebook imports, not just PyTorch's, so that any use of
# `random` or `numpy.random` is repeatable after Restart & Run All.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Seeds all visible CUDA devices; silently ignored when CUDA is unavailable.
torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and disable its autotuner, which may
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Read the training split as float32 and separate the "Real" column (used as
# the regression label later on) from the remaining feature columns.
data = pd.read_csv("train.csv", dtype="float32")
train_X, train_y = data.drop(columns=["Real"]), data["Real"]

# Same treatment for the held-out split; `data` is deliberately rebound.
data = pd.read_csv("test.csv", dtype="float32")
test_X, test_y = data.drop(columns=["Real"]), data["Real"]

In [None]:
def _to_device_tensor(values):
    """Return ``values`` as a torch tensor placed on the notebook's ``device``.

    Accepts either an object exposing ``.values`` as a NumPy-compatible array
    (the pandas DataFrame/Series produced by the loading cell) or a tensor
    that was already converted. The tensor pass-through makes this cell safe
    to re-run: the names below are rebound to tensors, and a second pass over
    the original code would fail because a tensor's ``.values`` is a method,
    not an array.
    """
    if isinstance(values, torch.Tensor):
        return values.to(device)
    # np.array(...) takes an owned copy so the tensor does not alias the
    # pandas-managed buffer.
    return torch.from_numpy(np.array(values.values)).to(device)


# Features.
train_X = _to_device_tensor(train_X)
test_X = _to_device_tensor(test_X)

# Labels.
train_y = _to_device_tensor(train_y)
test_y = _to_device_tensor(test_y)

In [None]:
# Pair features with labels and serve them in fixed-order batches of 64.
# NOTE(review): the training loader is not shuffled, and the "val" loader is
# built from the test split, so the best-weights selection in training is
# driven by test data. Confirm both are intentional.
train = DataLoader(TensorDataset(train_X, train_y), batch_size=64, shuffle=False)
valid = DataLoader(TensorDataset(test_X, test_y), batch_size=64, shuffle=False)
dataloaders = dict(train=train, val=valid)

In [None]:
class LinearModel(nn.Module):
    """Small feed-forward network: Linear -> ReLU -> Linear.

    Despite the name the model is not purely linear; a ReLU sits between the
    two fully connected layers. The hidden layer has the same width as the
    input.

    Args:
        input_size: number of input features (also the hidden width).
        output_size: number of values predicted per sample.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, input_size)
        self.fc2 = nn.Linear(input_size, output_size)

    def forward(self, data):
        """Run the network on ``data`` whose last dimension is ``input_size``.

        Returns the predictions with a trailing dimension of size 1 removed,
        so ``output_size == 1`` yields one value per sample.
        """
        hidden = F.relu(self.fc1(data))
        output = self.fc2(hidden)
        # Squeeze only the last axis. A bare squeeze() would also drop the
        # batch axis for a batch of one, producing a 0-d tensor whose shape no
        # longer matches the (1,)-shaped labels.
        return output.squeeze(-1)

In [None]:
# Hyper-parameter for the Adam optimizer below.
learning_rate = 0.01

# One input unit per feature of a training row and a single regression output;
# the network and the loss module are placed on `device` as they are created.
model = LinearModel(input_size=train_X[0].size(0), output_size=1).to(device)
crit = nn.MSELoss().to(device)

# The optimizer tracks the model's parameter objects, which keep their
# identity across .to(device).
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def train_model(model, dataloaders, crit, optimizer, num_epochs = 5, log_every = 100):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a "train" phase (forward, backward, optimizer step)
    followed by a "val" phase (forward only, gradients disabled). The weights
    with the lowest per-sample validation loss seen so far are snapshotted and
    restored into ``model`` before returning.

    Args:
        model: ``nn.Module`` to train; it is updated in place.
        dataloaders: mapping with "train" and "val" entries, each an iterable
            of ``(inputs, labels)`` tensor batches.
        crit: callable ``crit(outputs, labels)`` returning a scalar loss
            tensor. The running total weights it by batch size, i.e. it is
            treated as a per-batch mean.
        optimizer: optimizer built over ``model``'s parameters.
        num_epochs: number of train+val passes to run.
        log_every: print both phase losses on every ``log_every``-th epoch
            (epoch 0 included). A falsy value disables printing.

    Returns:
        The same ``model`` object, holding the best-validation weights (or its
        initial weights if no validation loss was ever recorded).
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    # Start from +inf so the first finite validation loss is always accepted,
    # whatever the scale of the targets.
    best_loss = float("inf")

    # Move batches to wherever the model lives instead of relying on a
    # notebook-level global; a parameter-free model leaves batches untouched.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    for epoch in range(num_epochs):
        for phase in ("train", "val"):
            is_train = phase == "train"
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            samples_seen = 0

            for inputs, labels in dataloaders[phase]:
                if model_device is not None:
                    inputs = inputs.to(model_device)
                    labels = labels.to(model_device)

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = crit(outputs, labels)

                if is_train:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                samples_seen += batch_size

            # Average over the samples actually iterated; an empty loader
            # yields NaN, which can never win the best-loss comparison below.
            epoch_loss = running_loss / samples_seen if samples_seen else float("nan")

            if log_every and epoch % log_every == 0:
                print("Phase {} loss:{}".format(phase, epoch_loss))

            # Checkpoint inside the phase loop so the comparison explicitly
            # uses the validation loss rather than leaked loop variables.
            if not is_train and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Run the full training; the keyword must be `num_epochs` to match
# train_model's signature (the previous spelling raised a TypeError).
model = train_model(model, dataloaders, crit, optimizer, num_epochs=10000)