In [150]:
import torch.nn as nn
import torch
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

class MLP (nn.Module):
    def __init__ (self, input_size, output_size):
        super(MLP, self).__init__()
        self.sigmoid = nn.Sigmoid()
        self.linear1 = nn.Linear(input_size, 10)
        self.linear2 = nn.Linear(10, output_size)
    
    def forward(self, x):
        x = self.linear1(x)
        x = self.sigmoid(x)
        x = self.linear2(x)
        x = self.sigmoid(x)
        return x

In [151]:
train_data = pd.read_csv("./TrainData.csv")

X = train_data.drop(columns=["h1n1_vaccine", "seasonal_vaccine"])
y = train_data[["h1n1_vaccine", "seasonal_vaccine"]]
X, y = torch.from_numpy(np.array(X, dtype=float)), torch.from_numpy(np.array(y, dtype=float))
X, y = torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
y1_train, y2_train = y_train[:, 0], y_train[:, 1]
y1_test, y2_test = y_test[:, 0], y_test[:, 1]

  X, y = torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)


In [152]:
model_1 = MLP(64, 1)
model_2 = MLP(64, 1)

loss_func_1 = nn.MSELoss()
loss_func_2 = nn.MSELoss()
optimizer_1 = optim.SGD(model_1.parameters(), lr=0.1)
optimizer_2 = optim.SGD(model_2.parameters(), lr=0.1)

for i in range(10000):
    y1_pred = model_1(X_train)
    loss1 = loss_func_1(y1_pred, y1_train.reshape(-1, 1))
    optimizer_1.zero_grad()
    loss1.backward()
    optimizer_1.step()
    y2_pred = model_2(X_train)
    loss2 = loss_func_2(y2_pred, y2_train.reshape(-1, 1))
    optimizer_2.zero_grad()
    loss2.backward(retain_graph=True)
    optimizer_2.step()
    if i % 1000 == 0:
        print("Epoch", i)
        print("h1n1_vaccine loss:", loss1.item())
        print("seasonal_vaccine loss:", loss2.item())



Epoch 0
h1n1_vaccine loss: 0.23577381670475006
seasonal_vaccine loss: 0.2524401843547821
Epoch 1000
h1n1_vaccine loss: 0.15642902255058289
seasonal_vaccine loss: 0.1910221427679062
Epoch 2000
h1n1_vaccine loss: 0.1370345950126648
seasonal_vaccine loss: 0.16728529334068298
Epoch 3000
h1n1_vaccine loss: 0.12690071761608124
seasonal_vaccine loss: 0.16214801371097565
Epoch 4000
h1n1_vaccine loss: 0.1230894923210144
seasonal_vaccine loss: 0.16009506583213806
Epoch 5000
h1n1_vaccine loss: 0.12114770710468292
seasonal_vaccine loss: 0.15909932553768158
Epoch 6000
h1n1_vaccine loss: 0.1200169250369072
seasonal_vaccine loss: 0.1585627794265747
Epoch 7000
h1n1_vaccine loss: 0.11931537836790085
seasonal_vaccine loss: 0.15823811292648315
Epoch 8000
h1n1_vaccine loss: 0.118858702480793
seasonal_vaccine loss: 0.15801191329956055
Epoch 9000
h1n1_vaccine loss: 0.11854315549135208
seasonal_vaccine loss: 0.15783409774303436


In [153]:
print("h1n1_vaccine:", roc_auc_score(y1_test.reshape(-1, 1).detach().numpy(), model_1(X_test).detach().numpy()))
print("seasonal_vaccine:", roc_auc_score(y2_test.reshape(-1, 1).detach().numpy(), model_2(X_test).detach().numpy()))

h1n1_vaccine: 0.809537986823451
seasonal_vaccine: 0.8373480678578485


In [158]:
test_features = pd.read_csv("./TestFeatures.csv")
id = np.array(test_features["respondent_id"])
X_features = np.array(test_features.drop(columns=["respondent_id"]), dtype=float)
X_features = torch.tensor(X_features, dtype=torch.float32)

y1_label = model_1(X_features).reshape(1, -1).detach().numpy()[0]
y2_label = model_2(X_features).reshape(1, -1).detach().numpy()[0]

output = pd.DataFrame({"respondent_id": id, "h1n1_vaccine": y1_label, "seasonal_vaccine": y2_label})
output.to_csv("./submission.csv", index=False)