In [2]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score


def read_data_from_csv(path):
    """Read a CSV dataset and split it into features and (optional) labels.

    Every column except ``Label`` is treated as a feature. When a ``Label``
    column is present it is cast to int and returned alongside the features.

    Args:
        path (str): Location of the ``.csv`` file to read.

    Returns:
        ``(X, y)`` when the file has a ``Label`` column, where ``X`` holds the
        feature values and ``y`` the integer labels; otherwise just ``X``.

    Raises:
        AssertionError: If ``path`` does not exist or does not end in ``.csv``.
    """
    assert os.path.exists(path), f'File not found: {path}!'
    extension = os.path.splitext(path)[-1]
    assert extension == '.csv', f'Unsupported file type {extension}!'

    frame = pd.read_csv(path)
    feature_names = [
        name for name in frame.columns.values.tolist() if name != 'Label'
    ]
    features = frame[feature_names].values

    # Private datasets ship without labels: hand back the features alone.
    if 'Label' not in frame.columns:
        return features

    # Public datasets carry the target in the 'Label' column.
    labels = frame['Label'].astype('int').values
    return features, labels


# Load the labelled (public) split and report the array dimensions.
X_public, y_public = read_data_from_csv('assignment_5_public.csv')
print(f'Shape of X_public: {X_public.shape}')  # n_sample, m_feature (30000, 58)
print(f'Shape of y_public: {y_public.shape}')  # n_sample (30000,)
'''
CODE HERE!
'''


class NeuralNetwork(nn.Module):
    """Small fully connected binary classifier.

    Architecture: Linear(in_features, 64) -> ReLU -> Linear(64, 32) -> ReLU
    -> Linear(32, 1) -> Sigmoid, so the output is one value in [0, 1] per
    sample and can be paired with ``nn.BCELoss``.

    Args:
        in_features (int): Number of input features per sample. Defaults to
            58, the width of the assignment dataset.
        device (str | torch.device | None): Device that holds the parameters.
            When ``None`` the model is placed on CUDA if it is available and
            on the CPU otherwise, so construction no longer fails on
            machines without a GPU.
    """

    def __init__(self, in_features=58, device=None):
        super().__init__()
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # There is no ``fc2``: the attribute names are kept as they were so
        # that ``state_dict`` keys (fc1/fc3/fc4) do not change.
        self.fc1 = nn.Linear(in_features, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        # Move every registered layer at once instead of per-layer .cuda().
        self.to(device)

    def forward(self, x):
        """Map a batch of feature rows to a column of sigmoid outputs.

        Args:
            x (torch.Tensor): Float tensor whose last dimension equals the
                ``in_features`` the model was built with, located on the
                same device as the model.

        Returns:
            torch.Tensor: Same leading dimensions as ``x`` with a last
            dimension of 1, values in [0, 1].
        """
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc3(x))
        return self.sigmoid(self.fc4(x))


def train_model(X, y, model, criterion, optimizer, num_epochs):
    """Fit ``model`` with full-batch updates and print metrics every epoch.

    The features are rescaled to [0, 1] with a single global min/max taken
    over the whole feature matrix (not per column). Anything fed to the
    trained model later has to go through the same transform.

    Args:
        X: Feature matrix (array-like or tensor) with one row per sample.
        y: Binary labels (array-like or tensor) with one entry per sample.
        model (nn.Module): Network to train; its output is thresholded at
            0.5 to compute the reported accuracy.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer built over ``model``'s parameters.
        num_epochs (int): Number of full-batch update steps to run.

    Returns:
        None. The model is updated in place; progress is printed.
    """
    # Put the data wherever the model lives rather than assuming a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    X = torch.as_tensor(X, dtype=torch.float32).to(device)
    y = torch.as_tensor(y, dtype=torch.float32).to(device).reshape(-1, 1)

    # Global min-max scaling. If every value is identical the range is zero
    # and the division would fill X with NaN, so fall back to zeros.
    x_min, x_max = torch.min(X), torch.max(X)
    value_range = x_max - x_min
    if value_range.item() == 0:
        X = torch.zeros_like(X)
    else:
        X = (X - x_min) / value_range

    n_samples = y.numel()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # Accuracy of the pre-step outputs: fraction of thresholded
        # predictions equal to the labels, counted on-device.
        predictions = (outputs >= 0.5).float()
        accuracy = (predictions == y).sum().item() / n_samples
        print("epoch: ", epoch, " accuracy: ", accuracy, " loss: ", loss.item())


# Build the classifier with a binary cross-entropy loss and an Adam
# optimizer, then fit it on the public split.
model = NeuralNetwork()
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

train_model(
    X=X_public,
    y=y_public,
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=1000,
)


X_private = read_data_from_csv('assignment_5_private.csv')
print('Shape of X_private:', X_private.shape) # k_sample, m_feature (5000, 58)

import numpy as np
# remove and make your own predictions.

'''
CODE HERE!
e.g.,
preds = np.full(len(X_private), -1, dtype=int)
'''

# Run inference on whichever device the trained model lives on.
device = next(model.parameters()).device

# Training rescales the features with the global min/max of the public data,
# so the private features must go through the same transform; feeding raw
# values would give the network inputs on a scale it never saw.
X_reference = torch.tensor(X_public, dtype=torch.float32)
x_min = torch.min(X_reference).to(device)
x_max = torch.max(X_reference).to(device)

X_private = torch.tensor(X_private, dtype=torch.float32).to(device)
X_private_scaled = (X_private - x_min) / (x_max - x_min)

# No gradients are needed for prediction.
model.eval()
with torch.no_grad():
    probabilities = model(X_private_scaled)

# Threshold at 0.5 and emit integer class labels (0/1) rather than floats.
preds = (probabilities >= 0.5).int().cpu().numpy().reshape(-1)

submission = pd.DataFrame({'Label': preds})
submission.to_csv('assignment_5.csv', index=True, index_label='Id')

Shape of X_public: (30000, 58)
Shape of y_public: (30000,)
epoch:  0  accuracy:  0.4674  loss:  0.696997344493866
epoch:  1  accuracy:  0.4674  loss:  0.6964302062988281
epoch:  2  accuracy:  0.4674  loss:  0.6958906054496765
epoch:  3  accuracy:  0.4674  loss:  0.6953808665275574
epoch:  4  accuracy:  0.4674  loss:  0.6949030160903931
epoch:  5  accuracy:  0.4674  loss:  0.6944543123245239
epoch:  6  accuracy:  0.4674  loss:  0.6940342783927917
epoch:  7  accuracy:  0.46773333333333333  loss:  0.6936423182487488
epoch:  8  accuracy:  0.4686  loss:  0.6932791471481323
epoch:  9  accuracy:  0.5244  loss:  0.6929445266723633
epoch:  10  accuracy:  0.5326  loss:  0.6926376223564148
epoch:  11  accuracy:  0.5326  loss:  0.6923587322235107
epoch:  12  accuracy:  0.5326  loss:  0.6921088695526123
epoch:  13  accuracy:  0.5326  loss:  0.6918883919715881
epoch:  14  accuracy:  0.5326  loss:  0.6916983127593994
epoch:  15  accuracy:  0.5326  loss:  0.6915387511253357
epoch:  16  accuracy:  0.53