In [1]:
import torch
from torch import nn
from sklearn.model_selection import train_test_split

In [2]:
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
device

'cuda'

In [3]:
import csv

In [4]:
# Read testData.csv into three module-level lists:
#   atributes - the header row (column names)
#   data      - every non-blank data row, as a list of strings
#   labels    - the distinct values of column 0, in first-seen order
data = []
atributes = []
labels = []

with open('testData.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    # The first row is the header; an empty file leaves atributes == [].
    atributes = next(reader, [])
    for line in reader:
        if not line:
            # csv.reader yields [] for a blank line; indexing line[0] on it
            # would raise IndexError, so such rows are skipped.
            continue
        if line[0] not in labels:
            labels.append(line[0])
        data.append(line)


In [5]:
data_types = []


def determine_type(value):
    """Classify a raw CSV cell as 'Float' or 'Nominal'.

    Returns 'Float' when ``float(value)`` succeeds and 'Nominal' when it
    raises ValueError (for example on an empty or non-numeric string).
    """
    try:
        float(value)
    except ValueError:
        return 'Nominal'
    return 'Float'

In [6]:
# Infer one type per column from the first data row.
# The list is rebuilt from scratch (not appended to) so that re-running this
# cell cannot grow data_types beyond the number of columns; an empty dataset
# yields an empty list instead of an IndexError.
data_types = [determine_type(value) for value in data[0]] if data else []

In [7]:
# Label-encode every nominal column in place: each distinct string is replaced
# by the string form of an integer code ('0', '1', ...).
for col_idx, col_type in enumerate(data_types):
    if col_type != 'Nominal':
        continue
    if col_idx == 0:
        # Column 0 is the class label; it is mapped to an index later via
        # labels.index(...), which needs the raw value, so leave it untouched.
        continue
    # Codes follow first-seen order. Iteration order of a set of strings
    # changes between interpreter runs (hash randomization), which made the
    # encoding differ from run to run; dict.fromkeys keeps insertion order.
    # First-seen order also makes re-running this cell a no-op.
    codes = {
        value: str(code)
        for code, value in enumerate(dict.fromkeys(row[col_idx] for row in data))
    }
    for row in data:
        row[col_idx] = codes[row[col_idx]]


In [8]:
# Targets: position of each row's first column within `labels`.
y = [labels.index(row[0]) for row in data]
# Features: every remaining column parsed as a float.
X = [[float(cell) for cell in row[1:]] for row in data]

In [9]:
# Convert the Python lists into tensors (dtype is inferred by torch.tensor:
# the float features and the integer class indices built in the previous cell).
X = torch.tensor(X)
y = torch.tensor(y)

In [10]:
RANDOM_SEED = 42

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# partition identical between runs.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [11]:
# Move all four splits onto the selected device in one pass.
X_tr, X_te, y_tr, y_te = (
    split.to(device) for split in (X_tr, X_te, y_tr, y_te)
)

In [17]:
# Display the first training sample to eyeball the feature vector.
X_tr[0]

tensor([ 1.0000,  0.0000, 58.0000,  0.0000,  0.0000, 26.5500,  3.0000],
       device='cuda:0')

In [21]:
class Model(nn.Module):
    """Small feed-forward classifier: Linear -> BatchNorm1d -> ReLU -> Dropout -> Linear.

    Args:
        in_features: Number of input features. When None, it is taken from the
            notebook-level training tensor as ``len(X_tr[0])``.
        hidden_units: Width of the single hidden layer.
        out_features: Number of output classes. When None, it is taken from
            the notebook-level list as ``len(labels)``.
        dropout: Drop probability applied after the activation.

    Calling ``Model()`` with no arguments builds the same network as before;
    passing sizes explicitly removes the dependency on notebook globals.
    """

    def __init__(self, in_features=None, hidden_units=10, out_features=None, dropout=0.05):
        super().__init__()
        # Fall back to the notebook globals only when a size is not supplied.
        if in_features is None:
            in_features = len(X_tr[0])
        if out_features is None:
            out_features = len(labels)

        self.layer_stack = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=hidden_units),
            nn.BatchNorm1d(hidden_units),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=hidden_units, out_features=out_features),
        )

    def forward(self, x):
        """Return the raw class logits produced by ``layer_stack`` for ``x``."""
        return self.layer_stack(x)


In [22]:
# Seed the RNG before the model is constructed so that its randomly
# initialised weights are the same on every fresh run of the notebook.
torch.manual_seed(RANDOM_SEED)
model = Model().to(device)
model

Model(
  (layer_stack): Sequential(
    (0): Linear(in_features=7, out_features=10, bias=True)
    (1): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.05, inplace=False)
    (4): Linear(in_features=10, out_features=2, bias=True)
  )
)

In [23]:
# Adam updates every model parameter with a learning rate of 0.01.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
)
# Cross-entropy is applied to the raw logits the model returns.
loss_fn = nn.CrossEntropyLoss()

In [24]:
def accuracy_fn(y_pred, y_target):
    """Return the percentage (0-100) of predictions that equal their target.

    Args:
        y_pred: Tensor of predicted class indices.
        y_target: Tensor of true class indices, same shape as ``y_pred``.

    Returns:
        Accuracy as a float percentage; 0.0 when there are no samples.

    Raises:
        ValueError: If the two tensors differ in shape. Without this check
            ``torch.eq`` would broadcast them and the count of matches would
            no longer correspond to per-sample correctness.
    """
    if y_pred.shape != y_target.shape:
        raise ValueError(
            f'Shape mismatch: y_pred {tuple(y_pred.shape)} vs y_target {tuple(y_target.shape)}'
        )

    total = len(y_pred)
    if total == 0:
        # Avoid dividing by zero on an empty batch.
        return 0.0

    correct = torch.eq(y_pred, y_target).sum().item()
    return (correct / total) * 100

In [25]:
# Full-batch training: every iteration runs one optimisation step on the whole
# training split and then evaluates on the whole test split.
torch.manual_seed(42)

i_count = []
train_loss_values, test_loss_values = [], []
iterations = 10
log_every = 100  # progress is printed on iterations that are multiples of this


for i in range(iterations):
    # --- training step ---
    # Train mode: Dropout is active and BatchNorm1d uses batch statistics.
    model.train()

    logits = model(X_tr)
    # argmax over the raw logits picks the same class as argmax over their
    # softmax (softmax is monotonic), so the extra softmax is not needed.
    logits_pred = logits.argmax(dim=1)

    acc = accuracy_fn(y_target=y_tr, y_pred=logits_pred)
    loss = loss_fn(logits, y_tr)
    train_loss_values.append(loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    i_count.append(i)

    # --- evaluation step ---
    # Eval mode: Dropout is disabled and BatchNorm1d uses running statistics.
    model.eval()
    with torch.inference_mode():
        test_logits = model(X_te)
        test_logits_pred = test_logits.argmax(dim=1)
        test_loss = loss_fn(test_logits, y_te)
        test_loss_values.append(test_loss.item())
        test_acc = accuracy_fn(y_target=y_te, y_pred=test_logits_pred)

    if i % log_every == 0:
        print(f'Iteration {i}: TRAIN LOSS: {loss.item():.5f} | TRAIN ACCURACY: {acc:.1f}% | TEST_LOSS: {test_loss.item():.5f} | TEST ACCURACY: {test_acc:.1f}%')

# Final summary. Guarded because i, loss, acc, test_loss and test_acc are only
# bound once the loop body has run at least once.
if iterations > 0:
    print(f'Iteration {i+1}: TRAIN LOSS: {loss.item():.5f} | TRAIN ACCURACY: {acc:.2f}% | TEST_LOSS: {test_loss.item():.5f} | TEST ACCURACY: {test_acc:.2f}%')

Iteration 0: TRAIN LOSS: 0.77579 | TRAIN ACCURACY: 46.2% | TEST_LOSS: 0.57226 | TEST ACCURACY: 75.0%
Iteration 10: TRAIN LOSS: 0.64443 | TRAIN ACCURACY: 61.54% | TEST_LOSS: 0.67877 | TEST ACCURACY: 75.00%
