In [11]:
import numpy as np
import torch

# read data
# Each non-empty line is parsed as "<label> <index>:<value> <index>:<value> ...".
NUM_FEATURES = 8  # a sample is kept only if it carries exactly this many features
file_path = './datasets/preprocess_dataset.txt'

rows = []
with open(file_path, 'r') as file:
    for line in file:
        tokens = line.split()
        if not tokens:
            # Blank lines (e.g. a trailing empty line at the end of the file)
            # hold no sample; indexing tokens[0] on them would raise IndexError.
            continue
        label = int(tokens[0])  # extract label
        # Keep only the value after "index:". The index itself is ignored, so
        # features are stored in the order they appear on the line.
        features = [float(item.split(":")[1]) for item in tokens[1:]]
        # Drop samples that have fewer or more than NUM_FEATURES features.
        # NOTE(review): if the file is in sparse LIBSVM format, an omitted index
        # means 0 and such rows could be densified instead of dropped -- confirm.
        if len(features) == NUM_FEATURES:
            rows.append([label] + features)

if not rows:
    # Fail with a clear message instead of the opaque "too many indices"
    # IndexError that slicing an empty 1-D array would produce below.
    raise ValueError(f'no sample with {NUM_FEATURES} features found in {file_path}')

data = np.array(rows)
X = data[:, 1:]  # feature columns
y = data[:, 0]   # label column

# change label from -1, 1 to 0,1 (every value other than -1 becomes 1)
y = np.where(y == -1, 0, 1)

print(X.shape,y.shape)
print(X[:5])
print(y[:5]) # check label 0,1

(759, 8) (759,)
[[-0.294118    0.487437    0.180328   -0.292929   -1.          0.00149028
  -0.53117    -0.0333333 ]
 [-0.882353   -0.145729    0.0819672  -0.414141   -1.         -0.207153
  -0.766866   -0.666667  ]
 [-0.0588235   0.839196    0.0491803  -1.         -1.         -0.305514
  -0.492741   -0.633333  ]
 [-0.882353   -0.105528    0.0819672  -0.535354   -0.777778   -0.162444
  -0.923997   -1.        ]
 [-1.          0.376884   -0.344262   -0.292929   -0.602837    0.28465
   0.887276   -0.6       ]]
[0 1 0 1 0]


In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

# define perceptron model
class Perceptron(nn.Module):
    """Single linear layer followed by a sigmoid.

    Args:
        input_size: number of input features expected by the linear layer.
    """

    def __init__(self, input_size):
        super().__init__()
        # fully connected layer mapping input_size features to one output value
        self.fc = nn.Linear(input_size, 1)

    def forward(self, x):
        """Return sigmoid(fc(x)), i.e. values in the open interval (0, 1)."""
        logits = self.fc(x)
        return torch.sigmoid(logits)  # sigmoid activation function

# parameters
input_size = X.shape[1]  # number of feature columns in X
num_epochs = 100  # training epoch
learning_rate = 0.01  # learning rate
SEED = 42  # used for both the train/test split and the weight initialisation

# convert to tensor
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)  # column vector, same shape as the model output

# split sets: 80% train / 20% test
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=SEED)

# Seed torch before building the model so the random initial weights -- and
# therefore the printed losses and accuracy -- are the same on every
# Restart & Run All. Without this only the split was reproducible.
torch.manual_seed(SEED)

# define model, loss function and optimizer
model = Perceptron(input_size)
criterion = nn.BCELoss()  # binary cross entropy on the sigmoid outputs
# optim.SGD applies the plain gradient step; because the whole training set is
# fed in every iteration below, this is full-batch gradient descent.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# train
model.train()  # training mode; set once, nothing switches it back inside the loop
for epoch in range(num_epochs):
    optimizer.zero_grad()  # clear gradient
    outputs = model(X_train)  # forward propagation
    loss = criterion(outputs, y_train)  # calculate loss
    loss.backward()  # backward propagation
    optimizer.step()  # update weights

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# predict
model.eval()
with torch.no_grad():  # no gradients needed for evaluation
    y_pred = model(X_test)
    y_pred_classes = (y_pred > 0.5).float()  # convert to binary class labels

# accuracy: fraction of test samples whose predicted class equals the label
accuracy = (y_pred_classes.eq(y_test).sum() / float(y_test.size(0))).item()
print(f'Test sets accuracy: {accuracy:.4f}')


Epoch [10/100], Loss: 0.7314
Epoch [20/100], Loss: 0.7132
Epoch [30/100], Loss: 0.6976
Epoch [40/100], Loss: 0.6842
Epoch [50/100], Loss: 0.6728
Epoch [60/100], Loss: 0.6629
Epoch [70/100], Loss: 0.6545
Epoch [80/100], Loss: 0.6472
Epoch [90/100], Loss: 0.6410
Epoch [100/100], Loss: 0.6355
Test sets accuracy: 0.7303
