In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix


In [2]:
data = pd.read_csv('LVQ_data.csv')
data.head()


Unnamed: 0,ParticipantID,TimeTaken,NumberOfAttempts,IPRegion,CodeSimilarity,NumberOfRequests,IsCheater
0,1,34.967142,1,2,52.225452,2,0
1,2,28.617357,2,4,64.456836,1,0
2,3,36.476885,1,4,72.564259,3,0
3,4,45.230299,7,4,55.849152,2,0
4,5,27.658466,6,4,72.663373,1,0


In [3]:
data = data.drop(columns=['ParticipantID'])
scaler = StandardScaler()
X = scaler.fit_transform(data.drop(columns=['IsCheater']))
y = data['IsCheater'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.int64)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.int64)

In [None]:
class LVQ(nn.Module):
    def __init__(self, input_dim, n_classes, n_prototypes_per_class=2, lr=0.1):
        super(LVQ, self).__init__()
        self.n_classes = n_classes
        self.n_prototypes_per_class = n_prototypes_per_class
        self.prototypes = nn.Parameter(torch.rand(n_classes * n_prototypes_per_class, input_dim))
        self.lr = lr

    def forward(self, x):
        dists = torch.cdist(x, self.prototypes)
        closest_prototype = torch.argmin(dists, dim=1)
        return closest_prototype

    def update_prototypes(self, x, y):
        dists = torch.cdist(x, self.prototypes)
        closest_prototype = torch.argmin(dists, dim=1)

        with torch.no_grad():
            for i, sample in enumerate(x):
                prototype_index = closest_prototype[i]
                prototype_class = prototype_index // self.n_prototypes_per_class

                if prototype_class == y[i]:
                    new_value = self.prototypes[prototype_index] + self.lr * (sample - self.prototypes[prototype_index])
                else:
                    new_value = self.prototypes[prototype_index] - self.lr * (sample - self.prototypes[prototype_index])

                self.prototypes[prototype_index] = new_value


In [8]:
input_dim = X_train.shape[1]
n_classes = len(torch.unique(y_train))
model = LVQ(input_dim=input_dim, n_classes=n_classes, n_prototypes_per_class=2, lr=0.1)
n_epochs = 20
for epoch in range(n_epochs):
    model.update_prototypes(X_train, y_train)

In [9]:
with torch.no_grad():
    pred_train = model(X_train)
    pred_test = model(X_test)
train_acc = accuracy_score(y_train, pred_train)
test_acc = accuracy_score(y_test, pred_test)
print(f"Training Accuracy: {train_acc}")
print(f"Test Accuracy: {test_acc}")
conf_matrix = confusion_matrix(y_test, pred_test)
print("Confusion Matrix:\n", conf_matrix)

Training Accuracy: 0.4142857142857143
Test Accuracy: 0.38
Confusion Matrix:
 [[101 174]
 [ 12  13]]
