In [1]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score

import torch
from torch import Tensor

In [2]:
# Load the vehicle dataset; the first row is the header and fields are comma-separated
# (both are the pandas defaults).
alldata = pd.read_csv("vehicle.csv")

In [3]:
def split(data, train_fraction=0.8):
    """Split `data` into a leading train part and a trailing test part.

    The split is positional: the first `int(train_fraction * len(data))`
    items form the train set and the remainder the test set. No shuffling
    is performed, so the result depends on the order of `data`.

    Args:
        data: Any sliceable sequence supporting `len()` (list, DataFrame, ...).
        train_fraction: Share of items assigned to the train set, in [0, 1].
            Defaults to 0.8.

    Returns:
        A `(data_train, data_test)` tuple.

    Raises:
        ValueError: If `train_fraction` is outside [0, 1].
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            f"train_fraction must be within [0, 1], got {train_fraction}"
        )

    split_point = int(train_fraction * len(data))
    data_train = data[:split_point]
    data_test = data[split_point:]

    # Print the sizes so the split is visible in the notebook output.
    print("Length of whole dataset: " + str(len(data)))
    print("Length of train dataset: " + str(len(data_train)))
    print("Length of test dataset: " + str(len(data_test)))
    return data_train, data_test

In [4]:
def evaluation(y_expected, y_predicted):
    """Print accuracy and weighted precision, recall and F-score.

    Args:
        y_expected: Ground-truth labels.
        y_predicted: Labels predicted by a model, aligned with `y_expected`.

    Returns:
        None. The four metrics are written to stdout, one per line.
    """
    # The fourth return value (support) is not reported.
    precision, recall, fscore, _ = precision_recall_fscore_support(
        y_expected, y_predicted, average="weighted"
    )
    accuracy = accuracy_score(y_expected, y_predicted)

    report = (
        ("Accuracy", accuracy),
        ("Precision", precision),
        ("Recall", recall),
        ("F-score", fscore),
    )
    for label, value in report:
        print(f"{label}: {value}")

In [5]:
# Positional hold-out: the leading 80% of rows (in file order) become the train set,
# the trailing 20% the test set. Rows are not shuffled.
data_train, data_test = split(alldata)

Length of whole dataset: 846
Length of train dataset: 676
Length of test dataset: 170


In [6]:
# Targets are kept as single-column DataFrames; every column except the last one
# is treated as a feature.
y_train = data_train['Class'].to_frame()

# Fit the scaler on the training features only, then reuse the fitted statistics
# for the test features.
scaler = StandardScaler()
x_train = scaler.fit_transform(data_train.iloc[:, :-1])

x_test = scaler.transform(data_test.iloc[:, :-1])
y_expected = data_test['Class'].to_frame()

## Logistic Regression

In [7]:
# L1-penalised logistic regression trained on the scaled training features.
model_logreg = LogisticRegression(
    penalty='l1',
    solver="liblinear",
    max_iter=1000,
)
# The target DataFrame is flattened to the 1-D array that `fit` expects.
model_logreg.fit(x_train, y_train.to_numpy().ravel())

y_predicted_logreg = model_logreg.predict(x_test)
evaluation(y_expected, y_predicted_logreg)

Accuracy: 0.8352941176470589
Precision: 0.8322901539864602
Recall: 0.8352941176470589
F-score: 0.8315815818145278


## C-Support Vector Classification (SVC)

In [8]:
# Support vector classifier on the same scaled features as the logistic regression.
model_svc = SVC(gamma='auto')
model_svc.fit(x_train, y_train.to_numpy().ravel())

y_predicted_v3 = model_svc.predict(x_test)
evaluation(y_expected, y_predicted_v3)

Accuracy: 0.8176470588235294
Precision: 0.8117886456908344
Recall: 0.8176470588235294
F-score: 0.813678535845978


## PyTorch model

In [9]:
# Build the full feature matrix and integer class targets for the PyTorch model.
#
# The feature DataFrame (not its bare `.values`) is handed to the scaler: the scaler
# was fitted on a DataFrame, so passing named columns keeps sklearn's feature-name
# check meaningful and avoids its "X does not have valid feature names" warning.
#
# NOTE(review): `scaler` was fitted on the leading 80% of rows only, whereas the
# PyTorch train/test split is a random permutation of all rows, so part of the
# PyTorch test set contributed to the scaling statistics. Confirm this is acceptable.
X_numpy = scaler.transform(alldata.drop("Class", axis=1))

# Map each class label to an integer id, in order of first appearance in the data.
target_map = {
    val: index for index, val in enumerate(alldata.Class.unique())
}
y_numpy = alldata.Class.map(target_map).values

X = torch.tensor(X_numpy, dtype=torch.float32)
# CrossEntropyLoss expects integer (long) class indices; make the dtype explicit.
y = torch.tensor(y_numpy, dtype=torch.long)

target_map



{'van': 0, 'saab': 1, 'bus': 2, 'opel': 3}

In [10]:
def one_hot_encode(vector, n_classes=None):
    """One-hot encode a 1-D tensor of non-negative integer class ids.

    Args:
        vector: 1-D tensor of class ids.
        n_classes: Width of the encoding. When omitted it is inferred as
            `max(vector) + 1`, so ids that happen to be absent from `vector`
            still get a column and every id stays in bounds. (Counting the
            unique values instead would under-size the output for inputs
            such as `[0, 2]`.)

    Returns:
        A `(len(vector), n_classes)` LongTensor with a single 1 per row.
    """
    indices = vector.type(torch.LongTensor)

    if indices.numel() == 0:
        # Nothing to scatter; return an empty encoding of the requested width.
        return torch.zeros((0, n_classes or 0), dtype=torch.long)

    if n_classes is None:
        n_classes = int(indices.max()) + 1

    one_hot = torch.zeros((indices.shape[0], n_classes), dtype=torch.long)
    # Write a 1 into the column named by each row's class id.
    return one_hot.scatter(1, indices.unsqueeze(1), 1)

# One-hot version of the integer targets. The training loop in this notebook passes
# integer class indices to the loss, so nothing visible here consumes this encoding.
y_one_hot = one_hot_encode(y)

In [11]:
# Random 80/20 split of the PyTorch tensors. A dedicated, seeded generator makes the
# permutation (and therefore the reported test metrics) reproducible across runs.
split_generator = torch.Generator().manual_seed(42)
random_indices = torch.randperm(X.shape[0], generator=split_generator)
print(X.shape[0])
n_train = int(0.8 * X.shape[0])

train_indices = random_indices[:n_train]
test_indices = random_indices[n_train:]

# NOTE: `y_train` is rebound here to a tensor; it previously held the target
# DataFrame used by the sklearn models, so those cells cannot be re-run after this
# one without re-running the preprocessing cell first.
X_train = X[train_indices]
y_train = y[train_indices]
y_train_one_hot = y_one_hot[train_indices]

X_test = X[test_indices]
y_test = y[test_indices]
y_test_one_hot = y_one_hot[test_indices]

846


In [12]:
# Seed the global RNG so the layer's random weight initialisation is reproducible.
torch.manual_seed(42)

# Single linear layer (multinomial logistic regression). Layer sizes are derived
# from the data instead of being hard-coded: one input per feature column, one
# output per class.
n_features = X.shape[1]
n_classes = len(target_map)
model = torch.nn.Sequential(
    torch.nn.Linear(n_features, n_classes)
)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# CrossEntropyLoss takes raw logits and integer class indices.
loss_function = torch.nn.CrossEntropyLoss()

In [13]:
# Full-batch gradient descent on the training split.
n_iterations = 1000
for i in range(1, n_iterations + 1):
    optimizer.zero_grad()             # clear gradients left over from the previous step
    Z = model(X_train)                # logits, one row per training sample
    loss = loss_function(Z, y_train)
    loss.backward()
    optimizer.step()

    if i == 1 or i % 100 == 0:
        print(f"Loss at iteration {i}: {loss.item()}")

# Inference only: no autograd graph is needed. Softmax is monotonic, so taking the
# argmax of the raw logits picks the same class as the argmax of the probabilities.
with torch.no_grad():
    test_predictions = torch.argmax(model(X_test), dim=1)

n_correct = (test_predictions == y_test).sum().item()
test_accuracy = n_correct / y_test.shape[0]
print(f"\nFinal Test Accuracy: {test_accuracy}")

Loss at iteration 1: 1.4181666374206543
Loss at iteration 100: 0.878308892250061
Loss at iteration 200: 0.754825234413147
Loss at iteration 300: 0.6911817789077759
Loss at iteration 400: 0.650195837020874
Loss at iteration 500: 0.6206653118133545
Loss at iteration 600: 0.5979087352752686
Loss at iteration 700: 0.5795814990997314
Loss at iteration 800: 0.5643594264984131
Loss at iteration 900: 0.5514271259307861
Loss at iteration 1000: 0.5402486324310303

Final Test Accuracy: 0.7764705882352941


In [14]:
# Report accuracy / weighted precision, recall and F-score for the PyTorch model.
# Its test set comes from a random permutation of all rows, not from the trailing
# 20% used for the sklearn models, so the figures are not computed on the same rows.
evaluation(y_test, test_predictions)

Accuracy: 0.7764705882352941
Precision: 0.7744583847525024
Recall: 0.7764705882352941
F-score: 0.775284264857157
