In [5]:
import numpy as np

# Read the comma-separated feature file and drop its first row in one step.
# NOTE(review): the first row is discarded unconditionally; presumably it is a
# header-like or otherwise unwanted record -- confirm against the CSV.
data = np.loadtxt('data/feature_vectors.csv', delimiter=',')[1:, :]
print(data)

[[ 0.  0.  0. ...  0.  0.  0.]
 [ 0.  0.  0. ... 23. 16.  0.]
 [ 0.  0.  0. ...  0.  0.  0.]
 ...
 [ 0.  1.  3. ... 38. 36.  0.]
 [ 0.  0.  0. ... 78. 67.  0.]
 [ 0.  0.  0. ...  0.  0.  0.]]


In [6]:
# Every column except the last one is a feature; the last column is the label
# vector that the later cells pass to the classifier as the target.
X, Y = data[:, :-1], data[:, -1]

In [7]:
# Tally how many samples carry each distinct label found in Y.
unique_values, counts = np.unique(Y, return_counts=True)

# The names `value` and `count` are echoed verbatim by the f-string's `=`
# specifier, so they are part of the printed text and must keep these names.
for position, value in enumerate(unique_values):
    count = counts[position]
    print(f"{value=}: {count=}")


value=0.0: count=297
value=1.0: count=11


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. stratify=Y asks scikit-learn to keep
# the class proportions of Y in both subsets; random_state makes the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=42)

# Count the samples labelled 1 (positive) and 0 (negative) in each subset.
train_positives = np.sum(Y_train == 1)
test_positives = np.sum(Y_test == 1)
train_negatives = np.sum(Y_train == 0)
test_negatives = np.sum(Y_test == 0)
print(f"Training dataset - positive: {train_positives}, negative: {train_negatives}")
print(f"Test dataset - positive: {test_positives}, negative: {test_negatives}")


Trainig dataset - positive: 9, negative: 237
Test dataset - positive: 2, negative: 60


In [10]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score

# Baseline: RBF-kernel SVM trained on the raw (unscaled) features.
# NOTE(review): the classes are heavily imbalanced and the recorded run shows
# zero precision/recall on the test split; consider class_weight='balanced'
# and treat accuracy alone as a misleading metric here.
svm_classifier = SVC(kernel='rbf', random_state=42)

svm_classifier.fit(X_train, Y_train)

Y_pred = svm_classifier.predict(X_test)

# zero_division=0 makes the fallback explicit: when the classifier predicts no
# positive samples the score is reported as 0.0 (the value scikit-learn already
# falls back to) without emitting UndefinedMetricWarning.
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred, zero_division=0)
recall = recall_score(Y_test, Y_pred, zero_division=0)
f1 = f1_score(Y_test, Y_pred, zero_division=0)

print("accuracy_score for test:", accuracy)
print("precision for test:", precision)
print("recall for test:", recall)
print("F1-score for test:", f1)


accuracy_score for test: 0.967741935483871
precision for test: 0.0
recall for test: 0.0
F1-score for test: 0.0


  _warn_prf(average, modifier, msg_start, len(result))


In [12]:

# Predict on the same samples the classifier was fitted on.
Y_train_pred = svm_classifier.predict(X_train)

# Compute the metrics for the training set, evaluated in the order
# accuracy, precision, recall, F1.
train_accuracy, train_precision, train_recall, train_f1 = (
    metric(Y_train, Y_train_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print("accuracy_score for train:", train_accuracy)
print("precision for train:", train_precision)
print("recall for train:", train_recall)
print("F1-score for train:", train_f1)


accuracy_score for train: 0.975609756097561
precision for train: 1.0
recall for train: 0.3333333333333333
F1-score for train: 0.5


### With scaler

In [13]:
from sklearn.preprocessing import StandardScaler


def _report_metrics(title, y_true, y_pred):
    """Print and return (accuracy, precision, recall, F1) for one data split.

    Args:
        title: Header line printed before the four scores.
        y_true: Ground-truth labels of the split.
        y_pred: Labels predicted by the classifier for the same samples.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.

    zero_division=0 reports an undefined precision/recall/F1 as 0.0 (the value
    scikit-learn already falls back to) without emitting
    UndefinedMetricWarning when no positive samples are predicted.
    """
    scores = (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, zero_division=0),
        recall_score(y_true, y_pred, zero_division=0),
        f1_score(y_true, y_pred, zero_division=0),
    )
    print(title)
    for name, score in zip(("accuracy_score:", "precision:", "recall:", "F1-score:"), scores):
        print(name, score)
    return scores


# Fit the scaler on the training split only and reuse its statistics for the
# test split, so the test data does not influence the scaling parameters.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same RBF-kernel SVM as before, now trained on the standardized features.
svm_classifier = SVC(kernel='rbf', random_state=42)

svm_classifier.fit(X_train_scaled, Y_train)

Y_pred = svm_classifier.predict(X_test_scaled)
accuracy, precision, recall, f1 = _report_metrics("For test set:", Y_test, Y_pred)

Y_train_pred = svm_classifier.predict(X_train_scaled)
# "\n" (not the invalid escape "\F") separates the two reports with a blank line.
train_accuracy, train_precision, train_recall, train_f1 = _report_metrics(
    "\nFor training set:", Y_train, Y_train_pred
)


For test set:
accuracy_score: 0.967741935483871
precision: 0.0
recall: 0.0
F1-score: 0.0
\For training set:
accuracy_score: 0.9796747967479674
precision: 1.0
recall: 0.4444444444444444
F1-score: 0.6153846153846153


  _warn_prf(average, modifier, msg_start, len(result))
