# Train the dataset

In [2]:
from sklearn.model_selection import train_test_split
import pandas as pd
import pickle, os

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import *

In [7]:
df = pd.read_csv('../../dataset/csv/dataset_v0.csv')
df.head(3)


Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z73,v73,x74,y74,z74,v74,x75,y75,z75,v75
0,diam,0.543607,0.154529,-0.806512,0.999885,0.549615,0.124989,-0.78971,0.999588,0.514044,...,-0.771526,0.999838,0.385645,0.659511,-0.477846,0.978621,0.6633,0.577869,-0.231719,0.906865
1,diam,0.50617,0.182203,-0.680235,0.999978,0.531654,0.153888,-0.655531,0.999925,0.477606,...,-0.696684,0.999917,0.345774,0.67079,-0.396579,0.979707,0.688755,0.615902,-0.268427,0.985749
2,diam,0.504694,0.167553,-0.766055,0.999976,0.527472,0.134284,-0.739763,0.999921,0.47508,...,-0.780847,0.99994,0.352903,0.677664,-0.461034,0.982088,0.679477,0.620663,-0.349626,0.985


In [6]:
df['class'].value_counts()

class
diam        35
celinguk    35
Name: count, dtype: int64

In [8]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1234)

print(f"Train set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

Train set size: 59
Test set size: 11


In [9]:
y_test

46    celinguk
57    celinguk
33        diam
36    celinguk
6         diam
25        diam
63    celinguk
66    celinguk
8         diam
20        diam
29        diam
Name: class, dtype: object

In [10]:
model = make_pipeline(StandardScaler(), SVC(probability=True)).fit(X_train, y_train)
y_probs = model.predict_proba(X_train)

In [11]:
version = 0
path = f'../../model/trained/'
os.makedirs(os.path.dirname(path), exist_ok=True)

with open(f'{path}/model_v{version}.pkl', 'wb') as f:
    pickle.dump(model, f)

In [13]:
y_pred = model.predict(X_train)
y_pred

array(['diam', 'diam', 'diam', 'celinguk', 'celinguk', 'diam', 'celinguk',
       'celinguk', 'celinguk', 'diam', 'celinguk', 'diam', 'celinguk',
       'diam', 'diam', 'diam', 'diam', 'diam', 'celinguk', 'diam',
       'celinguk', 'celinguk', 'diam', 'celinguk', 'diam', 'diam',
       'celinguk', 'celinguk', 'celinguk', 'diam', 'diam', 'celinguk',
       'diam', 'celinguk', 'diam', 'celinguk', 'diam', 'celinguk',
       'celinguk', 'diam', 'diam', 'diam', 'diam', 'celinguk', 'celinguk',
       'celinguk', 'celinguk', 'celinguk', 'celinguk', 'diam', 'diam',
       'celinguk', 'diam', 'celinguk', 'diam', 'diam', 'celinguk',
       'celinguk', 'celinguk'], dtype=object)

In [14]:
# Set the threshold for positive detection
threshold = 0.75

# Get the predicted probabilities for the test set
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Apply the threshold to get binary predictions
y_pred = (y_pred_proba >= threshold).astype(int)

# Convert y_test to binary format (1 for positive class, 0 for negative class)
y_test_binary = (y_test == 'positive_class').astype(int)  # Replace 'positive_class' with the actual positive class label


# Generate confusion matrix
cm = confusion_matrix(y_test_binary, y_pred)
TN, FP, FN, TP = cm.ravel()

print("True Positive (TP):", TP)
print("False Positive (FP):", FP)
print("True Negative (TN):", TN)
print("False Negative (FN):", FN)

True Positive (TP): 0
False Positive (FP): 6
True Negative (TN): 5
False Negative (FN): 0
