# Train the model

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import pickle, os

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import *

In [2]:
df = pd.read_csv('dataset/csv/ds_faced_v1.csv')

In [3]:
df.head(3)

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,v73,x74,y74,z74,v74,x75,y75,z75,v75,face_d15
0,CelingakCelinguk,0.607824,0.675347,-1.774675,0.995053,0.677087,0.639612,-0.936251,0.992441,0.435848,...,0.991024,0.212395,1.56619,-0.976496,0.022729,0.212395,1.56619,-0.976496,0.022729,2.0
1,CelingakCelinguk,0.697874,0.692874,-1.678397,0.996192,0.69459,0.639218,-0.872528,0.992959,0.500863,...,0.991732,0.230934,1.657102,-0.560761,0.039072,0.230934,1.657102,-0.560761,0.039072,0.0
2,CelingakCelinguk,0.516435,0.684887,-1.572658,0.99531,0.682988,0.643252,-0.913507,0.994711,0.449533,...,0.996217,0.274223,1.540875,-1.733695,0.037122,0.274223,1.540875,-1.733695,0.037122,2.0


In [4]:
df['class'].value_counts()

class
CelingakCelinguk    30
Diam                30
NutupMuka           30
Name: count, dtype: int64

In [5]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1234)

print(f"Train set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

Train set size: 76
Test set size: 14


In [6]:
y_test

36                Diam
59                Diam
65           NutupMuka
45                Diam
60           NutupMuka
84           NutupMuka
72           NutupMuka
68           NutupMuka
9     CelingakCelinguk
74           NutupMuka
29    CelingakCelinguk
82           NutupMuka
44                Diam
10    CelingakCelinguk
Name: class, dtype: object

In [7]:
model = make_pipeline(StandardScaler(), SVC(probability=True)).fit(X_train, y_train)
y_probs = model.predict_proba(X_train)

In [12]:
version = 3
path = f'../Model/'
os.makedirs(os.path.dirname(path), exist_ok=True)

with open(f'{path}/v{version}_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [9]:
y_pred = model.predict(X_train)

In [10]:
y_pred

array(['Diam', 'Diam', 'NutupMuka', 'Diam', 'CelingakCelinguk', 'Diam',
       'NutupMuka', 'CelingakCelinguk', 'NutupMuka', 'Diam', 'NutupMuka',
       'Diam', 'NutupMuka', 'Diam', 'CelingakCelinguk', 'Diam', 'Diam',
       'NutupMuka', 'NutupMuka', 'NutupMuka', 'CelingakCelinguk',
       'CelingakCelinguk', 'CelingakCelinguk', 'CelingakCelinguk',
       'CelingakCelinguk', 'CelingakCelinguk', 'Diam', 'NutupMuka',
       'Diam', 'CelingakCelinguk', 'NutupMuka', 'Diam',
       'CelingakCelinguk', 'NutupMuka', 'CelingakCelinguk',
       'CelingakCelinguk', 'Diam', 'Diam', 'Diam', 'CelingakCelinguk',
       'NutupMuka', 'CelingakCelinguk', 'CelingakCelinguk', 'NutupMuka',
       'Diam', 'NutupMuka', 'CelingakCelinguk', 'CelingakCelinguk',
       'Diam', 'CelingakCelinguk', 'CelingakCelinguk', 'CelingakCelinguk',
       'CelingakCelinguk', 'NutupMuka', 'NutupMuka', 'Diam', 'Diam',
       'Diam', 'NutupMuka', 'NutupMuka', 'NutupMuka', 'Diam', 'NutupMuka',
       'NutupMuka', 'Diam', 'Diam'

In [11]:
# Set the threshold for positive detection
threshold = 0.75

# Get the predicted probabilities for the test set
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Apply the threshold to get binary predictions
y_pred = (y_pred_proba >= threshold).astype(int)

# Convert y_test to binary format (1 for positive class, 0 for negative class)
y_test_binary = (y_test == 'positive_class').astype(int)  # Replace 'positive_class' with the actual positive class label


# Generate confusion matrix
cm = confusion_matrix(y_test_binary, y_pred)
TN, FP, FN, TP = cm.ravel()

print("True Positive (TP):", TP)
print("False Positive (FP):", FP)
print("True Negative (TN):", TN)
print("False Negative (FN):", FN)

True Positive (TP): 0
False Positive (FP): 2
True Negative (TN): 12
False Negative (FN): 0
