# Train the model

In [15]:
from sklearn.model_selection import train_test_split
import pandas as pd
import pickle, os

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import *

In [7]:
df = pd.read_csv('../dataset/csv/finale_coords.csv')

In [8]:
df.head(3)

Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,z43,v43,x44,y44,z44,v44,x45,y45,z45,v45
0,Celinguk,0.798976,0.749394,-2.025858,0.992949,0.816979,0.671713,-1.966166,0.988409,0.734435,...,-1.78471,0.996934,0.444733,0.640802,-1.864306,0.995479,0.34341,0.628823,-1.754694,0.99665
1,Celinguk,0.630763,0.715939,-1.670721,0.998392,0.699276,0.641763,-1.630399,0.998039,0.563152,...,-1.723995,0.997969,0.469674,0.65657,-1.730082,0.997713,0.375618,0.648416,-1.680366,0.997022
2,Celinguk,0.645962,0.681424,-1.684919,0.999164,0.680811,0.612878,-1.617121,0.998941,0.569681,...,-1.845601,0.996398,0.808877,0.621602,-1.753377,0.994095,0.715321,0.616099,-1.838073,0.995596


In [9]:
df['class'].value_counts()

class
Terdiam     11
Celinguk    10
Name: count, dtype: int64

In [12]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1234)

print(f"Train set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")

Train set size: 17
Test set size: 4


In [13]:
y_test

3     Celinguk
13     Terdiam
2     Celinguk
16     Terdiam
Name: class, dtype: object

In [17]:
model = make_pipeline(StandardScaler(), SVC(probability=True)).fit(X_train, y_train)
y_probs = model.predict_proba(X_train)

In [18]:
version = 1
path = f'../Model/'
os.makedirs(os.path.dirname(path), exist_ok=True)

with open(f'{path}/v{version}_model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [19]:
y_pred = model.predict(X_train)

In [20]:
y_pred

array(['Terdiam', 'Celinguk', 'Celinguk', 'Celinguk', 'Celinguk',
       'Terdiam', 'Terdiam', 'Terdiam', 'Terdiam', 'Celinguk', 'Celinguk',
       'Celinguk', 'Terdiam', 'Terdiam', 'Celinguk', 'Terdiam', 'Terdiam'],
      dtype=object)

In [21]:
# Set the threshold for positive detection
threshold = 0.75

# Get the predicted probabilities for the test set
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Apply the threshold to get binary predictions
y_pred = (y_pred_proba >= threshold).astype(int)

# Convert y_test to binary format (1 for positive class, 0 for negative class)
y_test_binary = (y_test == 'positive_class').astype(int)  # Replace 'positive_class' with the actual positive class label


# Generate confusion matrix
cm = confusion_matrix(y_test_binary, y_pred)
TN, FP, FN, TP = cm.ravel()

print("True Positive (TP):", TP)
print("False Positive (FP):", FP)
print("True Negative (TN):", TN)
print("False Negative (FN):", FN)

True Positive (TP): 0
False Positive (FP): 2
True Negative (TN): 2
False Negative (FN): 0
