In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
from pycaret.classification import *
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
# mlflow.set_experiment("Baseline-Models") 

In [2]:
# Load the embeddings table: one row per sample, with the label in 'target'.
df = pd.read_csv('celeb_embeddings.csv')

# Preview three random rows; the fixed seed makes the preview identical on
# every "Restart & Run All" instead of changing from run to run.
df.sample(3, random_state=42)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,target
228,0.004839,0.037962,0.000964,-0.04296,0.073875,-0.024658,-0.0146,0.051273,0.056075,0.040022,...,-0.060652,0.03823,-0.048362,-0.094684,0.037091,0.010041,-0.040914,0.006107,0.043223,1
464,0.017879,0.104591,0.046294,-0.014512,0.054697,-0.023779,-0.010416,0.044969,0.039795,0.020536,...,-0.007118,0.019549,-0.09236,0.016107,-0.013383,0.044809,0.051787,0.009144,0.026285,1
682,-0.014951,-0.035121,-0.050525,-0.025533,-0.028107,0.033384,0.041759,0.030706,0.0502,-0.012033,...,-0.021835,0.048683,0.10116,-0.000138,0.027972,0.009944,-0.074664,-0.07379,-0.01191,0


In [3]:
# The label is the 'target' column; every other column is used as a feature.
y = df['target']
X = df.drop(columns='target')

# Hold out 10% of the rows for testing. The fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [4]:
def generate_model_metrics(y_true, y_pred, average='binary'):
    """Compute the standard classification metrics for one set of predictions.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned element-wise with ``y_true``.
        average: Averaging mode forwarded to ``precision_score``,
            ``recall_score`` and ``f1_score``. The default ``'binary'``
            keeps the original two-class behaviour; pass e.g. ``'macro'`` or
            ``'weighted'`` when the target has more than two classes.

    Returns:
        A tuple ``(accuracy, precision, recall, f1, cm)`` where ``cm`` is the
        confusion matrix produced by ``confusion_matrix(y_true, y_pred)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # The same averaging mode is used for all three so the scores are comparable.
    precision = precision_score(y_true, y_pred, average=average)
    recall = recall_score(y_true, y_pred, average=average)
    f1 = f1_score(y_true, y_pred, average=average)
    cm = confusion_matrix(y_true, y_pred)

    return accuracy, precision, recall, f1, cm

In [7]:
file_path = 'celeb_embeddings.csv'
data = pd.read_csv(file_path)

# Initialise the PyCaret experiment with 'target' as the label column.
# session_id controls the randomness of the experiment; pinning it makes the
# leaderboard below reproducible across kernel restarts. 42 matches the seed
# used for the scikit-learn train/test split in this notebook.
# (pycaret.classification is already star-imported in the first cell, so the
# import is not repeated here.)
clf1 = setup(data = data, target = 'target', session_id = 42)

# Train and cross-validate the available classifiers, ranking them by F1;
# `best` is the top-ranked model.
best = compare_models(sort = 'F1')

## Testing all classifier models from PyCaret

In [6]:
# Here we test our dataset with all classifier models from PyCaret.
# Initialize the PyCaret environment. session_id=123 pins the experiment's
# random seed so the same results are reproduced on every run.
classifier_test = setup(data = df, target = 'target', session_id = 123)

# Compare all (classifier) models in PyCaret
models = compare_models()

# Select the best model trained in this session, judged by accuracy
best_model = automl(optimize = 'Accuracy')

# Fine-tune the hyperparameters of the best model
tuned_best_model = tune_model(best_model)

# Score the tuned model on PyCaret's hold-out split (predict_model with no
# `data` argument uses the hold-out set). These rows were not used to fit the
# model, so this is the figure to quote as generalisation performance.
holdout_predictions = predict_model(tuned_best_model)

# Predict class labels for every row of df.
# NOTE: df also contains the rows the model was trained on, so the metrics
# printed for this call are optimistic and should not be reported as test scores.
predictions = predict_model(tuned_best_model, data = df)


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)


Fitting 10 folds for each of 10 candidates, totalling 100 fits


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Extra Trees Classifier,0.9918,0.9998,0.991,0.9928,0.9919,0.9835,0.9835


In [14]:
# Persist the tuned pipeline to disk under the name 'tuned_best_model'.
save_model(model = tuned_best_model, model_name = 'tuned_best_model')

# Read the same pipeline back from disk.
loaded_model = load_model(model_name = 'tuned_best_model')

# Score df with the reloaded pipeline; matching metrics against the in-memory
# model confirm that the save/load round trip preserved it.
predictions_loaded_model = predict_model(estimator = loaded_model, data = df)

Transformation Pipeline and Model Successfully Saved
Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Extra Trees Classifier,0.9918,0.9998,0.991,0.9928,0.9919,0.9835,0.9835


## Testing SVM model

In [7]:
from sklearn.svm import SVC

# Build a linear-kernel support vector classifier and train it on the training
# split. fit() returns the estimator itself, so construction and training chain.
svm_model = SVC(kernel='linear').fit(X_train, y_train)

# Predict labels for the held-out test rows.
svm_predictions = svm_model.predict(X_test)


In [8]:
# Evaluate the SVM's test-set predictions against the true test labels.
(
    svm_accuracy,
    svm_precision,
    svm_recall,
    svm_f1,
    svm_cm,
) = generate_model_metrics(y_true=y_test, y_pred=svm_predictions)

In [13]:
# Print the SVM scores as a fixed-width table. Explicit column widths keep the
# header and values aligned (tab stops did not), and four decimals are enough
# precision for comparing models.
print(f"{'Model':<12}{'Accuracy':>10}{'Precision':>11}{'Recall':>9}{'F1 Score':>10}")
print(f"{'SVM Model':<12}{svm_accuracy:>10.4f}{svm_precision:>11.4f}{svm_recall:>9.4f}{svm_f1:>10.4f}")


Model			Accuracy		Precision		Recall			F1 Score
SVM Model		0.9272727272727272	0.9245283018867925	0.9245283018867925	0.9245283018867925
