In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import classification_report, confusion_matrix, precision_score

In [2]:
# Load the embeddings table from the CSV file in the working directory.
df = pd.read_csv('celeb_embeddings.csv')

# Preview three randomly chosen rows. The fixed seed makes the preview show
# the same rows on every "Restart Kernel -> Run All" instead of a new draw
# each time the cell is executed.
df.sample(3, random_state=42)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,503,504,505,506,507,508,509,510,511,target
331,-0.014808,0.033748,-0.062553,0.043534,0.044709,-0.011139,-0.016997,0.143988,0.009451,0.030882,...,-0.018095,0.032704,-0.031931,-0.004914,0.048361,0.027371,0.027022,0.014785,-0.01225,1
295,0.035514,0.02536,0.010652,0.01768,0.050758,-0.010559,0.031325,0.025042,0.03453,0.068032,...,-0.072164,0.019291,-0.057132,-0.095795,0.078904,0.02708,-0.017309,0.025224,0.023855,1
662,0.002962,0.041258,-0.031453,-0.012893,-0.000747,-0.002275,-0.002039,0.008099,0.018446,0.019329,...,-0.004031,0.121651,0.010412,0.000583,-0.002722,0.020523,0.051351,0.062593,-0.032006,0


In [3]:
# The label is the 'target' column; every other column is used as a feature
y = df['target']
X = df.drop(columns='target')

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [4]:
def print_model_metrics(y_true, y_pred, average='binary'):
    """Print a classification report, the confusion matrix and the precision.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by a classifier, aligned element-wise with ``y_true``.
    average : str, default='binary'
        Averaging mode forwarded to ``precision_score``. The default keeps the
        original behaviour: precision of the positive class in a two-class
        problem. For multiclass targets ``'binary'`` raises ``ValueError``;
        pass ``'micro'``, ``'macro'`` or ``'weighted'`` instead.

    Returns
    -------
    None
        All results are written to stdout.
    """
    # Per-class precision / recall / F1 together with the class support
    print(classification_report(y_true, y_pred))

    # Confusion matrix: rows are the true labels, columns the predicted labels
    cm = confusion_matrix(y_true, y_pred)
    print("### confusion matrix ###")
    print(cm)
    print("")

    # Single headline precision figure, rounded to four decimals
    precision = precision_score(y_true, y_pred, average=average)
    print("### Precision ###")
    print(round(precision, 4))

In [5]:
# k-nearest-neighbours model: 3 neighbours, equal vote weights, Euclidean distance.
# fit() returns the estimator itself, so the fitted model is bound directly to clf.
clf = KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
    metric='euclidean',
).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = clf.predict(X_test)

# Report classification report, confusion matrix and precision on the test split
print_model_metrics(y_test, y_pred)

              precision    recall  f1-score   support

           0       1.00      0.81      0.89        57
           1       0.83      1.00      0.91        53

    accuracy                           0.90       110
   macro avg       0.91      0.90      0.90       110
weighted avg       0.92      0.90      0.90       110

### confusion matrix ###
[[46 11]
 [ 0 53]]

### Precision ###
0.8281


In [6]:
# Support-vector classifier with an RBF kernel.
# NOTE: per the scikit-learn docs, random_state only affects SVC when
# probability=True; it is kept here unchanged and has no effect on this fit.
clf = SVC(
    C=1.0,
    kernel='rbf',
    gamma='scale',
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = clf.predict(X_test)

# Report classification report, confusion matrix and precision on the test split
print_model_metrics(y_test, y_pred)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99        57
           1       1.00      0.98      0.99        53

    accuracy                           0.99       110
   macro avg       0.99      0.99      0.99       110
weighted avg       0.99      0.99      0.99       110

### confusion matrix ###
[[57  0]
 [ 1 52]]

### Precision ###
1.0


In [7]:
# Create and train a Decision Tree classifier.
# random_state is fixed (same seed as the train/test split) because the tree
# randomly permutes the features at every split even with splitter='best';
# without a seed, ties are broken differently from run to run and the metrics
# below cannot be reproduced.
clf = DecisionTreeClassifier(
    criterion='gini',
    splitter='best',
    max_depth=None,
    random_state=42,
)
clf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = clf.predict(X_test)

# Print the model metrics
print_model_metrics(y_test, y_pred)

              precision    recall  f1-score   support

           0       0.88      0.88      0.88        57
           1       0.87      0.87      0.87        53

    accuracy                           0.87       110
   macro avg       0.87      0.87      0.87       110
weighted avg       0.87      0.87      0.87       110

### confusion matrix ###
[[50  7]
 [ 7 46]]

### Precision ###
0.8679


In [8]:
# Create and train a Random Forest classifier.
# random_state is fixed (same seed as the train/test split) so that the
# bootstrap samples and per-split feature sub-sampling are identical on every
# run; without it the reported metrics change on each execution.
clf = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=None,
    random_state=42,
)
clf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = clf.predict(X_test)

# Print the model metrics
print_model_metrics(y_test, y_pred)

              precision    recall  f1-score   support

           0       0.95      0.98      0.97        57
           1       0.98      0.94      0.96        53

    accuracy                           0.96       110
   macro avg       0.96      0.96      0.96       110
weighted avg       0.96      0.96      0.96       110

### confusion matrix ###
[[56  1]
 [ 3 50]]

### Precision ###
0.9804


In [9]:
# Create and train a MLP classifier (one hidden layer of 100 ReLU units, Adam).
# random_state is fixed (same seed as the train/test split) so that weight
# initialisation and mini-batch shuffling are identical on every run; without
# it the reported metrics change on each execution.
clf = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    random_state=42,
)
clf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = clf.predict(X_test)

# Print the model metrics
print_model_metrics(y_test, y_pred)

              precision    recall  f1-score   support

           0       0.97      0.98      0.97        57
           1       0.98      0.96      0.97        53

    accuracy                           0.97       110
   macro avg       0.97      0.97      0.97       110
weighted avg       0.97      0.97      0.97       110

### confusion matrix ###
[[56  1]
 [ 2 51]]

### Precision ###
0.9808


In [10]:
# Create and train AdaBoost classifier boosting depth-1 decision trees (stumps).
# random_state is fixed (same seed as the train/test split): AdaBoost uses it
# to seed each base estimator that exposes a random_state, which the decision
# tree does. Without it the reported metrics can change between runs.
clf = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    learning_rate=1.0,
    random_state=42,
)
clf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = clf.predict(X_test)

# Print the model metrics
print_model_metrics(y_test, y_pred)

              precision    recall  f1-score   support

           0       0.95      0.96      0.96        57
           1       0.96      0.94      0.95        53

    accuracy                           0.95       110
   macro avg       0.95      0.95      0.95       110
weighted avg       0.95      0.95      0.95       110

### confusion matrix ###
[[55  2]
 [ 3 50]]

### Precision ###
0.9615
