In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the per-video metrics table into a DataFrame
data = pd.read_csv('all_videos_metrics.csv')

# Target: the 'Deception' column. Features: every remaining column except
# 'Video' (presumably a per-video identifier rather than a predictor — TODO confirm).
y = data['Deception']
X = data.drop(['Deception', 'Video'], axis=1)

In [3]:
# Hold out 20% of the rows as a test set. The split is seeded for reproducibility
# and stratified on the target so that train and test keep the same class
# proportions as the full data set (an unstratified split can skew a small or
# imbalanced test set and distort the reported accuracy).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# SVM

In [4]:
# Define the model
# Base SVM classifier, created without arguments (library defaults); the
# randomized search defined further down supplies C, kernel, gamma, degree and coef0.
# NOTE(review): no feature-scaling step is visible in this notebook, and SVMs are
# sensitive to feature scale — consider wrapping this in a Pipeline with a scaler.
svm_model = SVC()

In [5]:
# Candidate hyper-parameter values that the randomized search samples from
param_dist = dict(
    # Regularization strength: 20 log-spaced values from 1e-4 to 1e4
    C=np.logspace(-4, 4, num=20),
    # Kernel families to try
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    # Kernel coefficient (used by 'rbf', 'poly' and 'sigmoid'): the two built-in
    # heuristics plus 13 log-spaced explicit values from 1e-9 to 1e3
    gamma=['scale', 'auto', *np.logspace(-9, 3, num=13)],
    # Polynomial degree (only used by the 'poly' kernel)
    degree=list(range(2, 6)),
    # Independent kernel term (only significant for 'poly' and 'sigmoid'):
    # 10 evenly spaced values from 0 to 10 inclusive
    coef0=np.linspace(0, 10, num=10),
)

# Randomized hyper-parameter search: 50 sampled candidates, each scored with
# 5-fold cross-validation, run with n_jobs=-1 and a fixed seed
random_search = RandomizedSearchCV(
    estimator=svm_model,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    random_state=42,
    n_jobs=-1,
    verbose=2,
)


In [6]:
# Run the randomized search on the training split
# (5 folds x 50 candidates = 250 individual fits)
random_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


In [16]:
# NOTE(review): the output saved under this cell lists XGBoost-style parameters
# ('subsample', 'n_estimators', 'colsample_bytree') and a "Best accuracy:" label
# that this code never prints, so it was not produced by this SVM search.
# Re-run the notebook top to bottom (Restart & Run All) to refresh it.

# Report the winning hyper-parameter combination and its mean cross-validated score
print("Best parameters:", random_search.best_params_)
print("Best score:", random_search.best_score_)

# Evaluate the best estimator found by the search on the held-out test split
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Test set accuracy:", accuracy)
# Print the report on its own line: passing it as a second print() argument
# inserts a separator space that shifts the header row out of alignment.
print("Classification Report:")
print(class_report)

Best parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 9, 'learning_rate': 0.12999999999999998, 'gamma': 1.2000000000000002, 'colsample_bytree': 0.8}
Best accuracy: 0.7819321400813297
Test set accuracy: 0.7842194415605704
