In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from tensorflow.keras.layers import Dense, Dropout, LSTM, Reshape
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [None]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices it can see and report the count
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

In [2]:
# Read the metrics table from disk into a DataFrame
data = pd.read_csv('all_videos_metrics.csv')

# The 'Deception' column is the prediction target ...
y = data['Deception']
# ... and every column other than 'Deception' and 'Video' is used as a feature
X = data.drop(columns=['Deception', 'Video'])

In [3]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Recurrent Neural Network

In [4]:
# Function to create the RNN model, required for KerasClassifier
def create_rnn_model(units=50, activation='relu', optimizer='adam', dropout_rate=0.2):
    """Build and compile a two-layer LSTM binary classifier.

    The per-sample input shape is read from the module-level ``X_train``:

    * 3-D data ``(samples, timesteps, features)`` is fed to the first LSTM
      unchanged.
    * 2-D tabular data ``(samples, features)`` -- which is what the CSV
      loading cell produces -- is passed through a ``Reshape`` layer that
      presents each row as a sequence of length one, so the model can be
      fitted directly on the 2-D table.
      NOTE(review): treating a row as a one-step sequence is an assumption
      made so the pipeline runs; confirm the intended sequence layout.

    Args:
        units: Number of units in each of the two LSTM layers.
        activation: Activation function used by both LSTM layers.
        optimizer: Optimizer name or instance passed to ``model.compile``.
        dropout_rate: Dropout fraction applied after each LSTM layer.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit,
        trained with binary cross-entropy and reporting accuracy.

    Raises:
        ValueError: If ``X_train`` is neither 2-D nor 3-D.
    """
    # Shape of a single sample, i.e. everything after the leading sample axis.
    sample_shape = tuple(X_train.shape[1:])

    if len(sample_shape) == 2:
        # Already (timesteps, features): declare the shape on the first LSTM.
        leading_layers = []
        first_lstm_kwargs = {'input_shape': sample_shape}
    elif len(sample_shape) == 1:
        # Flat feature vector: LSTM layers need a time axis, so add one of
        # length 1 inside the model instead of indexing a missing dimension.
        leading_layers = [Reshape((1, sample_shape[0]), input_shape=sample_shape)]
        first_lstm_kwargs = {}
    else:
        raise ValueError(
            "X_train must be 2-D (samples, features) or 3-D "
            "(samples, timesteps, features); got shape %r" % (tuple(X_train.shape),)
        )

    model = Sequential(leading_layers + [
        # return_sequences=True so the second LSTM receives the full sequence
        LSTM(units, activation=activation, return_sequences=True, **first_lstm_kwargs),
        Dropout(dropout_rate),
        LSTM(units, activation=activation),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [None]:
# Expose the Keras model builder through the scikit-learn estimator API so it
# can be handed to scikit-learn model-selection tools.
# NOTE(review): the tensorflow.keras.wrappers.scikit_learn module is reported
# removed in newer TensorFlow releases -- confirm the installed version.
rnn_model = KerasClassifier(
    build_fn=create_rnn_model,
    epochs=100,
    batch_size=32,
    verbose=0,
)

In [5]:
# Candidate values sampled by the random search: model-builder arguments
# (units, activation, optimizer, dropout_rate) plus training settings
# (epochs, batch_size)
param_dist = dict(
    units=[50, 100, 150],
    activation=['relu', 'tanh'],
    optimizer=['adam', 'rmsprop'],
    dropout_rate=[0.1, 0.2, 0.3],
    epochs=[50, 100],
    batch_size=[16, 32, 64],
)

# Randomized search: 10 sampled configurations, each scored with 3-fold CV
random_search = RandomizedSearchCV(
    estimator=rnn_model,
    param_distributions=param_dist,
    n_iter=10,
    cv=3,
    verbose=2,
    random_state=42,
)


In [6]:
# Run the randomized hyper-parameter search on the training split
random_search.fit(X_train, y=y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


In [16]:
# Report the winning configuration and its cross-validated score
print("Best parameters:", random_search.best_params_)
print("Best score:", random_search.best_score_)

# Take the best estimator found by the search and predict on the held-out test split
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

# Compare the predictions with the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

print("Test set accuracy:", accuracy)
print("Classification Report:\n", class_report)

Best parameters: {'subsample': 0.7, 'n_estimators': 500, 'max_depth': 9, 'learning_rate': 0.12999999999999998, 'gamma': 1.2000000000000002, 'colsample_bytree': 0.8}
Best accuracy: 0.7819321400813297
Test set accuracy: 0.7842194415605704
