In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# --- Data preparation --------------------------------------------------------
# Read the pre-extracted audio feature table. Every column except the last is
# used as a feature; the last column is used as the class label.
data = pd.read_csv('DATASET-balanced.csv')
X = data.iloc[:, :-1].to_numpy()
y = data.iloc[:, -1].to_numpy()

# Map the raw labels to integer codes. The fitted encoder is kept so that
# predictions can be mapped back to the original labels later on.
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Append a trailing axis of length 1 so each sample has shape (n_features, 1),
# which is the per-sample input shape the models below are built for.
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# --- Model definitions -------------------------------------------------------
# Seed Python, NumPy and the Keras backend *before* any weights are created so
# that weight initialisation, batch shuffling and dropout masks repeat across
# "Restart Kernel -> Run All" (some GPU kernels may still be non-deterministic).
keras.utils.set_random_seed(42)

# Per-sample input shape shared by all three models: (n_features, 1).
INPUT_SHAPE = (X_train.shape[1], 1)


def _build_conv_classifier(input_shape):
    """Return an uncompiled 1-D convolutional binary classifier.

    The stack is Conv1D(64) -> MaxPool -> Conv1D(128) -> MaxPool -> Flatten ->
    Dense(256) -> Dropout(0.5) -> Dense(1, sigmoid), so the model emits a
    single probability per sample.

    Args:
        input_shape: Per-sample input shape, e.g. ``(n_features, 1)``.

    Returns:
        A new ``Sequential`` model with freshly initialised weights.
    """
    return Sequential([
        # An explicit Input object replaces the `input_shape=` layer argument,
        # which newer Keras releases warn about.
        keras.Input(shape=input_shape),
        layers.Conv1D(64, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(128, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),
    ])


# First model (TDNN). NOTE: this uses exactly the same layer stack as the third
# model below; the two differ only in their randomly initialised weights.
model1 = _build_conv_classifier(INPUT_SHAPE)

# Second model (RNN): one SimpleRNN layer that consumes the feature vector as a
# sequence of n_features steps with one value per step.
model2 = Sequential([
    keras.Input(shape=INPUT_SHAPE),
    layers.SimpleRNN(64, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

# Third model (CNN): same architecture as model1, independently initialised.
model3 = _build_conv_classifier(INPUT_SHAPE)

# --- Compile and train -------------------------------------------------------
# The ensemble members, in the order their predictions are later combined.
models = [model1, model2, model3]

# Every member gets the same binary-classification setup: binary cross-entropy
# loss, the Adam optimizer, and accuracy as the reported metric.
for member in models:
    member.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

# Train the members one after another on the same training split.
# NOTE: validation_data is the held-out test split, so the per-epoch
# "val_*" numbers shown during training are test-set numbers.
for member in models:
    member.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        batch_size=32,
        epochs=10,
    )

# --- Ensemble prediction and evaluation --------------------------------------
# One column per ensemble member, holding that member's sigmoid output
# (a probability in [0, 1]) for every test sample.
predictions = np.zeros((len(X_test), len(models)))

for i, model in enumerate(models):
    predictions[:, i] = model.predict(X_test).flatten()

# Soft voting: average the members' probabilities, then threshold at 0.5 to
# obtain the encoded class (0 or 1).
average_predictions = np.mean(predictions, axis=1)
ensemble_predictions = (average_predictions > 0.5).astype(int)

# Map the encoded classes back to the original labels.
ensemble_predictions_original = label_encoder.inverse_transform(ensemble_predictions)

# y_test is rebound to the original labels as well, because the follow-up
# metrics cell compares against a string label (pos_label='FAKE').
y_test = label_encoder.inverse_transform(y_test)

# y_pred is what the follow-up metrics cell scores. It previously came from
# `model.predict(...)`, where `model` was just the loop variable left over from
# the loop above (i.e. only the last network), so the reported
# precision/recall/F1 did not describe the ensemble. Bind it to the ensemble's
# predictions so every reported metric refers to the same predictor.
y_pred = ensemble_predictions_original

# Calculate accuracy of the ensemble on the test split.
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions_original)
print(f'Ensemble Accuracy: {ensemble_accuracy}')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Ensemble Accuracy: 0.9605263157894737


In [None]:
from sklearn.metrics import precision_score,recall_score,f1_score

# Precision / recall / F1 of the *ensemble*, treating 'FAKE' as the positive
# class. The ensemble's label predictions are scored here (rather than y_pred,
# which held a single network's output) so these numbers describe the same
# predictor as the "Ensemble Accuracy" printed by the previous cell.
# Both y_test and ensemble_predictions_original hold original labels.
binary_scoring = {'average': 'binary', 'pos_label': 'FAKE'}

precision = precision_score(y_test, ensemble_predictions_original, **binary_scoring)
recall = recall_score(y_test, ensemble_predictions_original, **binary_scoring)
f1 = f1_score(y_test, ensemble_predictions_original, **binary_scoring)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

Precision: 0.8597756410256411
Recall: 0.8994132439228835
F1 Score: 0.8791478902089307
