In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score

# Define paths to your CSV datasets
dataset1_path = "DATASET-balanced.csv"
dataset2_path = "DATASET-LA.csv"

# Read data from CSV files
df1 = pd.read_csv(dataset1_path)
df2 = pd.read_csv(dataset2_path)
# Define desired size for datasets
target_size = 1000 

# Stratified sampling to maintain class balance
smaller_df = df1.sample(target_size, random_state=42) 
larger_df = df2.sample(target_size, random_state=42)

# Combine features and labels from both datasets
features1 = smaller_df.drop("Classname", axis=1)
features2 = larger_df.drop("Classname", axis=1)
labels1 = smaller_df["Classname"]
labels2 =larger_df["Classname"]


# Split data into training and testing sets
X_train1, X_test1, y_train1, y_test1 = train_test_split(features1, labels1, test_size=0.2)
X_train2, X_test2, y_train2, y_test2 = train_test_split(features2, labels2, test_size=0.2)


In [12]:

# Define two machine learning models 
model1 = RandomForestClassifier()
model2 = RandomForestClassifier()

# Encode labels using LabelEncoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train1_encoded = le.fit_transform(y_train1)
y_train2_encoded = le.fit_transform(y_train2)

# Train models with encoded labels
model1.fit(X_train1, y_train1_encoded)
model2.fit(X_train2, y_train2_encoded)

estimators = [
    ("model1", model1),
    ("model2", model2),
]

voting_classifier = VotingClassifier(estimators=estimators, voting="soft")

# Create a voting classifier ensemble

voting_classifier.fit(X_train1, y_train1)

# Make predictions on the test set using the ensemble
voting_predictions = voting_classifier.predict(X_test1)

# Evaluate the accuracy of the ensemble model
accuracy = accuracy_score(y_test1, voting_predictions)
print(f"Ensemble accuracy: {accuracy:.4f}")

# Create a voting classifier ensemble
voting_classifier2 = VotingClassifier(estimators=estimators, voting="soft")  # Choose voting strategy

voting_classifier2.fit(X_train2, y_train2)
# Make predictions on the test set using the ensemble
voting_predictions2 = voting_classifier2.predict(X_test2)

# Evaluate the accuracy of the ensemble model
accuracy2 = accuracy_score(y_test2, voting_predictions2)
print(f"Ensemble accuracy2: {accuracy2:.4f}")


Ensemble accuracy: 0.9550
Ensemble accuracy2: 0.8950


In [13]:
from sklearn.metrics import precision_score,recall_score

precision1 = precision_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')
recall1 = recall_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')

print(f'Precision1: {precision1:.4f}')
print(f'Recall1: {recall1}')


precision2 = precision_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')
recall2 = recall_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')

print(f'Precision2: {precision2}')
print(f'Recall2: {recall2}')

Precision1: 0.9101
Recall1: 0.9878048780487805
Precision2: 0.8994974874371859
Recall2: 0.9944444444444445


Both Random Forest : 93%

In [14]:

# Define two machine learning models 
from sklearn.ensemble import HistGradientBoostingClassifier


model1 = RandomForestClassifier()
model2 = HistGradientBoostingClassifier()


# Encode labels using LabelEncoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train1_encoded = le.fit_transform(y_train1)
y_train2_encoded = le.fit_transform(y_train2)

# Train models with encoded labels
model1.fit(X_train1, y_train1_encoded)
model2.fit(X_train2, y_train2_encoded)

estimators = [
    ("model1", model1),
    ("model2", model2),
]

voting_classifier = VotingClassifier(estimators=estimators, voting="soft")

# Create a voting classifier ensemble

voting_classifier.fit(X_train1, y_train1)

# Make predictions on the test set using the ensemble
voting_predictions = voting_classifier.predict(X_test1)

# Evaluate the accuracy of the ensemble model
accuracy = accuracy_score(y_test1, voting_predictions)
print(f"Ensemble accuracy: {accuracy:.4f}")

# Create a voting classifier ensemble
voting_classifier2 = VotingClassifier(estimators=estimators, voting="soft")  # Choose voting strategy

voting_classifier2.fit(X_train2, y_train2)
# Make predictions on the test set using the ensemble
voting_predictions2 = voting_classifier2.predict(X_test2)

# Evaluate the accuracy of the ensemble model
accuracy2 = accuracy_score(y_test2, voting_predictions2)
print(f"Ensemble accuracy2: {accuracy2:.4f}")


Ensemble accuracy: 0.9750
Ensemble accuracy2: 0.8950


In [15]:
from sklearn.metrics import precision_score,recall_score

precision1 = precision_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')
recall1 = recall_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')

print(f'Precision1: {precision1:.4f}')
print(f'Recall1: {recall1}')


precision2 = precision_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')
recall2 = recall_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')

print(f'Precision2: {precision2}')
print(f'Recall2: {recall2}')

Precision1: 0.9529
Recall1: 0.9878048780487805
Precision2: 0.8994974874371859
Recall2: 0.9944444444444445


RandomForest + HistGradientBoostingClassifier : 95.50%

In [16]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import HistGradientBoostingClassifier

model1 = RandomForestClassifier()
model2 = HistGradientBoostingClassifier()
model3 = SVC(probability=True)
model4 = KNeighborsClassifier()


# Encode labels using LabelEncoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train1_encoded = le.fit_transform(y_train1)
y_train2_encoded = le.fit_transform(y_train2)

# Train models with encoded labels
model1.fit(X_train1, y_train1_encoded)
model2.fit(X_train2, y_train2_encoded)
model3.fit(X_train2, y_train2_encoded)
model4.fit(X_train1, y_train1_encoded)

estimators = [
    ("model1", model1),
    ("model2", model2),
    ("model3", model3),
    ("model4",model4)
]

voting_classifier = VotingClassifier(estimators=estimators, voting="soft")

# Create a voting classifier ensemble

voting_classifier.fit(X_train1, y_train1)

# Make predictions on the test set using the ensemble
voting_predictions = voting_classifier.predict(X_test1)

# Evaluate the accuracy of the ensemble model
accuracy = accuracy_score(y_test1, voting_predictions)
print(f"Ensemble accuracy: {accuracy:.4f}")

# Create a voting classifier ensemble
voting_classifier2 = VotingClassifier(estimators=estimators, voting="soft")  # Choose your voting strategy

voting_classifier2.fit(X_train2, y_train2)
# Make predictions on the test set using the ensemble
voting_predictions2 = voting_classifier2.predict(X_test2)

# Evaluate the accuracy of the ensemble model
accuracy2 = accuracy_score(y_test2, voting_predictions2)
print(f"Ensemble accuracy2: {accuracy2:.4f}")


Ensemble accuracy: 0.9550
Ensemble accuracy2: 0.9000


In [18]:
from sklearn.metrics import precision_score,recall_score

precision1 = precision_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')
recall1 = recall_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')

print(f'Precision1: {precision1:.4f}')
print(f'Recall1: {recall1}')


precision2 = precision_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')
recall2 = recall_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')

print(f'Precision2: {precision2}')
print(f'Recall2: {recall2}')

Precision1: 0.9195
Recall1: 0.975609756097561
Precision2: 0.9
Recall2: 1.0


In [19]:

# Define two machine learning models 
from sklearn.tree import DecisionTreeClassifier

model1 = DecisionTreeClassifier()
model2 = DecisionTreeClassifier()

# Encode labels using LabelEncoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train1_encoded = le.fit_transform(y_train1)
y_train2_encoded = le.fit_transform(y_train2)

# Train models with encoded labels
model1.fit(X_train1, y_train1_encoded)
model2.fit(X_train2, y_train2_encoded)

estimators = [
    ("model1", model1),
    ("model2", model2),
]

voting_classifier = VotingClassifier(estimators=estimators, voting="soft")

# Create a voting classifier ensemble

voting_classifier.fit(X_train1, y_train1)

# Make predictions on the test set using the ensemble
voting_predictions = voting_classifier.predict(X_test1)

# Evaluate the accuracy of the ensemble model
accuracy = accuracy_score(y_test1, voting_predictions)
print(f"Ensemble accuracy: {accuracy:.4f}")

# Create a voting classifier ensemble
voting_classifier2 = VotingClassifier(estimators=estimators, voting="soft")  # Choose voting strategy

voting_classifier2.fit(X_train2, y_train2)
# Make predictions on the test set using the ensemble
voting_predictions2 = voting_classifier2.predict(X_test2)

# Evaluate the accuracy of the ensemble model
accuracy2 = accuracy_score(y_test2, voting_predictions2)
print(f"Ensemble accuracy2: {accuracy2:.4f}")


Ensemble accuracy: 0.8650
Ensemble accuracy2: 0.7850


In [20]:
from sklearn.metrics import precision_score,recall_score

precision1 = precision_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')
recall1 = recall_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')

print(f'Precision1: {precision1:.4f}')
print(f'Recall1: {recall1}')


precision2 = precision_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')
recall2 = recall_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')

print(f'Precision2: {precision2}')
print(f'Recall2: {recall2}')

Precision1: 0.8090
Recall1: 0.8780487804878049
Precision2: 0.8914285714285715
Recall2: 0.8666666666666667


In [23]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

model1 = SVC(probability=True)
model2 = KNeighborsClassifier()


# Encode labels using LabelEncoder
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train1_encoded = le.fit_transform(y_train1)
y_train2_encoded = le.fit_transform(y_train2)

# Train models with encoded labels
model1.fit(X_train1, y_train1_encoded)
model2.fit(X_train2, y_train2_encoded)

estimators = [
    ("model1", model1),
    ("model2", model2)
]

voting_classifier = VotingClassifier(estimators=estimators, voting="soft")

# Create a voting classifier ensemble

voting_classifier.fit(X_train1, y_train1)

# Make predictions on the test set using the ensemble
voting_predictions = voting_classifier.predict(X_test1)

# Evaluate the accuracy of the ensemble model
accuracy = accuracy_score(y_test1, voting_predictions)
print(f"Ensemble accuracy: {accuracy:.4f}")

# Create a voting classifier ensemble
voting_classifier2 = VotingClassifier(estimators=estimators, voting="soft")  # Choose your voting strategy

voting_classifier2.fit(X_train2, y_train2)
# Make predictions on the test set using the ensemble
voting_predictions2 = voting_classifier2.predict(X_test2)

# Evaluate the accuracy of the ensemble model
accuracy2 = accuracy_score(y_test2, voting_predictions2)
print(f"Ensemble accuracy2: {accuracy2:.4f}")


Ensemble accuracy: 0.6450
Ensemble accuracy2: 0.9000


In [24]:
from sklearn.metrics import precision_score,recall_score

precision1 = precision_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')
recall1 = recall_score(y_test1, voting_predictions, average='binary', pos_label='FAKE')

print(f'Precision1: {precision1:.4f}')
print(f'Recall1: {recall1}')


precision2 = precision_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')
recall2 = recall_score(y_test2, voting_predictions2, average='binary', pos_label='spoof')

print(f'Precision2: {precision2}')
print(f'Recall2: {recall2}')

Precision1: 0.5495
Recall1: 0.7439024390243902
Precision2: 0.9
Recall2: 1.0
