In [31]:
import pandas as pd

datasets = {
    "LIAR" : {
        "file_path" : r"/content/drive/Shareddrives/[FYP] Fake News Detection/Datasets/LIAR/Liar_with_WELFAKE_Lexicon_Scores.csv",
    },

    "ISOT" : {
        "file_path" : r"/content/drive/Shareddrives/[FYP] Fake News Detection/Datasets/ISOT/ISOT_with_WELFAKE_Lexicon_Scores.csv",
    },

    "Kaggle_real_fake" : {
        "file_path" : r"/content/drive/Shareddrives/[FYP] Fake News Detection/Datasets/Kaggle_real_fake/Kaggle_real_fake_with_WELFAKE_Lexicon_Scores.csv",
    },

    "CodaLab" : {
        "file_path" : r"/content/drive/Shareddrives/[FYP] Fake News Detection/Datasets/CodaLab Covid/CodaLab_with_WELFAKE_Lexicon_Scores.csv",
    },

    "FakeNewsNet" : {
        "file_path" : r"/content/drive/Shareddrives/[FYP] Fake News Detection/Datasets/FakeNewsNet/FakeNewsNet_with_WELFAKE_Lexicon_Scores.csv",
    }
}

for d in datasets.keys():
  datasets[d]["dataframe"] = pd.read_csv(datasets[d]["file_path"]) 

In [32]:
def convert_label(label):

  if label in ['true', 'mostly-true', 'half-true', 'real', 'Real', 0, 'REAL']:
    return 0

  if label in ['false', 'pants-fire', 'barely-true', 'fake', 'Fake', 1, 'FAKE']:
    return 1

def get_fake_score(scores):
  return float(scores.strip("[]").split(",")[0].strip())

def get_true_score(scores):
  return float(scores.strip("[]").split(",")[1].strip())

In [33]:
! pip install deslib -q

In [34]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

def train_classifier(dataset_name, data):

  print("================================")
  print("Current Dataset : {}".format(dataset_name))
  print("================================")
  print()

  df = data[dataset_name]["dataframe"]
  df['2_way_label'] = df['label'].apply(convert_label)
  df['fake_score'], df['true_score'] = df['scores'].apply(get_fake_score), df['scores'].apply(get_true_score)
  X_train, y_train = df[['fake_score', 'true_score']].to_numpy(), df[['2_way_label']].to_numpy()

  from sklearn.model_selection import train_test_split
  import numpy as np

  rng = np.random.RandomState(42)

  # Split the data into training and DSEL for DS techniques
  X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, test_size=0.5, random_state=rng, stratify=y_train)
  X_train = scaler.fit_transform(X_train)
  X_dsel = scaler.transform(X_dsel)
  
  from deslib.des.knora_e import KNORAE
  from sklearn.neural_network import MLPClassifier
  from sklearn.linear_model import Perceptron
  from sklearn.calibration import CalibratedClassifierCV
  from sklearn.naive_bayes import GaussianNB
  from sklearn.tree import DecisionTreeClassifier
  from sklearn.neighbors import KNeighborsClassifier

  print("Perceptron")
  print("----------")
  model_perceptron = CalibratedClassifierCV(Perceptron(max_iter=100, random_state=rng), cv=3)
  model_perceptron.fit(X_train, y_train.ravel())
  print("\nDone !!!\n")

  print("Gaussian Naive Bayes")
  print("--------------------")
  model_bayes = GaussianNB().fit(X_train, y_train.ravel())
  print("\nDone !!!\n")

  print("Decision Tree")
  print("-------------")
  model_tree = DecisionTreeClassifier(random_state=rng, max_depth=10).fit(X_train, y_train.ravel())
  print("\nDone !!!\n")

  print("MLP Classifier")
  print("--------------")
  model_mlp = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train.ravel())
  print("\nDone !!!\n")

  print("KNeighbors Classifier")
  print("---------------------")
  model_knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train.ravel())
  print("\nDone !!!\n")

  pool_classifiers = [
                      model_perceptron,
                      model_bayes,
                      model_tree,
                      model_mlp,
                      model_knn
                      ]

  # Initialize the DES model
  knorae = KNORAE(pool_classifiers)

  # Preprocess the Dynamic Selection dataset (DSEL)
  knorae.fit(X_dsel, y_dsel.ravel())

  print("********************************\n")

  return knorae  

In [35]:
for d in datasets:
  datasets[d]["classifier"] = train_classifier(d, datasets)

Current Dataset : LIAR

Perceptron
----------

Done !!!

Gaussian Naive Bayes
--------------------

Done !!!

Decision Tree
-------------

Done !!!

MLP Classifier
--------------

Done !!!

KNeighbors Classifier
---------------------

Done !!!

********************************

Current Dataset : ISOT

Perceptron
----------

Done !!!

Gaussian Naive Bayes
--------------------

Done !!!

Decision Tree
-------------

Done !!!

MLP Classifier
--------------

Done !!!

KNeighbors Classifier
---------------------

Done !!!

********************************

Current Dataset : Kaggle_real_fake

Perceptron
----------

Done !!!

Gaussian Naive Bayes
--------------------

Done !!!

Decision Tree
-------------

Done !!!

MLP Classifier
--------------

Done !!!

KNeighbors Classifier
---------------------

Done !!!

********************************

Current Dataset : CodaLab

Perceptron
----------

Done !!!

Gaussian Naive Bayes
--------------------

Done !!!

Decision Tree
-------------

Done !!!


In [36]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

def cross_evaluate(dataset):

  list_of_datasets = ["LIAR", "ISOT", "Kaggle_real_fake", "CodaLab", "FakeNewsNet"]
  results = {}

  for key in list_of_datasets:

    print("Model Trained Using {}".format(key))
    print("==================================")
    print()

    results[key] = {}
    clf = dataset[key]["classifier"]

    average_accuracy = 0

    for d in list_of_datasets:

      if d != key:

        print("Tested on {}".format(d))
        print("-----------------------")
        print()

        d_1 = dataset[d]["dataframe"]

        d_1['fake_score'], d_1['true_score'] = d_1['scores'].apply(get_fake_score), d_1['scores'].apply(get_true_score)

        X_test, y_test = d_1[['fake_score', 'true_score']].to_numpy(), d_1[['2_way_label']].to_numpy()
        X_test = scaler.transform(X_test)
        
        y_pred = clf.predict(X_test)

        a = accuracy_score(y_test, y_pred)
        results[key][d] = a
        average_accuracy += a
        print("Done !!!\n")

    results[key]['Average Accuracy'] = average_accuracy/(len(list_of_datasets)-1)

  return results

In [37]:
result_final = cross_evaluate(datasets)

Model Trained Using LIAR

Tested on ISOT
-----------------------

Done !!!

Tested on Kaggle_real_fake
-----------------------

Done !!!

Tested on CodaLab
-----------------------

Done !!!

Tested on FakeNewsNet
-----------------------

Done !!!

Model Trained Using ISOT

Tested on LIAR
-----------------------

Done !!!

Tested on Kaggle_real_fake
-----------------------

Done !!!

Tested on CodaLab
-----------------------

Done !!!

Tested on FakeNewsNet
-----------------------

Done !!!

Model Trained Using Kaggle_real_fake

Tested on LIAR
-----------------------

Done !!!

Tested on ISOT
-----------------------

Done !!!

Tested on CodaLab
-----------------------

Done !!!

Tested on FakeNewsNet
-----------------------

Done !!!

Model Trained Using CodaLab

Tested on LIAR
-----------------------

Done !!!

Tested on ISOT
-----------------------

Done !!!

Tested on Kaggle_real_fake
-----------------------

Done !!!

Tested on FakeNewsNet
-----------------------

Done !!!

Model Tr

In [38]:
import pprint

pp = pprint.PrettyPrinter(indent=2)
pp.pprint(result_final)

{ 'CodaLab': { 'Average Accuracy': 0.5778326874482785,
               'FakeNewsNet': 0.5844111053629936,
               'ISOT': 0.6332353334224241,
               'Kaggle_real_fake': 0.5480662983425414,
               'LIAR': 0.5456180126651552},
  'FakeNewsNet': { 'Average Accuracy': 0.5233209084658691,
                   'CodaLab': 0.5200934579439253,
                   'ISOT': 0.5114036259967036,
                   'Kaggle_real_fake': 0.5067087608524072,
                   'LIAR': 0.5550777890704401},
  'ISOT': { 'Average Accuracy': 0.4430089481578591,
            'CodaLab': 0.5142990654205607,
            'FakeNewsNet': 0.3701069149853423,
            'Kaggle_real_fake': 0.41878453038674035,
            'LIAR': 0.4688452818387929},
  'Kaggle_real_fake': { 'Average Accuracy': 0.4379468599183053,
                        'CodaLab': 0.49457943925233644,
                        'FakeNewsNet': 0.3072943610967408,
                        'ISOT': 0.5080404472359571,
                       