In [29]:

# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import kaleido
# Hugging Face Transformers and datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, pipeline
from datasets import load_dataset, Dataset
# scikit-learn for dataset splitting
from sklearn.model_selection import train_test_split


This first cell loads my dataset, splits it into training and test sets, and then saves each set to its own CSV file.

In [30]:
def IMPORT_BABEv3():
    """Fetch BABE-v3, split it 80/20 and write both parts to CSV.

    Loads the "train" split of ``mediabiasgroup/BABE-v3`` into a DataFrame,
    holds out 20% of the rows with a fixed ``random_state`` of 42, and writes
    the two parts to ``TRAINING_DATAFRAME.csv`` and ``TESTING_DATAFRAME.csv``
    (relative paths, index column omitted).
    """
    babe_frame = pd.DataFrame(load_dataset("mediabiasgroup/BABE-v3")["train"])
    train_part, test_part = train_test_split(babe_frame, test_size=0.20, random_state=42)
    outputs = (
        (train_part, "TRAINING_DATAFRAME.csv"),
        (test_part, "TESTING_DATAFRAME.csv"),
    )
    for part, csv_name in outputs:
        part.to_csv(csv_name, index=False)

IMPORT_BABEv3()

Using the latest cached version of the dataset since mediabiasgroup/BABE-v3 couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /Users/oscar/.cache/huggingface/datasets/mediabiasgroup___BABE-v3/default/0.0.0/5493fe1686f29d4fce6912ccf9e2e03780493bd6 (last modified on Mon Mar 18 10:23:39 2024).


This section runs the pre-trained models.

In [31]:
#This is for the analysis of the D1V1DE bias-detection model (https://huggingface.co/D1V1DE/bias-detection?text=I+like+you.+I+love+you)
def PRE_TEST_D1V1DE():
    """Score the stock D1V1DE/bias-detection model on the held-out test CSV.

    Reads ``TESTING_DATAFRAME.csv``, classifies each ``text`` entry with its
    own pipeline call and stores the first result's label in a ``Predicted``
    column. The per-sentence predictions are written to ``temp.csv``; the
    per-``type`` and per-``topic`` tallies, with ``score`` scaled to a
    percentage, are written to ``PRED1V1DE.csv``.

    Returns:
        The combined score table, or ``None`` if any step raises (the error
        is printed instead of being propagated).
    """
    try:
        classifier = pipeline("text-classification", model="D1V1DE/bias-detection")
        unused_columns = ['news_link', 'outlet', 'label', 'label_opinion', 'biased_words']
        predictions = pd.read_csv("TESTING_DATAFRAME.csv").drop(columns=unused_columns)
        # One pipeline call per sentence, keeping only the label of the first result.
        predictions['Predicted'] = [
            classifier(sentence)[0]["label"] for sentence in predictions['text']
        ]
        predictions.to_csv("temp.csv", index=False)

        by_type = GEN_EVAL_ScorePerTopic(predictions, "type")
        by_topic = GEN_EVAL_ScorePerTopic(predictions, "topic")
        # Join the two tables side by side, then flip so each item is a row.
        summary = by_type.join(by_topic).T
        summary['score'] = summary['score'] * 100
        summary.to_csv("PRED1V1DE.csv", index=True)
        print("D1V1DE/bias-detection examination completed successfully")
        return summary
    except Exception as e:
        print("D1V1DE/bias-detection failed")
        print(e)

PRE_TEST_D1V1DE()

D1V1DE/bias-detection examination completed successfully


Unnamed: 0,correct,count,score,field
,133.0,289.0,46.020761,type
left,64.0,203.0,31.527094,type
center,122.0,136.0,89.705882,type
right,66.0,197.0,33.502538,type
white-nationalism,6.0,30.0,20.0,topic
gender,13.0,22.0,59.090909,topic
#metoo,6.0,7.0,85.714286,topic
marriage-equality,20.0,68.0,29.411765,topic
taxes,29.0,54.0,53.703704,topic
student-debt,7.0,22.0,31.818182,topic


In [39]:
#This is for the analysis of the finetuned D1V1DE bias-detection model (https://huggingface.co/D1V1DE/bias-detection?text=I+like+you.+I+love+you)
def FIN_TEST_D1V1DE():
    """Score the locally fine-tuned D1V1DE model on the held-out test CSV.

    Loads the checkpoint stored under ``./fine_tuned/D1V1DE``, classifies each
    ``text`` entry of ``TESTING_DATAFRAME.csv`` with its own pipeline call and
    stores the first result's label in a ``Predicted`` column. The
    per-sentence predictions are written to ``temp_predictions.csv``; the
    per-``type`` and per-``topic`` tallies, with ``score`` scaled to a
    percentage, are written to ``FIN_TEST_D1V1DE_RESULTS.csv``.

    Returns:
        The combined score table, or ``None`` if any step raises (the error
        is printed instead of being propagated).
    """
    try:
        classifier = pipeline("text-classification", model="./fine_tuned/D1V1DE")

        # Read the test set and discard the columns the scoring does not use.
        unused_columns = ['news_link', 'outlet', 'label', 'label_opinion', 'biased_words']
        predictions = pd.read_csv("TESTING_DATAFRAME.csv").drop(columns=unused_columns)

        # One pipeline call per sentence, keeping only the label of the first result.
        predictions['Predicted'] = [
            classifier(sentence)[0]["label"] for sentence in predictions['text']
        ]
        predictions.to_csv("temp_predictions.csv", index=False)

        # Tally correctness per bias type and per topic.
        by_type = GEN_EVAL_ScorePerTopic(predictions, "type")
        by_topic = GEN_EVAL_ScorePerTopic(predictions, "topic")

        # Join the two tables side by side, then flip so each item is a row.
        summary = by_type.join(by_topic).T
        summary['score'] = summary['score'] * 100

        summary.to_csv("FIN_TEST_D1V1DE_RESULTS.csv", index=True)
        print("Fine-tuned D1V1DE/bias-detection model evaluation completed successfully")
        return summary
    except Exception as e:
        print("Evaluation of the fine-tuned D1V1DE/bias-detection model failed")
        print(e)

# Example usage
FIN_TEST_D1V1DE()

Fine-tuned D1V1DE/bias-detection model evaluation completed successfully


Unnamed: 0,correct,count,score,field
,119.0,289.0,41.176471,type
left,85.0,203.0,41.871921,type
center,128.0,136.0,94.117647,type
right,83.0,197.0,42.13198,type
white-nationalism,8.0,30.0,26.666667,topic
gender,14.0,22.0,63.636364,topic
#metoo,7.0,7.0,100.0,topic
marriage-equality,22.0,68.0,32.352941,topic
taxes,30.0,54.0,55.555556,topic
student-debt,12.0,22.0,54.545455,topic


The section below is for fine-tuning models and evaluating them.

In [33]:
#fine tuning the model with my training dataset
def FIN_TRAIN_D1V1DE(training_data_path, random_state=42):
    """Fine-tune D1V1DE/bias-detection on a labelled CSV and save the result.

    Args:
        training_data_path: Path to a CSV that has at least ``text`` and
            ``label`` columns; every other column is ignored.
        random_state: Seed for the 90/10 train/validation split, so repeated
            runs train and validate on the same rows.

    Side effects:
        Writes Trainer output under ``./results`` and logs under ``./logs``,
        then saves the tokenizer and the fine-tuned model to
        ``./fine_tuned/D1V1DE``.
    """
    # Load the CSV and keep only the columns used for training
    labelled = pd.read_csv(training_data_path)[['text', 'label']]
    # Seeded split: an unseeded split selects different validation rows on
    # every run, which makes training runs impossible to compare.
    df_train, df_val = train_test_split(labelled, test_size=0.1, random_state=random_state)
    train_dataset = Dataset.from_pandas(df_train)
    val_dataset = Dataset.from_pandas(df_val)
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained("D1V1DE/bias-detection")

    # Tokenization function: every example is padded/truncated to the
    # tokenizer's maximum length
    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    # Tokenize the datasets
    tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
    tokenized_val_dataset = val_dataset.map(tokenize_function, batched=True)
    # Load the pretrained model
    model = AutoModelForSequenceClassification.from_pretrained("D1V1DE/bias-detection")
    # Training arguments
    # NOTE(review): the recorded run of this cell took 558 optimisation steps
    # in total, so warmup_steps=500 keeps the learning rate ramping up for
    # roughly 90% of training. Consider a much shorter warmup; confirm before
    # changing, since it alters the trained model.
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10
    )
    # Initialize the Trainer
    # NOTE(review): no evaluation schedule is configured above, so the
    # validation set is presumably only used if trainer.evaluate() is called
    # explicitly — confirm against the installed transformers version.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_val_dataset
    )
    # Fine-tune the model (updates `model` in place)
    trainer.train()
    # Save the tokenizer and fine-tuned model side by side so the directory
    # can be loaded as a single checkpoint
    tokenizer.save_pretrained("./fine_tuned/D1V1DE")
    model.save_pretrained("./fine_tuned/D1V1DE")


#code for fine-tuning individual models
#FIN_TRAIN_D1V1DE("TRAINING_DATAFRAME.csv")

Map: 100%|██████████| 2966/2966 [00:00<00:00, 10660.06 examples/s]
Map: 100%|██████████| 330/330 [00:00<00:00, 10420.63 examples/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
  2%|▏         | 10/558 [00:11<10:27,  1.14s/it]

{'loss': 0.3819, 'grad_norm': 24.358970642089844, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.05}


  4%|▎         | 20/558 [00:23<10:12,  1.14s/it]

{'loss': 0.3204, 'grad_norm': 23.58262825012207, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.11}


  5%|▌         | 30/558 [00:34<09:58,  1.13s/it]

{'loss': 0.3584, 'grad_norm': 19.62409210205078, 'learning_rate': 3e-06, 'epoch': 0.16}


  7%|▋         | 40/558 [00:45<09:47,  1.13s/it]

{'loss': 0.2999, 'grad_norm': 11.214188575744629, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.22}


  9%|▉         | 50/558 [00:57<09:36,  1.13s/it]

{'loss': 0.2756, 'grad_norm': 21.374582290649414, 'learning_rate': 5e-06, 'epoch': 0.27}


 11%|█         | 60/558 [01:08<09:22,  1.13s/it]

{'loss': 0.3748, 'grad_norm': 3.4241037368774414, 'learning_rate': 6e-06, 'epoch': 0.32}


 13%|█▎        | 70/558 [01:20<09:34,  1.18s/it]

{'loss': 0.4317, 'grad_norm': 14.77900505065918, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.38}


 14%|█▍        | 80/558 [01:31<09:19,  1.17s/it]

{'loss': 0.4097, 'grad_norm': 20.95553207397461, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.43}


 16%|█▌        | 90/558 [01:43<09:06,  1.17s/it]

{'loss': 0.3931, 'grad_norm': 13.118010520935059, 'learning_rate': 9e-06, 'epoch': 0.48}


 18%|█▊        | 100/558 [01:55<08:46,  1.15s/it]

{'loss': 0.4772, 'grad_norm': 33.427513122558594, 'learning_rate': 1e-05, 'epoch': 0.54}


 20%|█▉        | 110/558 [02:06<08:34,  1.15s/it]

{'loss': 0.3196, 'grad_norm': 18.216777801513672, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.59}


 22%|██▏       | 120/558 [02:18<08:21,  1.15s/it]

{'loss': 0.36, 'grad_norm': 26.892658233642578, 'learning_rate': 1.2e-05, 'epoch': 0.65}


 23%|██▎       | 130/558 [02:29<08:21,  1.17s/it]

{'loss': 0.419, 'grad_norm': 20.00519561767578, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.7}


 25%|██▌       | 140/558 [02:41<07:58,  1.14s/it]

{'loss': 0.3293, 'grad_norm': 29.422935485839844, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.75}


 27%|██▋       | 150/558 [02:52<07:50,  1.15s/it]

{'loss': 0.3562, 'grad_norm': 20.47928810119629, 'learning_rate': 1.5e-05, 'epoch': 0.81}


 29%|██▊       | 160/558 [03:04<07:39,  1.15s/it]

{'loss': 0.5021, 'grad_norm': 34.013729095458984, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.86}


 30%|███       | 170/558 [03:15<07:27,  1.15s/it]

{'loss': 0.4971, 'grad_norm': 11.920598030090332, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.91}


 32%|███▏      | 180/558 [03:27<07:16,  1.15s/it]

{'loss': 0.4027, 'grad_norm': 13.094677925109863, 'learning_rate': 1.8e-05, 'epoch': 0.97}


 34%|███▍      | 190/558 [03:38<07:16,  1.19s/it]

{'loss': 0.3508, 'grad_norm': 11.992867469787598, 'learning_rate': 1.9e-05, 'epoch': 1.02}


 36%|███▌      | 200/558 [03:51<07:36,  1.27s/it]

{'loss': 0.2718, 'grad_norm': 20.38723373413086, 'learning_rate': 2e-05, 'epoch': 1.08}


 38%|███▊      | 210/558 [04:04<07:39,  1.32s/it]

{'loss': 0.2007, 'grad_norm': 23.12297248840332, 'learning_rate': 2.1e-05, 'epoch': 1.13}


 39%|███▉      | 220/558 [04:17<07:26,  1.32s/it]

{'loss': 0.5016, 'grad_norm': 21.839778900146484, 'learning_rate': 2.2000000000000003e-05, 'epoch': 1.18}


 41%|████      | 230/558 [04:31<07:11,  1.31s/it]

{'loss': 0.3398, 'grad_norm': 12.050722122192383, 'learning_rate': 2.3000000000000003e-05, 'epoch': 1.24}


 43%|████▎     | 240/558 [04:44<06:56,  1.31s/it]

{'loss': 0.1754, 'grad_norm': 1.7716572284698486, 'learning_rate': 2.4e-05, 'epoch': 1.29}


 45%|████▍     | 250/558 [04:57<06:50,  1.33s/it]

{'loss': 0.2187, 'grad_norm': 15.061874389648438, 'learning_rate': 2.5e-05, 'epoch': 1.34}


 47%|████▋     | 260/558 [05:10<06:32,  1.32s/it]

{'loss': 0.4196, 'grad_norm': 15.103972434997559, 'learning_rate': 2.6000000000000002e-05, 'epoch': 1.4}


 48%|████▊     | 270/558 [05:24<06:20,  1.32s/it]

{'loss': 0.3582, 'grad_norm': 16.9282169342041, 'learning_rate': 2.7000000000000002e-05, 'epoch': 1.45}


 50%|█████     | 280/558 [05:37<06:05,  1.31s/it]

{'loss': 0.3379, 'grad_norm': 15.1163911819458, 'learning_rate': 2.8000000000000003e-05, 'epoch': 1.51}


 52%|█████▏    | 290/558 [05:50<05:49,  1.30s/it]

{'loss': 0.3512, 'grad_norm': 10.724246978759766, 'learning_rate': 2.9e-05, 'epoch': 1.56}


 54%|█████▍    | 300/558 [06:03<05:33,  1.29s/it]

{'loss': 0.3004, 'grad_norm': 29.348575592041016, 'learning_rate': 3e-05, 'epoch': 1.61}


 56%|█████▌    | 310/558 [06:16<05:18,  1.28s/it]

{'loss': 0.3169, 'grad_norm': 16.09638786315918, 'learning_rate': 3.1e-05, 'epoch': 1.67}


 57%|█████▋    | 320/558 [06:29<05:16,  1.33s/it]

{'loss': 0.3088, 'grad_norm': 13.173498153686523, 'learning_rate': 3.2000000000000005e-05, 'epoch': 1.72}


 59%|█████▉    | 330/558 [06:42<04:54,  1.29s/it]

{'loss': 0.2892, 'grad_norm': 10.872442245483398, 'learning_rate': 3.3e-05, 'epoch': 1.77}


 61%|██████    | 340/558 [06:55<04:49,  1.33s/it]

{'loss': 0.3727, 'grad_norm': 11.119922637939453, 'learning_rate': 3.4000000000000007e-05, 'epoch': 1.83}


 63%|██████▎   | 350/558 [07:09<04:32,  1.31s/it]

{'loss': 0.4171, 'grad_norm': 30.46381378173828, 'learning_rate': 3.5e-05, 'epoch': 1.88}


 65%|██████▍   | 360/558 [07:22<04:14,  1.29s/it]

{'loss': 0.5223, 'grad_norm': 10.132445335388184, 'learning_rate': 3.6e-05, 'epoch': 1.94}


 66%|██████▋   | 370/558 [07:35<04:01,  1.28s/it]

{'loss': 0.4232, 'grad_norm': 11.994037628173828, 'learning_rate': 3.7e-05, 'epoch': 1.99}


 68%|██████▊   | 380/558 [07:47<03:50,  1.29s/it]

{'loss': 0.2505, 'grad_norm': 14.553853988647461, 'learning_rate': 3.8e-05, 'epoch': 2.04}


 70%|██████▉   | 390/558 [08:00<03:36,  1.29s/it]

{'loss': 0.2758, 'grad_norm': 17.802513122558594, 'learning_rate': 3.9000000000000006e-05, 'epoch': 2.1}


 72%|███████▏  | 400/558 [08:13<03:25,  1.30s/it]

{'loss': 0.1984, 'grad_norm': 15.71505355834961, 'learning_rate': 4e-05, 'epoch': 2.15}


 73%|███████▎  | 410/558 [08:25<03:05,  1.26s/it]

{'loss': 0.2327, 'grad_norm': 16.402812957763672, 'learning_rate': 4.1e-05, 'epoch': 2.2}


 75%|███████▌  | 420/558 [08:38<02:59,  1.30s/it]

{'loss': 0.2622, 'grad_norm': 22.064594268798828, 'learning_rate': 4.2e-05, 'epoch': 2.26}


 77%|███████▋  | 430/558 [08:52<02:46,  1.30s/it]

{'loss': 0.4405, 'grad_norm': 82.52388763427734, 'learning_rate': 4.3e-05, 'epoch': 2.31}


 79%|███████▉  | 440/558 [09:05<02:35,  1.32s/it]

{'loss': 0.5406, 'grad_norm': 38.06264877319336, 'learning_rate': 4.4000000000000006e-05, 'epoch': 2.37}


 81%|████████  | 450/558 [09:18<02:22,  1.32s/it]

{'loss': 0.2771, 'grad_norm': 12.347790718078613, 'learning_rate': 4.5e-05, 'epoch': 2.42}


 82%|████████▏ | 460/558 [09:31<02:04,  1.27s/it]

{'loss': 0.3268, 'grad_norm': 27.564191818237305, 'learning_rate': 4.600000000000001e-05, 'epoch': 2.47}


 84%|████████▍ | 470/558 [09:44<01:53,  1.29s/it]

{'loss': 0.2239, 'grad_norm': 35.727149963378906, 'learning_rate': 4.7e-05, 'epoch': 2.53}


 86%|████████▌ | 480/558 [09:57<01:42,  1.32s/it]

{'loss': 0.4671, 'grad_norm': 38.475040435791016, 'learning_rate': 4.8e-05, 'epoch': 2.58}


 88%|████████▊ | 490/558 [10:10<01:29,  1.32s/it]

{'loss': 0.3073, 'grad_norm': 11.285598754882812, 'learning_rate': 4.9e-05, 'epoch': 2.63}


 90%|████████▉ | 500/558 [10:23<01:14,  1.29s/it]Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'loss': 0.2544, 'grad_norm': 31.389554977416992, 'learning_rate': 5e-05, 'epoch': 2.69}


 91%|█████████▏| 510/558 [10:36<01:01,  1.28s/it]

{'loss': 0.2774, 'grad_norm': 20.52629280090332, 'learning_rate': 4.1379310344827587e-05, 'epoch': 2.74}


 93%|█████████▎| 520/558 [10:50<00:50,  1.33s/it]

{'loss': 0.3328, 'grad_norm': 10.32570743560791, 'learning_rate': 3.275862068965517e-05, 'epoch': 2.8}


 95%|█████████▍| 530/558 [11:03<00:36,  1.31s/it]

{'loss': 0.2875, 'grad_norm': 14.379392623901367, 'learning_rate': 2.413793103448276e-05, 'epoch': 2.85}


 97%|█████████▋| 540/558 [11:16<00:23,  1.31s/it]

{'loss': 0.2425, 'grad_norm': 23.04095458984375, 'learning_rate': 1.5517241379310346e-05, 'epoch': 2.9}


 99%|█████████▊| 550/558 [11:29<00:10,  1.32s/it]

{'loss': 0.2372, 'grad_norm': 29.879043579101562, 'learning_rate': 6.896551724137932e-06, 'epoch': 2.96}


100%|██████████| 558/558 [11:38<00:00,  1.25s/it]


{'train_runtime': 698.96, 'train_samples_per_second': 12.73, 'train_steps_per_second': 0.798, 'train_loss': 0.33937042673856127, 'epoch': 3.0}


Below is all the necessary code for evaluations.

In [34]:
#used for all scoring and accuracy testing of models against the bias dataset
def GEN_EVAL_ScorePerTopic(data, field):
    """Tally how often ``Predicted`` agrees with ``field``, per distinct value.

    A row counts as correct when its ``field`` value is missing and the
    prediction is ``'NEUTRAL'``, or when the value is present and the
    prediction is anything other than ``'NEUTRAL'``.

    Args:
        data: DataFrame with a ``Predicted`` column and a ``field`` column.
        field: Name of the column to group by (e.g. ``"type"`` or ``"topic"``).

    Returns:
        DataFrame whose rows are ``correct``, ``count``, ``score``
        (``correct / count``) and ``field`` (the ``field`` argument repeated),
        with one column per distinct value of ``data[field]`` in order of
        first appearance. All missing values share a single NaN column.
    """
    # Every missing value is filed under this one key. NaN never compares
    # equal to itself, so keying the dict by the raw values only works when
    # each NaN happens to be the very same object; a numeric column holding
    # NaN would otherwise fail with KeyError.
    missing_key = float("nan")
    scores = {}
    for value, predicted in zip(data[field].tolist(), data['Predicted'].tolist()):
        is_missing = bool(pd.isna(value))
        key = missing_key if is_missing else value
        tally = scores.setdefault(key, {"correct": 0, "count": 0})
        tally["count"] += 1
        # Correct exactly when "no bias value" and "predicted NEUTRAL" agree.
        if is_missing == (predicted == 'NEUTRAL'):
            tally["correct"] += 1
    for tally in scores.values():
        tally["score"] = tally["correct"] / tally["count"]
    table = pd.DataFrame(scores).T
    table["field"] = field
    # Callers join these tables column-wise, so hand back metrics-as-rows.
    return table.T

In [35]:
def GEN_EVAL_evaluate_model(model, tokenizer, eval_dataset):
    """Run ``Trainer.evaluate`` for ``model`` on a tokenized ``eval_dataset``.

    Used to look at model performance in terms of speed (needs further
    research).

    Args:
        model: Model handed to a ``Trainer`` created with default arguments.
        tokenizer: Callable applied to each batch's ``"text"`` entries with
            max-length padding and truncation.
        eval_dataset: Dataset exposing ``map`` and a ``"text"`` column.

    Returns:
        Whatever ``trainer.evaluate`` returns for the tokenized dataset.
    """
    evaluator = Trainer(model=model)

    def encode_batch(batch):
        return tokenizer(batch["text"], padding="max_length", truncation=True)

    encoded_dataset = eval_dataset.map(encode_batch, batched=True)
    return evaluator.evaluate(encoded_dataset)

Using the standard Python tools for evaluating model performance.

In [36]:
def EVAL_D1V1DE_evaluate_model():
    """Evaluate the fine-tuned and the original D1V1DE model on the test CSV.

    Builds a dataset from the ``text`` and ``label`` columns of
    ``TESTING_DATAFRAME.csv``, then evaluates the local ``fine_tuned/D1V1DE``
    checkpoint followed by the original ``D1V1DE/bias-detection`` model, both
    with the ``D1V1DE/bias-detection`` tokenizer. Each result is printed with
    its tag (``FIND1V1DE`` for fine-tuned, ``PRED1V1DE`` for original).
    """
    tokenizer = AutoTokenizer.from_pretrained("D1V1DE/bias-detection")
    test_frame = pd.read_csv('TESTING_DATAFRAME.csv')
    eval_dataset = Dataset.from_pandas(test_frame[['text', 'label']])

    # Fine-tuned checkpoint first, then the original model it started from.
    checkpoints = (
        ("FIND1V1DE: ", "fine_tuned/D1V1DE"),
        ("PRED1V1DE: ", "D1V1DE/bias-detection"),
    )
    for tag, checkpoint in checkpoints:
        candidate = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        print(tag, GEN_EVAL_evaluate_model(candidate, tokenizer, eval_dataset))

EVAL_D1V1DE_evaluate_model()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Map: 100%|██████████| 825/825 [00:00<00:00, 7863.52 examples/s]
100%|██████████| 104/104 [00:16<00:00,  6.30it/s]


FIND1V1DE:  {'eval_loss': 0.4441041946411133, 'eval_runtime': 16.5461, 'eval_samples_per_second': 49.861, 'eval_steps_per_second': 6.285}


Map: 100%|██████████| 825/825 [00:00<00:00, 9296.54 examples/s]
100%|██████████| 104/104 [00:16<00:00,  6.31it/s]

PRED1V1DE:  {'eval_loss': 0.29630759358406067, 'eval_runtime': 16.5132, 'eval_samples_per_second': 49.96, 'eval_steps_per_second': 6.298}





Below is code for visualisations and tools that assist with evaluation.

In [37]:
def GEN_VIS_star(data,key):
    """Draw a star (radar) plot of ``score`` for the rows whose ``field`` is ``key``.

    Args:
        data: DataFrame with ``field`` and ``score`` columns. Its index is
            moved into an ``index`` column and used for the spoke labels;
            missing labels are shown as ``'No Bias'``.
        key: Value of ``field`` selecting which rows to plot.
    """
    subset = data[data['field'] == key].reset_index()
    subset['index'] = subset['index'].fillna('No Bias')
    spoke_labels = subset['index']
    scores = subset['score']

    # One evenly spaced angle per label; the first point is repeated at the
    # end so the filled outline closes on itself.
    spoke_angles = np.linspace(0, 2*np.pi, len(spoke_labels), endpoint=False).tolist()
    closed_scores = np.concatenate((scores, [scores[0]]))
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    ax.fill(closed_angles, closed_scores, color='blue', alpha=0.25)
    ax.set_yticklabels([])
    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(spoke_labels)

    # Display the plot
    plt.title(f'Star Plot of {key!s} vs. accuracy rating')
    plt.show()