### Technique level baselines
Multilabel problem
N dataset: 2310
N for test: 20% / 462

Weighted avg F1 for most popular class prediction: 0.16
Weighted avg F1 for random class prediction: 0.12

In [10]:
import numpy as np
import pandas as pd
import os
import random
import ast
import csv
from collections import Counter
from sklearn.metrics import confusion_matrix, classification_report

#### Generate random 10 classes

In [4]:
techniques_list = [
    'Doubts_government',
    'Doubts_Army',
    'Doubts_media',
    'Doubts_partners',
    'Doubts_other',
    'Black_White',
    'Emotion_Fear',
    'Emotion_Anger',
    'Emotion_Hate_Disgust',
    'Emotion_other'
]

In [5]:
# Example
technique = random.choice(techniques_list)
technique

'Doubts_government'

In [12]:
true_file = "datasets/TC.labels_true.txt"

# Initialize dictionaries to store IDs and techniques
true_dict = {}

# Read contents of TC.labels_true.txt
with open(true_file, "r") as true_f:
    true_reader = csv.reader(true_f, delimiter="\t")
    for row in true_reader:
        id_ = row[0]
        technique = row[1]
        if id_ not in true_dict:
            true_dict[id_] = [technique]
        else:
            true_dict[id_].append(technique)
true_dict

{'209813': ['Emotion_Fear', 'Emotion_Fear', 'Emotion_other', 'Emotion_other'],
 '210028': ['Emotion_other'],
 '210117': ['Emotion_other'],
 '210306': ['Emotion_Fear'],
 '210365': ['Emotion_Fear'],
 '210441': ['Emotion_Fear'],
 '210456': ['Emotion_other', 'Emotion_other', 'Emotion_Anger'],
 '210467': ['Doubts_government'],
 '210735': ['Emotion_other', 'Emotion_Anger'],
 '210911': ['Emotion_Hate_Disgust'],
 '210965': ['Emotion_Anger', 'Emotion_Anger'],
 '211009': ['Emotion_other'],
 '211048': ['Emotion_other', 'Doubts_government'],
 '211326': ['Emotion_other'],
 '211371': ['Emotion_Anger'],
 '211402': ['Emotion_other'],
 '211495': ['Doubts_government',
  'Emotion_other',
  'Emotion_other',
  'Doubts_government',
  'Emotion_other',
  'Doubts_government',
  'Doubts_government'],
 '211632': ['Emotion_Anger', 'Emotion_Hate_Disgust'],
 '211885': ['Emotion_other'],
 '212018': ['Emotion_other', 'Emotion_other'],
 '212074': ['Emotion_Anger', 'Emotion_Hate_Disgust'],
 '212219': ['Emotion_Anger'],

In [28]:
# Random generated
# Load the indices from the file
with open('datasets/TC_labels_for_eval.txt', 'r') as file:
    lines = file.readlines()
predicted_labels = {}

# Iterate over the lines in the file
for line in lines:
    # Parse the line to extract the ID and question mark indices
    id_, _, start, end = line.strip().split()
    start, end = int(start), int(end)
    
    # Randomly select a manipulation technique
    selected_technique = random.choice(techniques_list)
    if id_ in predicted_labels:
        predicted_labels[id_].append(selected_technique)
    else:
        predicted_labels[id_] = [selected_technique]

    # Print the replaced text (optional)
    replaced_text = line[:start] + selected_technique + line[end:]
    
    # Append the ID and technique to the list
    #id_technique_list.append({id_, selected_technique})


# Print the list of ID and technique pairs
predicted_labels

{'209813': ['Emotion_other',
  'Doubts_other',
  'Emotion_Hate_Disgust',
  'Doubts_other'],
 '210028': ['Doubts_partners'],
 '210117': ['Doubts_partners'],
 '210306': ['Doubts_Army'],
 '210365': ['Emotion_Anger'],
 '210441': ['Doubts_Army'],
 '210456': ['Doubts_government', 'Doubts_media', 'Doubts_Army'],
 '210467': ['Doubts_partners'],
 '210735': ['Black_White', 'Emotion_other'],
 '210911': ['Emotion_Anger'],
 '210965': ['Doubts_media', 'Doubts_media'],
 '211009': ['Doubts_partners'],
 '211048': ['Emotion_Anger', 'Doubts_Army'],
 '211326': ['Doubts_media'],
 '211371': ['Doubts_media'],
 '211402': ['Doubts_other'],
 '211495': ['Emotion_Fear',
  'Doubts_Army',
  'Black_White',
  'Doubts_partners',
  'Doubts_other',
  'Emotion_Fear',
  'Doubts_other'],
 '211632': ['Emotion_Fear', 'Doubts_partners'],
 '211885': ['Doubts_government'],
 '212018': ['Doubts_other', 'Doubts_Army'],
 '212074': ['Black_White', 'Emotion_other'],
 '212219': ['Doubts_media'],
 '212307': ['Doubts_government'],
 '212

In [31]:
# Random 10 technique
true_labels = []
predicted = []

for id_, true_techniques in true_dict.items():
    # Get the corresponding predicted techniques from id_technique_dict
    #predicted_techniques = predicted_labels.get(id_, [])
    predicted_techniques = predicted_labels[id_] if id_ in predicted_labels else []
    
    # Add true and predicted labels to the respective lists
    true_labels.extend(true_techniques)
    predicted.extend(predicted_techniques)

# Generate classification report
report = classification_report(true_labels, predicted)

# Print classification report
print(report)

                      precision    recall  f1-score   support

         Black_White       0.03      0.08      0.04        37
         Doubts_Army       0.01      0.03      0.02        29
   Doubts_government       0.14      0.10      0.12       144
        Doubts_media       0.01      0.04      0.01        27
        Doubts_other       0.00      0.00      0.00         2
     Doubts_partners       0.03      0.23      0.05        13
       Emotion_Anger       0.17      0.11      0.13       158
        Emotion_Fear       0.20      0.12      0.15       179
Emotion_Hate_Disgust       0.12      0.12      0.12       105
       Emotion_other       0.27      0.07      0.12       338

            accuracy                           0.10      1032
           macro avg       0.10      0.09      0.08      1032
        weighted avg       0.18      0.10      0.12      1032


#### Predicted only majority class

In [33]:
true_labels = []
predicted_labels = []

# Iterate over the keys (IDs) in true_dict
for id_, true_techniques in true_dict.items():
    # Define the predicted technique
    predicted_technique = "Emotion_other"
    
    # Add true and predicted labels to the respective lists
    true_labels.extend(true_techniques)
    predicted_labels.extend([predicted_technique] * len(true_techniques))

# Generate a classification report
report = classification_report(true_labels, predicted_labels)

# Print the classification report
print(report)

                      precision    recall  f1-score   support

         Black_White       0.00      0.00      0.00        37
         Doubts_Army       0.00      0.00      0.00        29
   Doubts_government       0.00      0.00      0.00       144
        Doubts_media       0.00      0.00      0.00        27
        Doubts_other       0.00      0.00      0.00         2
     Doubts_partners       0.00      0.00      0.00        13
       Emotion_Anger       0.00      0.00      0.00       158
        Emotion_Fear       0.00      0.00      0.00       179
Emotion_Hate_Disgust       0.00      0.00      0.00       105
       Emotion_other       0.33      1.00      0.49       338

            accuracy                           0.33      1032
           macro avg       0.03      0.10      0.05      1032
        weighted avg       0.11      0.33      0.16      1032


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
