In [70]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
import re
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

In [1]:
import os

# Move the working directory two levels up from where the kernel started, so
# that the relative paths used by later cells ("Data/...",
# "classification_experiments/...") resolve.
# The starting directory is remembered on the first run, which makes the cell
# idempotent: re-running it no longer climbs two more levels each time.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, "..", ".."))

In [87]:
# Read the train and test splits; the first CSV column becomes the index.
TRAIN_DF, TEST_DF = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("Data/TRAIN_DF.csv", "Data/TEST_DF.csv")
)

In [88]:
# Show how many training rows carry each label.
TRAIN_DF['label'].value_counts()

label
normal        4287
hatespeech    3409
offensive     1304
Name: count, dtype: int64

In [89]:
# Load one CSV per model from the RAG experiment folder into its own DataFrame
# (judging by the file names these hold the classified test set — confirm).
# The first CSV column is used as the index of each frame.
hermes_rag_df = pd.read_csv("classification_experiments/rag/classified_test_df_Hermes-2_Q5_top4.csv", index_col=0)
mistral_rag_df = pd.read_csv("classification_experiments/rag/classified_test_df_mistral_7b_q8_top4.csv", index_col=0)
llama_rag_df = pd.read_csv("classification_experiments/rag/classified_test_df_llama-3.3_70b_Q6_top4.csv", index_col=0)

hermes_rag_df : RAG_Hermes-3-Llama-3.1-70B-Q5_K_S:latest
mistral_rag_df : RAG_mistral:7b-instruct-v0.2-q8_0
llama_rag_df : RAG_llama3.3:70b-instruct-q6_K

In [90]:
# Attach each RAG result frame to the test frame. No `on=` is given, so pandas
# joins on every column name the two frames share; `how='left'` keeps all rows
# of TEST_DF.
# NOTE(review): a column-based merge ignores TEST_DF's original index and
# returns a fresh 0..n-1 index. A later cell joins embeddings onto TEST_DF by
# index — confirm the rows still line up after this step.
for rag_df in (hermes_rag_df, mistral_rag_df, llama_rag_df):
    TEST_DF = TEST_DF.merge(rag_df, how='left')

In [91]:
import os
import pickle

# Location of the pickled embeddings. Defaults to the original hard-coded path;
# set the EMBEDDINGS_PKL environment variable to run the notebook on a machine
# where the file lives elsewhere.
EMBEDDINGS_PKL = os.environ.get(
    "EMBEDDINGS_PKL",
    "C:\\MachineLearning\\UniTrier\\RCS\\dataset_mit_embeddings_sfr.pkl",
)

# Load using pickle
# SECURITY: pickle.load can execute arbitrary code embedded in the file — only
# load pickles you produced yourself or otherwise trust.
with open(EMBEDDINGS_PKL, "rb") as f:
    embeddings_df = pickle.load(f)

In [92]:
# Attach the embedding column to both frames with an index-aligned left join.
# `rsuffix="_emb"` is applied to the joined column when its name clashes with
# an existing one; later cells read the result as 'X_train_emb'.
TRAIN_DF = TRAIN_DF.join(embeddings_df["X_train"], how="left", rsuffix="_emb")
# NOTE(review): the test frame is joined with the same "X_train" embeddings as
# the training frame, matched purely on index values. The XGBoost run further
# down scores 0.2962 on three equally sized classes (about chance), which is
# what misaligned test embeddings would produce — verify that
# embeddings_df["X_train"] really holds the test rows' embeddings at TEST_DF's
# current index values.
TEST_DF = TEST_DF.join(embeddings_df["X_train"], how="left", rsuffix="_emb")

In [93]:
# Maps raw label spellings (as produced by the models) to the three canonical
# class names: "offensive", "hatespeech" and "normal".
# NOTE(review): the normalization loop runs clean_text before this lookup, and
# clean_text cuts at "." and at double quotes, so the keys ending in "." and
# the list-like keys containing quotes cannot match there. They are kept
# because they are harmless.
normalization_map = {
    "offensive": "offensive",
    "Offensive": "offensive",
    "[offensive]": "offensive",
    "[Offensive]": "offensive",
    "<assistant> offensitive": "offensive",
    "offensive.": "offensive",

    "hate speech": "hatespeech",
    "Hatespeech": "hatespeech",
    # A stray `"hate speech"` literal without `: value,` used to sit on the line
    # before this entry. Python concatenated the two adjacent string literals
    # into the single bogus key "hate speechhatespeech", so "hatespeech" itself
    # was never a key.
    "hatespeech": "hatespeech",
    "Hate Speech": "hatespeech",
    "Hate speech": "hatespeech",
    "Hate speech.": "hatespeech",
    "hate speech.": "hatespeech",
    "hatedspeech": "hatespeech",
    "hatemspeech": "hatespeech",
    "haterspeech": "hatespeech",
    "[hate speech]": "hatespeech",
    "[Hate Speech]": "hatespeech",
    "<hate speech>": "hatespeech",
    "'hatespeech'": "hatespeech",
    "hatredspeech": "hatespeech",

    "normal": "normal",
    "Normal": "normal",
    "Normal.": "normal",
    "[normal]": "normal",
    "[Normal]": "normal",

    # Handle cases where the label is inside a list-like structure
    "[Userinput, \"normal\", 1]": "normal",
    "[Userinput, \"offensive\", 1]": "offensive",
    "[Userinput, \"hatespeech\", 1]": "hatespeech",
}


# Columns of TEST_DF whose raw string values get normalized: the "label"
# column plus one column per model run.
columns_to_normalize = [
    "label",
    "Hermes-3-Llama-3.1-70B-Q5_K_S",
    "llama3.3:70B-Instruct-Q2_K",
    "llama3.3:70b-instruct-q6_K-SEEN_DATA",
    "llama3.3:70b-instruct-q6_K",
    "mistral:7b-instruct-v0.2-q8_0",

    # RAG runs. The first slot used to repeat the llama RAG column, so the
    # Hermes RAG column was never normalized. The llama column stays at
    # position 8 because a later cell indexes this list with [8].
    # NOTE(review): the Hermes name is taken from the note under the RAG
    # loading cell — confirm it matches the CSV header exactly.
    "RAG_Hermes-3-Llama-3.1-70B-Q5_K_S:latest",
    "RAG_mistral:7b-instruct-v0.2-q8_0",
    "RAG_llama3.3:70b-instruct-q6_K",
]
def clean_text(text):
    """Return the leading part of a raw model answer.

    The string is cut at the first ".", newline, double quote, or "("
    (together with any whitespace directly in front of the parenthesis). The
    part before the cut is returned with surrounding whitespace removed.

    Non-string values (e.g. NaN or None) are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    # `maxsplit` is passed by keyword: the positional form of re.split's
    # maxsplit argument is deprecated since Python 3.13.
    head = re.split(r'[.\n"]|\s*\(', text, maxsplit=1)[0]
    return head.strip()
    
# Normalize every listed column: drop wrapping double quotes and surrounding
# whitespace, keep only the leading part of the answer (clean_text), then map
# known spelling variants to the canonical label names.
for column in columns_to_normalize:
    unquoted = TEST_DF[column].str.strip('"').str.strip()
    TEST_DF[column] = unquoted.apply(clean_text).replace(normalization_map)

In [94]:
# Distribution of the normalized llama RAG predictions. The column is addressed
# by name instead of the positional `columns_to_normalize[8]`, so editing that
# list cannot silently switch which column is inspected here.
TEST_DF["RAG_llama3.3:70b-instruct-q6_K"].value_counts()

RAG_llama3.3:70b-instruct-q6_K
hatespeech    420
offensive     344
normal        166
Name: count, dtype: int64

In [95]:
# Inspect the joined embedding column of the training frame.
TRAIN_DF['X_train_emb']

0       [-3.8870854, 5.516928, 1.5130757, -0.4188277, ...
1       [0.5292801, 1.2109683, -0.64650404, 2.0842671,...
2       [1.5791895, -0.32219458, 0.5753123, 1.6588185,...
3       [4.513079, -1.212101, -0.9618452, -1.1730635, ...
4       [3.9823904, -1.1007478, 0.04596758, 3.8737075,...
                              ...                        
8995    [-0.35139227, 3.5876145, -3.2650406, 3.1035538...
8996    [-4.0064135, 4.1975603, -0.6081386, 1.9110469,...
8997    [4.365764, 0.40833455, -0.82745516, -0.1222669...
8998    [7.199881, 2.5057359, 5.326466, 0.2398137, 2.4...
8999    [3.3631604, 1.5252637, -2.2813034, -3.4720714,...
Name: X_train_emb, Length: 9000, dtype: object

In [83]:
from sklearn.utils import resample

# Define the minimum count (smallest class)
min_count = TRAIN_DF['label'].value_counts().min()

# Downsample each class to `min_count`.
# Every label group is sampled on its own (same seed for each group, groups in
# sorted label order) and the pieces are concatenated. This picks the same rows
# as `groupby('label').apply(lambda x: x.sample(...))` did, without relying on
# GroupBy.apply operating on the grouping column. pandas has deprecated that
# behaviour, and under the newer semantics the 'label' column would end up in
# the index and be discarded by reset_index(drop=True).
TRAIN_DF = pd.concat(
    [
        label_group.sample(n=min_count, random_state=42)
        for _, label_group in TRAIN_DF.groupby('label')
    ]
).reset_index(drop=True)

# Check the new distribution
print(TRAIN_DF['label'].value_counts())

label
hatespeech    1304
normal        1304
offensive     1304
Name: count, dtype: int64


  .apply(lambda x: x.sample(n=min_count, random_state=42))


In [96]:
# Step 2: Prepare the labels (encoded labels for classification).
# The encoder is fitted on the training labels; the test labels are transformed
# with the same mapping.
label_encoder = LabelEncoder()
TRAIN_DF['label_encoded'] = label_encoder.fit_transform(TRAIN_DF['label'])
TEST_DF['label_encoded'] = label_encoder.transform(TEST_DF['label'])

# Step 3: Convert the embedding column and the encoded labels to plain lists
X_train = TRAIN_DF['X_train_emb'].tolist()
X_test = TEST_DF['X_train_emb'].tolist()
y_train = TRAIN_DF['label_encoded'].tolist()
y_test = TEST_DF['label_encoded'].tolist()


# Initialize the XGBoost classifier.
# `use_label_encoder=False` was removed from the arguments: the installed
# XGBoost ignores it and only prints
# 'Parameters: { "use_label_encoder" } are not used.'
xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',  # Use 'multi:softmax' for multi-class classification
    num_class=len(label_encoder.classes_),  # Number of classes
    eval_metric='mlogloss',  # Multi-class log loss
)

# Train the XGBoost model
xgb_model.fit(X_train, y_train)

# Make predictions
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate the XGBoost model
# NOTE(review): the recorded run scored 0.2962 on three equally sized classes,
# i.e. about chance — check that TEST_DF['X_train_emb'] really contains the
# embeddings of the test rows before trusting this number.
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")

# Detailed classification report for XGBoost
print("\nXGBoost Classification Report:")
print(classification_report(y_test, y_pred_xgb, target_names=label_encoder.classes_))





# Prediction columns of TEST_DF that are scored against the gold labels.
other_classifier_columns = [
    "Hermes-3-Llama-3.1-70B-Q5_K_S",
    "llama3.3:70B-Instruct-Q2_K",
    "llama3.3:70b-instruct-q6_K-SEEN_DATA",
    "llama3.3:70b-instruct-q6_K",
    "mistral:7b-instruct-v0.2-q8_0",

    # RAG runs. The first slot used to repeat the llama RAG column, so that run
    # was reported twice and the Hermes RAG run was never evaluated.
    # NOTE(review): the Hermes name is taken from the note under the RAG
    # loading cell — confirm it matches the CSV header, and make sure the
    # column also goes through the normalization step. Otherwise spelling
    # variants are filtered out as unknown labels.
    "RAG_Hermes-3-Llama-3.1-70B-Q5_K_S:latest",
    "RAG_mistral:7b-instruct-v0.2-q8_0",
    "RAG_llama3.3:70b-instruct-q6_K",
]

# Convert string predictions from other classifiers to label-encoded integers
# and score every prediction column against the encoded test labels.
# Metrics are computed only on rows whose prediction is one of the known class
# names, so the support can be smaller than the size of the test set.

# Class name -> integer code, in the encoder's own order (transform() maps
# classes_[i] to i). A dict lookup replaces one transform() call per row
# wrapped in try/except.
class_to_code = {str(name): code for code, name in enumerate(label_encoder.classes_)}
all_codes = list(range(len(label_encoder.classes_)))

# y_test is a plain list; `y_test != -1` on a list is a single bool, not an
# element-wise mask, so compare against an array instead.
y_test_arr = np.asarray(y_test)

for column in other_classifier_columns:
    # Anything that is not exactly a known class name (missing values from the
    # left merges, unparsed model output, ...) is encoded as -1.
    y_pred_other = (
        TEST_DF[column]
        .astype(str)
        .map(class_to_code)
        .fillna(-1)
        .astype(int)
        .to_numpy()
    )

    # Remove the '-1' values (unseen labels) from both y_pred_other and y_test
    valid_indices = (y_pred_other != -1) & (y_test_arr != -1)
    y_pred_other_valid = y_pred_other[valid_indices]
    y_test_valid = y_test_arr[valid_indices]

    # With no usable prediction there is nothing to score; say so explicitly
    # instead of failing inside the metric functions.
    if y_test_valid.size == 0:
        print(f"\nNo valid predictions for {column}; skipping.")
        continue

    # Accuracy for this classifier's predictions
    accuracy_other = accuracy_score(y_test_valid, y_pred_other_valid)
    print(f"\nAccuracy for {column}: {accuracy_other:.4f}")

    # Detailed classification report for each other classifier.
    # `labels` pins the full class list so target_names always lines up, even
    # when a class is absent from the valid subset.
    print(f"\n{column} Classification Report:")
    print(classification_report(
        y_test_valid,
        y_pred_other_valid,
        labels=all_codes,
        target_names=label_encoder.classes_,
    ))

Parameters: { "use_label_encoder" } are not used.



XGBoost Accuracy: 0.2962

XGBoost Classification Report:
              precision    recall  f1-score   support

  hatespeech       0.28      0.32      0.30       628
      normal       0.30      0.45      0.36       628
   offensive       0.32      0.12      0.17       628

    accuracy                           0.30      1884
   macro avg       0.30      0.30      0.28      1884
weighted avg       0.30      0.30      0.28      1884


Accuracy for Hermes-3-Llama-3.1-70B-Q5_K_S: 0.5210

Hermes-3-Llama-3.1-70B-Q5_K_S Classification Report:
              precision    recall  f1-score   support

  hatespeech       0.49      0.88      0.63       628
      normal       0.70      0.40      0.51       626
   offensive       0.44      0.29      0.35       627

    accuracy                           0.52      1881
   macro avg       0.54      0.52      0.50      1881
weighted avg       0.54      0.52      0.50      1881


Accuracy for llama3.3:70B-Instruct-Q2_K: 0.4613

llama3.3:70B-Instruct-Q2_

In [86]:
# Re-check how many training rows carry each label.
TRAIN_DF['label'].value_counts()

label
hatespeech    1304
normal        1304
offensive     1304
Name: count, dtype: int64