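# Experiment example usage

This notebook runs `LLMSizeExperiment` on the simplified GoEmotions dataset with two classifiers (a custom `EmotionClassifier` and the pretrained `SamLowe/roberta-base-go_emotions` model) and then displays the collected model, prediction, and data statistics.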

In [1]:
# Make the repository root the working directory so that the project packages
# imported below (core, basic, intermediate, tmp) and the relative data paths
# ("tmp/dict.txt", "basic/...") resolve.
# The root is found by walking up from the current directory instead of
# hard-coding a machine-specific absolute path, so the notebook runs on any
# checkout; re-running this cell is a no-op once the root is the cwd.
import os
from pathlib import Path

_start_dir = Path.cwd()
_repo_root = next(
    (
        candidate
        for candidate in (_start_dir, *_start_dir.parents)
        if all((candidate / package).is_dir() for package in ("core", "basic", "intermediate"))
    ),
    None,
)
if _repo_root is None:
    raise FileNotFoundError(
        f"Could not locate the repository root (a folder containing core/, basic/ and "
        f"intermediate/) starting from {_start_dir}"
    )
os.chdir(_repo_root)
print(_repo_root)

C:\Users\Sebastian\Desktop\GitHubRepositories\llm-workshop\intermediate
C:\Users\Sebastian\Desktop\GitHubRepositories\llm-workshop


In [2]:
from core.base_model import BaseModel
from basic.huggingface_model import HuggingFaceModel
from basic.llm_model import LLMModel
from intermediate.llm_size_experiment import LLMSizeExperiment
from tmp.custom_model import EmotionBERT, EmotionClassifier

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, pipeline
from torch.utils.data import DataLoader
from typing import Union
from datasets import Dataset, load_dataset
import pandas as pd
import random
# Show every column and let pandas auto-detect the output width, so the wide
# statistics tables displayed later are not truncated.
pd.options.display.max_columns = None
pd.options.display.width = None

## Load data

In [3]:
# Load the "simplified" configuration of the GoEmotions dataset; the
# "train", "validation" and "test" splits are read from it below.
go_emotions_simplified_data = load_dataset(path="go_emotions", name="simplified")

In [4]:
# Emotion names, one per line; a label id from the dataset is used as an
# index into this list (see prepare_df). The encoding is given explicitly so
# the file is decoded the same way regardless of the platform's locale.
with open("tmp/dict.txt", "r", encoding="utf-8") as label_file:
    emotions = [line.strip() for line in label_file]

def prepare_df(df, emotions):
    """Build a two-column (text, sentiment) frame from a GoEmotions split.

    Only the first entry of each row's ``labels`` sequence is used; it is
    looked up in ``emotions`` to obtain the sentiment name.

    Unlike the previous version, the input frame is left untouched (no
    ``label`` / ``sentiment`` columns are added to it as a side effect).

    Args:
        df: DataFrame with a ``text`` column and a ``labels`` column whose
            cells are non-empty sequences of integer label ids.
        emotions: Sequence of emotion names indexed by label id.

    Returns:
        A new DataFrame with the columns ``text`` and ``sentiment`` and the
        same index as ``df``.

    Raises:
        IndexError: If a row has an empty ``labels`` sequence or a label id
            is outside the range of ``emotions``.
    """
    first_label_ids = df["labels"].apply(lambda label_ids: label_ids[0])
    sentiments = first_label_ids.apply(lambda idx: emotions[idx])
    # df[["text"]] is a new frame, so assign() never writes into the caller's df.
    return df[["text"]].assign(sentiment=sentiments)

In [5]:
# Convert each dataset split to pandas and reduce it to (text, sentiment).
train_df, val_df, test_df = (
    prepare_df(go_emotions_simplified_data[split_name].to_pandas(), emotions)
    for split_name in ("train", "validation", "test")
)
print(train_df.head())

                                                text  sentiment
0  My favourite food is anything I didn't have to...    neutral
1  Now if he does off himself, everyone will thin...    neutral
2                     WHY THE FUCK IS BAYLESS ISOING      anger
3                        To make her feel threatened       fear
4                             Dirty Southern Wankers  annoyance


In [6]:
# Texts (x_*) and sentiment names (y_*) of every split as plain Python lists.
x_train, y_train = train_df['text'].tolist(), train_df['sentiment'].tolist()
x_val, y_val = val_df['text'].tolist(), val_df['sentiment'].tolist()
x_test, y_test = test_df['text'].tolist(), test_df['sentiment'].tolist()

# First 20 rows of the example CSV, with the "target" column exposed as "output".
# A forward slash is used as the path separator: it works on Windows, Linux
# and macOS, whereas the previous "basic\\..." form only resolved on Windows.
train_data = (
    pd.read_csv("basic/clearly_seperated.csv")
    .rename(columns={"target": "output"})
    .head(20)
)
x_train_raw = train_data["input"].tolist()
y_train_raw = train_data["output"].tolist()
# Keep only the pairs where both the input and the output are present.
training_pairs = [
    (x, y)
    for x, y in zip(x_train_raw, y_train_raw)
    if pd.notna(x) and pd.notna(y)
]
x_train_llm = [x for x, _ in training_pairs]
y_train_llm = [y for _, y in training_pairs]
print(x_train[:5])
print(y_train[:5])

# Descriptive settings passed to every experiment.run(...) call below.
dataset_name = 'go_emotions'
concept = "sentiment analysis"
concept_keywords = ["sentiment", "emotion"]

# LLMs to compare, keyed by a short name. Only the smallest OPT checkpoint is
# enabled; uncomment further sizes to include them in the experiment.
llm_models = {
    opt_size: LLMModel(model_name=f"facebook/{opt_size}")
    for opt_size in (
        "opt-125m",
        # "opt-350m",
        # "opt-1.3b",
        # "opt-2.7b",
        # "opt-6.7b",
        # "opt-13b",
        # "opt-30b",
        # "opt-66b",
    )
}

# Temporary for testing: shrink the data to the first 5 training samples.
# NOTE: validation and test are aliases of those same 5 training samples, so
# every metric reported further down is measured on training data.
x_train, y_train = x_train[:5], y_train[:5]
x_val = x_test = x_train
y_val = y_test = y_train

["My favourite food is anything I didn't have to cook myself.", 'Now if he does off himself, everyone will think hes having a laugh screwing with people instead of actually dead', 'WHY THE FUCK IS BAYLESS ISOING', 'To make her feel threatened', 'Dirty Southern Wankers']
['neutral', 'neutral', 'anger', 'fear', 'annoyance']
Loading model: facebook/opt-125m
Model loaded successfully on cpu


## Run experiment

In [7]:
# Experiment object whose run(...) method is called once per classifier below.
experiment = LLMSizeExperiment()

In [8]:
# Empty result tables; the frames returned by each experiment.run(...) call
# are appended to them after every run.
model_statistics, prediction_statistics, data_statistics = (
    pd.DataFrame() for _ in range(3)
)

### EmotionClassifier

In [9]:
# Project-local classifier (tmp.custom_model), created for CPU; it is trained
# by the experiment.run(...) call in the next cell (train_classifier=True).
custom_classifier = EmotionClassifier(device='cpu')

In [10]:
# Run 1: the custom EmotionClassifier, trained as part of the run
# (train_classifier=True, 2 epochs, batch size 2), compared against every
# model in llm_models. The three returned frames are appended to the
# accumulated statistics tables in the next cell.
# NOTE(review): the roberta-base-go_emotions run further down repeats this call
# with only the classifier* arguments changed; a small helper taking those
# arguments would remove the duplication.
model_statistics_tmp, prediction_statistics_tmp, data_statistics_tmp = experiment.run(
    # Data splits (currently all aliases of the same 5 training samples).
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    x_val=x_val,
    y_val=y_val,
    max_samples_for_llm_train=15,
    dataset_name=dataset_name,
    concept=concept,
    concept_keywords=concept_keywords,
    max_samples_for_concept=10,
    
    # Classifier under study.
    classifier_name="custom_model",
    classifier=custom_classifier,
    train_classifier=True,
    classifier_train_arguments = {"epochs": 2, "batch_size": 2},
    
    llm_models=llm_models,
    
    # Prompt templates; the {x_*}/{y_*} placeholders are presumably filled in
    # by the experiment for each sample -- confirm in LLMSizeExperiment.
    prompt_header_llm_concept    = "In 2 words guess, what task is the model doing:\n",
    prompt_content_llm_concept   = "{x_test} -> {y_test}\n",
    prompt_tail_llm_concept      = "What is this task?",
    
    prompt_header_llm_train    = "You are a classificator\n",
    prompt_content_llm_train   = "{x_train} -> {y_train}\n",
    prompt_tail_llm_train      = "Learn based on this.",
    
    prompt_llm_simulation= "{x_test}"
)

Labels setup: 4 unique labels found: ['anger', 'annoyance', 'fear', 'neutral']
Epoch 1/2g batch 3/3
  Train Loss: 1.4977
  Val Accuracy: 0.8000
  Val F1 Macro: 0.7000
  Val ROC AUC (Macro OVR): 1.0000
  New best model saved with F1 Macro: 0.7000
Epoch 2/2g batch 3/3
  Train Loss: 1.0414
  Val Accuracy: 1.0000
  Val F1 Macro: 1.0000
  Val ROC AUC (Macro OVR): 1.0000
  New best model saved with F1 Macro: 1.0000
Training finished. Loading best model state.
Running experiment for LLM: opt-125m
Training completed with 5 examples




In [11]:
# Append the frames returned by the latest run to the accumulated tables.
# NOTE: re-running this cell appends the same rows a second time.
model_statistics, prediction_statistics, data_statistics = (
    pd.concat([accumulated, latest], ignore_index=True)
    for accumulated, latest in (
        (model_statistics, model_statistics_tmp),
        (prediction_statistics, prediction_statistics_tmp),
        (data_statistics, data_statistics_tmp),
    )
)

### roberta-base-go_emotions

In [12]:
# Pretrained "SamLowe/roberta-base-go_emotions" model wrapped in the project's
# HuggingFaceModel, with GPU use disabled; it is passed to experiment.run(...)
# below with train_classifier=False.
HGF = HuggingFaceModel(model_name="SamLowe/roberta-base-go_emotions", use_gpu=False)

In [13]:
# Run 2: the pretrained roberta-base-go_emotions classifier, used as-is
# (train_classifier=False, no training arguments), compared against every
# model in llm_models. The three returned frames are appended to the
# accumulated statistics tables in the next cell.
# NOTE(review): apart from the classifier* arguments this call is identical to
# the custom_model run above; a small helper would remove the duplication.
model_statistics_tmp, prediction_statistics_tmp, data_statistics_tmp = experiment.run(
    # Data splits (currently all aliases of the same 5 training samples).
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    x_val=x_val,
    y_val=y_val,
    max_samples_for_llm_train=15,
    dataset_name=dataset_name,
    concept=concept,
    concept_keywords=concept_keywords,
    max_samples_for_concept=10,
    
    # Classifier under study.
    classifier_name="roberta-base-go_emotions",
    classifier=HGF,
    train_classifier=False,
    classifier_train_arguments = {},
    
    llm_models=llm_models,
    
    # Prompt templates; the {x_*}/{y_*} placeholders are presumably filled in
    # by the experiment for each sample -- confirm in LLMSizeExperiment.
    prompt_header_llm_concept    = "In 2 words guess, what task is the model doing:\n",
    prompt_content_llm_concept   = "{x_test} -> {y_test}\n",
    prompt_tail_llm_concept      = "What is this task?",
    
    prompt_header_llm_train    = "You are a classificator\n",
    prompt_content_llm_train   = "{x_train} -> {y_train}\n",
    prompt_tail_llm_train      = "Learn based on this.",
    
    prompt_llm_simulation= "{x_test}"
)

Device set to use cpu


Running experiment for LLM: opt-125m
Training completed with 5 examples




In [14]:
# Append the frames returned by the latest run to the accumulated tables.
# NOTE: re-running this cell appends the same rows a second time.
_accumulated_tables = (model_statistics, prediction_statistics, data_statistics)
_latest_tables = (model_statistics_tmp, prediction_statistics_tmp, data_statistics_tmp)
model_statistics, prediction_statistics, data_statistics = [
    pd.concat([accumulated, latest], ignore_index=True)
    for accumulated, latest in zip(_accumulated_tables, _latest_tables)
]

## View results

In [15]:
# One row per run: classifier metrics, LLM metrics and the prompts used.
model_statistics.head()

Unnamed: 0,run_id,dataset_name,classifier,llm,classifier_accuracy,classifier_precision,classifier_recall,classifier_f1,classifier_balanced_accuracy,classifier_cohen_kappa,classifier_mcc,llm_concept_accuracy,llm_simulation_accuracy,llm_simulation_precision,llm_simulation_recall,llm_simulation_f1,llm_simulation_balanced_accuracy,llm_simulation_cohen_kappa,llm_simulation_mcc,llm_direct_prediction_accuracy,llm_direct_precision,llm_direct_recall,llm_direct_f1,llm_direct_balanced_accuracy,llm_direct_cohen_kappa,llm_direct_mcc,prompt_header_llm_concept,prompt_content_llm_concept,prompt_tail_llm_concept,prompt_header_llm_train,prompt_content_llm_train,prompt_tail_llm_train,prompt_llm_simulation,llm_predicted_concept
0,1,go_emotions,custom_model,opt-125m,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"In 2 words guess, what task is the model doing:\n",{x_test} -> {y_test}\n,What is this task?,You are a classificator\n,{x_train} -> {y_train}\n,Learn based on this.,{x_test},No response generated
1,2,go_emotions,roberta-base-go_emotions,opt-125m,0.4,0.2,0.3,0.233333,0.375,0.210526,0.235702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"In 2 words guess, what task is the model doing:\n",{x_test} -> {y_test}\n,What is this task?,You are a classificator\n,{x_train} -> {y_train}\n,Learn based on this.,{x_test},No response generated


In [16]:
# One row per test sample and run: true label, classifier prediction with its
# confidence, and the LLM's simulated prediction.
prediction_statistics.head()

Unnamed: 0,run_id,dataset_name,classifier_name,llm_name,x_test,y_test,classifier_predicted_label,classifier_predicted_label_confidence,llm_simulation_predicted_label
0,1,go_emotions,custom_model,opt-125m,My favourite food is anything I didn't have to...,neutral,neutral,0.48228,or or all ( or anything else I or the the or t...
1,1,go_emotions,custom_model,opt-125m,"Now if he does off himself, everyone will thin...",neutral,neutral,0.547152,or or the main or cause cause shooting or shoo...
2,1,go_emotions,custom_model,opt-125m,WHY THE FUCK IS BAYLESS ISOING,anger,anger,0.351205,or or if or a or anything else** or any\n11 or...
3,1,go_emotions,custom_model,opt-125m,To make her feel threatened,fear,fear,0.28816,the a or a
4,1,go_emotions,custom_model,opt-125m,Dirty Southern Wankers,annoyance,annoyance,0.593957,or or the point or any to to the shooting you ...


In [17]:
# One row per run and partition (train/val/test): sample count, label
# distribution and average text length / word count.
data_statistics.head()

Unnamed: 0,run_id,dataset_name,classifier_name,partition,num_samples,label_counts,label_proportions,avg_text_length,avg_word_count
0,1,go_emotions,custom_model,train,5,"{'neutral': 2, 'anger': 1, 'fear': 1, 'annoyan...","{'neutral': 0.4, 'anger': 0.2, 'fear': 0.2, 'a...",50.0,9.0
1,1,go_emotions,custom_model,val,5,"{'neutral': 2, 'anger': 1, 'fear': 1, 'annoyan...","{'neutral': 0.4, 'anger': 0.2, 'fear': 0.2, 'a...",50.0,9.0
2,1,go_emotions,custom_model,test,5,"{'neutral': 2, 'anger': 1, 'fear': 1, 'annoyan...","{'neutral': 0.4, 'anger': 0.2, 'fear': 0.2, 'a...",50.0,9.0
3,2,go_emotions,roberta-base-go_emotions,train,5,"{'neutral': 2, 'anger': 1, 'fear': 1, 'annoyan...","{'neutral': 0.4, 'anger': 0.2, 'fear': 0.2, 'a...",50.0,9.0
4,2,go_emotions,roberta-base-go_emotions,val,5,"{'neutral': 2, 'anger': 1, 'fear': 1, 'annoyan...","{'neutral': 0.4, 'anger': 0.2, 'fear': 0.2, 'a...",50.0,9.0
