# Experiment example usage

In [1]:
# NOTE(review): the first %cd uses an absolute, machine-specific path. After the
# second %cd the working directory is the repository root (see the cell output).
# Presumably this is what lets the project imports in the next cell resolve — confirm.
%cd C:\Users\Sebastian\Desktop\GitHubRepositories\llm-workshop\intermediate
%cd ..

C:\Users\Sebastian\Desktop\GitHubRepositories\llm-workshop\intermediate
C:\Users\Sebastian\Desktop\GitHubRepositories\llm-workshop


In [2]:
from core.base_model import BaseModel
from basic.huggingface_model import HuggingFaceModel
from basic.llm_model import LLMModel
from intermediate.llm_size_experiment import LLMSizeExperiment
from tmp.custom_model import EmotionBERT, EmotionClassifier

from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, pipeline
from torch.utils.data import DataLoader
from typing import Union
from datasets import Dataset, load_dataset
import pandas as pd
import random

# Display configuration: both options are set to None (no fixed column limit,
# no fixed display width).
pd.options.display.max_columns = None
pd.options.display.width = None

## Load data

In [3]:
# Load the "simplified" configuration of the go_emotions dataset; its
# "train", "validation" and "test" splits are used further down.
go_emotions_simplified_data = load_dataset("go_emotions", "simplified")

In [4]:
# Read the emotion names, one per line, with surrounding whitespace removed.
# The position of a name in this list is what prepare_df uses as its label index.
with open("tmp/dict.txt", "r") as dict_file:
    emotions = [raw_line.strip() for raw_line in dict_file]

def prepare_df(df, emotions):
    """Build a two-column ``(text, sentiment)`` frame from a raw dataset split.

    Only the first entry of each row's ``labels`` sequence is used; it is
    treated as an index into ``emotions``. The input frame is not modified:
    no helper columns are written back to ``df``.

    Args:
        df: DataFrame with a ``text`` column and a ``labels`` column whose
            values are indexable sequences of integer label ids.
        emotions: Sequence mapping a label id (position) to its name.

    Returns:
        A new DataFrame with the columns ``text`` and ``sentiment`` (in that
        order) and the same index as ``df``.

    Raises:
        IndexError: If a row has an empty ``labels`` sequence or a label id
            that is out of range for ``emotions``.
    """
    # Keep only the first label of each row.
    first_label = df["labels"].apply(lambda labels: labels[0])
    sentiment = first_label.apply(lambda idx: emotions[idx])
    # df[["text"]] is a new frame, so assign() never touches the caller's df.
    return df[["text"]].assign(sentiment=sentiment)

In [5]:
# Convert each split to pandas and reduce it to the (text, sentiment) columns.
train_df, val_df, test_df = (
    prepare_df(go_emotions_simplified_data[split_name].to_pandas(), emotions)
    for split_name in ("train", "validation", "test")
)
print(train_df.head())

                                                text  sentiment
0  My favourite food is anything I didn't have to...    neutral
1  Now if he does off himself, everyone will thin...    neutral
2                     WHY THE FUCK IS BAYLESS ISOING      anger
3                        To make her feel threatened       fear
4                             Dirty Southern Wankers  annoyance


In [6]:
# Flatten every split into parallel lists of texts and sentiment labels.
x_train, y_train = train_df['text'].tolist(), train_df['sentiment'].tolist()
x_val, y_val = val_df['text'].tolist(), val_df['sentiment'].tolist()
x_test, y_test = test_df['text'].tolist(), test_df['sentiment'].tolist()

# Preview the first five training texts and their labels.
print(x_train[:5], y_train[:5], sep="\n")

# Experiment metadata forwarded unchanged to every experiment.run(...) call.
dataset_name = 'go_emotions'
concept = "sentiment analysis"
concept_keywords = ["sentiment", "emotion"]

# LLMs to evaluate, keyed by a short name; all are created with use_gpu=False.
# Larger OPT checkpoints are currently disabled:
#   opt-1.3b, opt-2.7b, opt-6.7b, opt-13b, opt-30b, opt-66b
llm_models = {
    size_tag: LLMModel(model_name=f"facebook/{size_tag}", use_gpu=False)
    for size_tag in ("opt-125m", "opt-350m")
}

# Temporary for testing: keep only the first 50 training samples.
# NOTE: validation and test are aliased to that same training subset, so the
# validation/test metrics below are not computed on held-out data.
x_train, y_train = x_train[:50], y_train[:50]
x_val = x_test = x_train
y_val = y_test = y_train

["My favourite food is anything I didn't have to cook myself.", 'Now if he does off himself, everyone will think hes having a laugh screwing with people instead of actually dead', 'WHY THE FUCK IS BAYLESS ISOING', 'To make her feel threatened', 'Dirty Southern Wankers']
['neutral', 'neutral', 'anger', 'fear', 'annoyance']


In [7]:
# e = LLMModel(model_name="facebook/opt-6.7b", use_gpu=False)
# e.predict('I am very annoyed!')

## Run experiment

In [8]:
# Single experiment runner; its run(...) method is called once per classifier below.
experiment = LLMSizeExperiment()

In [9]:
# Three independent, empty accumulators; each run's results are concatenated
# onto them in later cells.
model_statistics, prediction_statistics, data_statistics = (
    pd.DataFrame() for _ in range(3)
)

### EmotionClassifier

In [10]:
# Custom classifier imported from tmp.custom_model, configured with device='cpu'.
custom_classifier = EmotionClassifier(device='cpu')

In [11]:
# Run the experiment for the custom classifier, which is trained as part of the
# run (train_classifier=True, 2 epochs, batch size 2).
# The {x_test}/{y_test}/{x_train}/{y_train} placeholders in the prompt templates
# are passed through as plain strings — presumably filled in by experiment.run.
custom_run_kwargs = dict(
    # Data partitions and dataset/concept metadata.
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    x_val=x_val,
    y_val=y_val,
    dataset_name=dataset_name,
    concept=concept,
    concept_keywords=concept_keywords,
    # Classifier under test.
    classifier_name="custom_model",
    classifier=custom_classifier,
    train_classifier=True,
    classifier_train_arguments={"epochs": 2, "batch_size": 2},
    # LLMs to compare.
    llm_models=llm_models,
    # Prompt templates: concept guessing.
    prompt_header_llm_concept="In 2 words guess, what task is the model doing:\n",
    prompt_content_llm_concept="{x_test} -> {y_test}\n",
    prompt_tail_llm_concept="What is this task?",
    # Prompt templates: in-context "training".
    prompt_header_llm_train="You are a classificator\n",
    prompt_content_llm_train="{x_train} -> {y_train}\n",
    prompt_tail_llm_train="Learn based on this.",
    # Prompt template: simulation.
    prompt_llm_simulation="{x_test}",
)

model_statistics_tmp, prediction_statistics_tmp, data_statistics_tmp = experiment.run(
    **custom_run_kwargs
)

Labels setup: 18 unique labels found: ['admiration', 'amusement', 'anger', 'annoyance', 'caring', 'confusion', 'curiosity', 'desire', 'disapproval', 'embarrassment', 'fear', 'gratitude', 'grief', 'joy', 'neutral', 'optimism', 'sadness', 'surprise']
Epoch 1/2g batch 25/25
  Train Loss: 2.8435
  Val Accuracy: 0.3400
  Val F1 Macro: 0.0456
  Val ROC AUC (Macro OVR): 0.8927
  New best model saved with F1 Macro: 0.0456
Epoch 2/2g batch 25/25
  Train Loss: 2.3018
  Val Accuracy: 0.3800
  Val F1 Macro: 0.0754
  Val ROC AUC (Macro OVR): 0.9926
  New best model saved with F1 Macro: 0.0754
Training finished. Loading best model state.
Running experiment for LLM: opt-125m




Running experiment for LLM: opt-350m




In [12]:
# Append the latest run's tables to the accumulated ones (index is renumbered).
# NOTE: re-running this cell appends the same *_tmp rows a second time.
model_statistics, prediction_statistics, data_statistics = (
    pd.concat(pair, ignore_index=True)
    for pair in (
        [model_statistics, model_statistics_tmp],
        [prediction_statistics, prediction_statistics_tmp],
        [data_statistics, data_statistics_tmp],
    )
)

### roberta-base-go_emotions

In [13]:
# Project HuggingFaceModel wrapper around the "SamLowe/roberta-base-go_emotions"
# checkpoint, created with use_gpu=False.
HGF = HuggingFaceModel(model_name="SamLowe/roberta-base-go_emotions", use_gpu=False)

In [14]:
# Run the experiment for the roberta-base-go_emotions classifier, used as-is
# (train_classifier=False, no training arguments).
# The {x_test}/{y_test}/{x_train}/{y_train} placeholders in the prompt templates
# are passed through as plain strings — presumably filled in by experiment.run.
roberta_run_kwargs = dict(
    # Data partitions and dataset/concept metadata.
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    x_val=x_val,
    y_val=y_val,
    dataset_name=dataset_name,
    concept=concept,
    concept_keywords=concept_keywords,
    # Classifier under test.
    classifier_name="roberta-base-go_emotions",
    classifier=HGF,
    train_classifier=False,
    classifier_train_arguments={},
    # LLMs to compare.
    llm_models=llm_models,
    # Prompt templates: concept guessing.
    prompt_header_llm_concept="In 2 words guess, what task is the model doing:\n",
    prompt_content_llm_concept="{x_test} -> {y_test}\n",
    prompt_tail_llm_concept="What is this task?",
    # Prompt templates: in-context "training".
    prompt_header_llm_train="You are a classificator\n",
    prompt_content_llm_train="{x_train} -> {y_train}\n",
    prompt_tail_llm_train="Learn based on this.",
    # Prompt template: simulation.
    prompt_llm_simulation="{x_test}",
)

model_statistics_tmp, prediction_statistics_tmp, data_statistics_tmp = experiment.run(
    **roberta_run_kwargs
)

Device set to use cpu


Running experiment for LLM: opt-125m




Running experiment for LLM: opt-350m




In [15]:
# Append the latest run's tables to the accumulated ones (index is renumbered).
# NOTE: re-running this cell appends the same *_tmp rows a second time.
model_statistics, prediction_statistics, data_statistics = (
    pd.concat(pair, ignore_index=True)
    for pair in (
        [model_statistics, model_statistics_tmp],
        [prediction_statistics, prediction_statistics_tmp],
        [data_statistics, data_statistics_tmp],
    )
)

## View results

In [16]:
# First rows of the metrics table: one row per (classifier, llm) combination,
# including the prompt templates that were used.
model_statistics.head()

Unnamed: 0,run_id,dataset_name,classifier,llm,classifier_accuracy,classifier_precision,classifier_recall,classifier_f1,classifier_balanced_accuracy,classifier_cohen_kappa,classifier_mcc,llm_concept_accuracy,llm_simulation_accuracy,llm_simulation_precision,llm_simulation_recall,llm_simulation_f1,llm_simulation_balanced_accuracy,llm_simulation_cohen_kappa,llm_simulation_mcc,llm_direct_prediction_accuracy,llm_direct_precision,llm_direct_recall,llm_direct_f1,llm_direct_balanced_accuracy,llm_direct_cohen_kappa,llm_direct_mcc,prompt_header_llm_concept,prompt_content_llm_concept,prompt_tail_llm_concept,prompt_header_llm_train,prompt_content_llm_train,prompt_tail_llm_train,prompt_llm_simulation,llm_predicted_concept
0,1,go_emotions,custom_model,opt-125m,0.38,0.130787,0.085185,0.075356,0.085185,0.075179,0.196844,0.0,0.0,0.0,0.0,0.0,0.0,1.110223e-16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"In 2 words guess, what task is the model doing:\n",{x_test} -> {y_test}\n,What is this task?,You are a classificator\n,{x_train} -> {y_train}\n,Learn based on this.,{x_test},or
1,1,go_emotions,custom_model,opt-350m,0.38,0.130787,0.085185,0.075356,0.085185,0.075179,0.196844,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"In 2 words guess, what task is the model doing:\n",{x_test} -> {y_test}\n,What is this task?,You are a classificator\n,{x_train} -> {y_train}\n,Learn based on this.,{x_test},’’’’’
2,2,go_emotions,roberta-base-go_emotions,opt-125m,0.74,0.51553,0.528342,0.510698,0.645752,0.69555,0.698837,0.0,0.02,0.001161,0.047619,0.002268,0.052632,0.003660024,0.007088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"In 2 words guess, what task is the model doing:\n",{x_test} -> {y_test}\n,What is this task?,You are a classificator\n,{x_train} -> {y_train}\n,Learn based on this.,{x_test},or
3,2,go_emotions,roberta-base-go_emotions,opt-350m,0.74,0.51553,0.528342,0.510698,0.645752,0.69555,0.698837,0.0,0.0,0.0,0.0,0.0,0.0,-2.220446e-16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"In 2 words guess, what task is the model doing:\n",{x_test} -> {y_test}\n,What is this task?,You are a classificator\n,{x_train} -> {y_train}\n,Learn based on this.,{x_test},’’’’’


In [17]:
# First rows of the per-sample table: test text, true label, the classifier's
# predicted label with its confidence, and the LLM's simulated label.
prediction_statistics.head()

Unnamed: 0,run_id,dataset_name,classifier_name,llm_name,x_test,y_test,classifier_predicted_label,classifier_predicted_label_confidence,llm_simulation_predicted_label
0,1,go_emotions,custom_model,opt-125m,My favourite food is anything I didn't have to...,neutral,neutral,0.666657,love
1,1,go_emotions,custom_model,opt-125m,"Now if he does off himself, everyone will thin...",neutral,neutral,0.548849,love
2,1,go_emotions,custom_model,opt-125m,WHY THE FUCK IS BAYLESS ISOING,anger,neutral,0.344517,love
3,1,go_emotions,custom_model,opt-125m,To make her feel threatened,fear,neutral,0.192742,love
4,1,go_emotions,custom_model,opt-125m,Dirty Southern Wankers,annoyance,neutral,0.205182,"i,"


In [18]:
# First rows of the dataset summary: one row per run and partition
# (train/val/test) with sample counts and label distributions.
data_statistics.head()

Unnamed: 0,run_id,dataset_name,classifier_name,partition,num_samples,label_counts,label_proportions,avg_text_length,avg_word_count
0,1,go_emotions,custom_model,train,50,"{'neutral': 17, 'anger': 5, 'fear': 1, 'annoya...","{'neutral': 0.34, 'anger': 0.1, 'fear': 0.02, ...",61.7,11.4
1,1,go_emotions,custom_model,val,50,"{'neutral': 17, 'anger': 5, 'fear': 1, 'annoya...","{'neutral': 0.34, 'anger': 0.1, 'fear': 0.02, ...",61.7,11.4
2,1,go_emotions,custom_model,test,50,"{'neutral': 17, 'anger': 5, 'fear': 1, 'annoya...","{'neutral': 0.34, 'anger': 0.1, 'fear': 0.02, ...",61.7,11.4
3,2,go_emotions,roberta-base-go_emotions,train,50,"{'neutral': 17, 'anger': 5, 'fear': 1, 'annoya...","{'neutral': 0.34, 'anger': 0.1, 'fear': 0.02, ...",61.7,11.4
4,2,go_emotions,roberta-base-go_emotions,val,50,"{'neutral': 17, 'anger': 5, 'fear': 1, 'annoya...","{'neutral': 0.34, 'anger': 0.1, 'fear': 0.02, ...",61.7,11.4
