### Install the model:
`! pip install ollama llama-index-llms-ollama`
`! sudo snap install ollama`
`! ollama run llama3`

### After downloading the base model and creating the modelfile, we create the parametrised model for our task:
`! ollama create [model name] -f [modelfile name]`

In [1]:
import json

import numpy as np
import tqdm
from llama_index.llms.ollama import Ollama
from sklearn.preprocessing import MultiLabelBinarizer
from torch.utils.data import DataLoader

from lib.dataset_utils import load_twitter_data_cleaned, load_goemotions_cleaned

### Loading Twitter

In [2]:
# The loader returns three values; only the last one (the test split) is used here.
_, _, twitter_test = load_twitter_data_cleaned()

# Candidate labels, already formatted for direct concatenation into the prompt.
twitter_emotions = "'joy', 'sadness','anger', 'fear', 'love', 'surprise'"

### Loading GoEmotions

In [3]:
import json

label_mapping_path = "./dataset/GoEmotionsSplit/label_mapping.json"

# The loader returns three values; only the last one (the test split) is used here.
_, _, goemotions_test = load_goemotions_cleaned()

# Read the mapping inside a context manager so the file handle is always closed.
with open(label_mapping_path, encoding="utf-8") as json1_file:
    json1_str = json1_file.read()
json1_data = json.loads(json1_str)

# str(json1_data.values()) would put the literal "dict_values([...])" wrapper in
# the prompt. Build a plain quoted, comma-separated list instead, in the same
# shape as twitter_emotions.
# NOTE(review): assumes the mapping's values are the emotion names - confirm
# against label_mapping.json.
goemotions_emotions = ", ".join(f"'{emotion}'" for emotion in json1_data.values())

In [17]:
# Prompt fragments, concatenated by hand into a single completion prompt.
# The <|...|> markers are copied verbatim into the text sent to the model.

# System instruction for single-label classification over the Twitter label set.
TWITTER_BASE_PROMPT = """<|start_header_id|>system<|end_header_id|> Classify the sentences. Choose ONLY ONE EMOTION among the following: """ + twitter_emotions

# Few-shot example block. The answer must be valid JSON (note the comma between
# the two entries) because the model's reply is later parsed with json.loads and
# the model tends to imitate the format it is shown.
SAMPLES = """ 
text: 1. i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived
2. im updating my blog because i feel shitty

answer:{
    "1": "joy",
    "2": "sadness"
    }
<|eot_id|>"""

# System instruction for single-label classification over the GoEmotions label set.
GOEMOTIONS_SINGLE_BASE_PROMPT = """<|start_header_id|>system<|end_header_id|> Classify the sentences. Choose ONLY ONE EMOTION among the following: """ + goemotions_emotions

# System instruction for multi-label (up to three) classification over GoEmotions.
GOEMOTIONS_MULTI_BASE_PROMPT = """<|start_header_id|>system<|end_header_id|> Classify the sentences. Choose a maximum of three emotions among the following: """ + goemotions_emotions

# Inserted between a base prompt and the few-shot examples.
SAMPLES_STRING = """Here are some samples:"""

# Appended after the sentences to classify.
TERMINATOR_STRING = """<|eot_id|>"""

In [10]:
class Llama3():
    """Prompted emotion classifier backed by a model served through Ollama.

    The instance builds a text prompt (instruction, optional few-shot samples,
    numbered sentences), asks the model for a completion, parses the JSON
    object found in the reply and scores the predicted emotions.
    """

    # Columns of a tabular test set that hold inputs rather than emotion labels.
    # NOTE(review): inferred from the twitter split displayed in this notebook,
    # whose only non-label column is 'text' - confirm for other datasets, or
    # override this attribute on the instance.
    non_label_columns = ("text",)

    def __init__(self, name, timeout = 1000.0, scores=None):
        """Create the Ollama client.

        Args:
            name: name of the Ollama model to query.
            timeout: request timeout forwarded to Ollama as ``request_timeout``.
            scores: mapping ``metric name -> callable(y_true, y_pred)`` used by
                ``evaluate``. Defaults to an empty mapping.
        """
        self.model = Ollama(model=name, request_timeout=timeout)
        # A `{}` default would be a single dict shared by every instance.
        self.scores = {} if scores is None else scores

    def predict(self, dataset_type, test, samples = None, batch_dim = 8, single_label = True, progress_bar = False):
        """Classify ``test`` batch by batch and score the predictions.

        Args:
            dataset_type: ``"twitter"`` or ``"goemotions"``; selects the base prompt.
            test: test data. It must support ``len()`` and slicing
                (``test[a:b]``), and each slice must expose its sentences
                under ``['text']`` - as the DataFrame slice used elsewhere in
                this notebook does.
            samples: optional few-shot examples appended to the base prompt.
            batch_dim: number of sentences sent to the model per prompt.
            single_label: for GoEmotions, ask for one emotion (True) or up to
                three (False).
            progress_bar: show a tqdm progress bar over the batches.

        Returns:
            The dict produced by ``evaluate``, or ``None`` (after printing an
            error) when the model did not return exactly one prediction per
            test sample.
        """
        # Built once: the instruction part of the prompt is identical for every batch.
        base_prompt = self.generate_base_prompt(dataset_type, samples, single_label)
        predictions = []
        # Batches are taken in order (no shuffling) so that prediction i lines
        # up with test row i when scoring.
        batch_starts = range(0, len(test), batch_dim)
        for start in tqdm.tqdm(batch_starts, disable=not progress_bar):
            batch = test[start:start + batch_dim]
            batch_prompt = self.add_test_data_to_prompt(base_prompt, batch)
            # extend, not append: one entry per sample rather than one per batch.
            predictions.extend(self.classify_batch(batch_prompt))
        if len(predictions) == len(test):
            return self.evaluate(test, predictions, self.scores)
        print(f"Error: predictions and test data do not match: pred: {len(predictions)} vs test:{len(test)}")
        return None

    def generate_base_prompt(self, dataset_type, samples, single_label = True):
        """Return the instruction prompt for a dataset, plus optional samples.

        Args:
            dataset_type: ``"twitter"`` or ``"goemotions"``.
            samples: few-shot example text; when truthy it is appended after
                ``SAMPLES_STRING``.
            single_label: only used for GoEmotions; picks the single-label or
                the multi-label instruction.

        Raises:
            ValueError: if ``dataset_type`` is not a known dataset. Falling
                back to another dataset's label list would silently produce
                meaningless predictions.
        """
        if dataset_type == "twitter":
            base = TWITTER_BASE_PROMPT
        elif dataset_type == "goemotions":
            base = GOEMOTIONS_SINGLE_BASE_PROMPT if single_label else GOEMOTIONS_MULTI_BASE_PROMPT
        else:
            raise ValueError(f"Unknown dataset_type: {dataset_type!r} (expected 'twitter' or 'goemotions')")
        if samples:
            return base + SAMPLES_STRING + samples
        return base

    def add_test_data_to_prompt(self, prompt, test):
        """Append the sentences of a batch to ``prompt``.

        Each sentence in ``test['text']`` is written on its own line as
        ``"<index>. <sentence>"`` with indices starting at 0, followed by
        ``TERMINATOR_STRING``.

        Returns:
            The extended prompt as a new string.
        """
        numbered = "".join(f"{index}. {row}\n" for index, row in enumerate(test['text']))
        return prompt + numbered + TERMINATOR_STRING

    def classify_batch(self, prompt):
        """Send ``prompt`` to the model and return the predicted emotions.

        Returns:
            A list with the values of the JSON object found in the reply, in
            the order the model wrote them.
        """
        response = self.model.complete(prompt).text
        emotions = self.extract_emotions(response)
        return list(emotions.values())

    def evaluate(self, targets, predictions, scores):
        """Binarise targets and predictions and apply every metric in ``scores``.

        Args:
            targets: ground truth, in one of two forms:
                * an object with ``.columns`` (e.g. a DataFrame) holding one
                  0/1 column per emotion; columns listed in
                  ``non_label_columns`` are ignored;
                * an iterable with one label, or one iterable of labels, per
                  sample.
            predictions: one entry per sample - a label string, an iterable of
                label strings, or ``None``.
            scores: mapping ``metric name -> callable(y_true, y_pred)``.

        Returns:
            dict mapping each metric name to the value its callable returned.
        """
        predicted_sets = [self._as_label_list(entry) for entry in predictions]

        if hasattr(targets, "columns"):
            classes = [column for column in targets.columns if column not in self.non_label_columns]
            y_true = np.asarray(targets[classes])
            # With fixed classes, a predicted label outside the allowed set
            # (e.g. an emotion the model invented) is ignored by the binarizer
            # (sklearn reports it with a warning), so that sample simply
            # matches no target column.
            y_pred = MultiLabelBinarizer(classes=classes).fit_transform(predicted_sets)
        else:
            target_sets = [self._as_label_list(entry) for entry in targets]
            # Fit on both sides so targets and predictions share one column layout.
            mlb = MultiLabelBinarizer().fit(target_sets + predicted_sets)
            y_true = mlb.transform(target_sets)
            y_pred = mlb.transform(predicted_sets)

        return {name: score(y_true, y_pred) for name, score in scores.items()}

    @staticmethod
    def _as_label_list(entry):
        """Normalise one target/prediction entry to a list of labels."""
        if entry is None:
            return []
        if isinstance(entry, str):
            return [entry]
        return list(entry)

    def extract_emotions(self, answers):
        """Parse the JSON object contained in a model reply.

        Any text before the first ``{`` or after the last ``}`` is dropped
        before parsing, so chatty replies such as ``"Here you go: {...}"`` are
        accepted.

        Raises:
            json.JSONDecodeError: if the reply holds no valid JSON.
        """
        start = answers.find('{')
        end = answers.rfind('}')
        if start != -1 and end > start:
            answers = answers[start:end + 1]
        return json.loads(answers)

In [18]:
# Manual smoke test: classify ten Twitter test rows with the few-shot prompt.
llama3 = Llama3("ParametrisedLlama3")
batch = twitter_test[60:70]
twitter_prompt = llama3.add_test_data_to_prompt(
    TWITTER_BASE_PROMPT + SAMPLES_STRING + SAMPLES,
    batch,
)
batch


Unnamed: 0,text,anger,fear,joy,love,sadness,surprise
60,i woke up yesterday monday morning feeling a l...,0,0,0,0,1,0
61,i feel so embarrassed,0,0,0,0,1,0
62,i spent wandering around still kinda dazed and...,0,0,1,0,0,0
63,i can honestly say that after each sistahs cha...,0,0,1,0,0,0
64,i still feel stupid to be in that class this i...,0,0,0,0,1,0
65,i feel a little stunned but can t imagine what...,0,0,0,0,0,1
66,i admit im feeling a little bit unloved at thi...,0,0,0,0,1,0
67,i feel a bit stressed even though all the thin...,1,0,0,0,0,0
68,im feeling pretty anxious,0,1,0,0,0,0
69,i feel shocked and sad at the fact that there ...,0,0,0,0,0,1


In [19]:
# Query the model directly and show its raw text reply.
completion = llama3.model.complete(twitter_prompt)
response = completion.text

print(response)

{
"0": "fear",
"1": "anger",
"2": "surprise",
"3": "joy",
"4": "sadness",
"5": "surprise",
"6": "sadness",
"7": "stress",
"8": "fear",
"9": "surprise"
}


In [20]:
# Parse the raw reply into a dict; the bare last expression displays it.
parsed_emotions = llama3.extract_emotions(response)
parsed_emotions

{'0': 'fear',
 '1': 'anger',
 '2': 'surprise',
 '3': 'joy',
 '4': 'sadness',
 '5': 'surprise',
 '6': 'sadness',
 '7': 'stress',
 '8': 'fear',
 '9': 'surprise'}

't'