### Install the model:
`! pip install ollama llama-index-llms-ollama`
`! sudo snap install ollama`
`! ollama pull llama3`

### After downloading the base model and creating the modelfile, we create the parametrised model for our task:
`! ollama create [model name] -f [modelfile name]`

In [1]:
from llama_index.llms.ollama import Ollama
from lib.dataset_utils import load_twitter_data_cleaned, load_goemotions_cleaned
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer, OneHotEncoder
from sklearn.metrics import accuracy_score, jaccard_score, f1_score, classification_report
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd


### Loading Twitter

In [2]:
_, _, twitter_test = load_twitter_data_cleaned() 
twitter_emotions = """'joy', 'sadness','anger', 'fear', 'love', 'surprise'"""

### Loading Goemotions

In [3]:
import json
label_mapping_path = "./dataset/GoEmotionsSplit/label_mapping.json"
_, _, goemotions_test = load_goemotions_cleaned()
json1_file = open(label_mapping_path)
json1_str = json1_file.read()
json1_data = json.loads(json1_str)
goemotions_emotions = str(json1_data.values())

In [4]:
TWITTER_BASE_PROMPT = """<|start_header_id|>system<|end_header_id|> Classify the sentences. Choose ONLY ONE EMOTION among the following: """ + twitter_emotions 

SAMPLES = """ 
text: 1. i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived
2. im updating my blog because i feel shitty

answer:{
    "1": "joy"
    "2": "sadness"
    }
<|eot_id|>"""

GOEMOTIONS_SINGLE_BASE_PROMPT = """<|start_header_id|>system<|end_header_id|> Classify the sentences. Choose ONLY ONE EMOTION among the following: """ + goemotions_emotions

GOEMOTIONS_MULTI_BASE_PROMPT = """<|start_header_id|>system<|end_header_id|> Classify the sentences. Choose a maximum of three emotions among the following: """ + goemotions_emotions

SAMPLES_STRING = """Here are some samples:"""

TERMINATOR_STRING = """<|eot_id|>"""

In [5]:
class Llama3():
    def __init__(self, name, timeout = 1000.0, scores={}):
        self.model = Ollama(model=name, request_timeout=timeout)
        self.scores = scores

    def predict(self, dataset_type, test, samples = None, batch_dim = 8, single_label = True, progress_bar = False):
        # executes classification task on model
        # k : number of shots (examples)
        # emotions : list of emotions to be classified 
        # train : training data to fetch examples from
        # test : test data to classify
        # batch_dim : size of batch per prompt
        # single_label : whether to classify single label or multi-label

        test_loader = DataLoader(test, batch_size = batch_dim, shuffle = False)
        predictions = []
        base_prompt = self.generate_base_prompt(dataset_type, samples, single_label) # to avoid recreating the prompt from scratch at every batch
        for data in tqdm(test_loader, disable=not progress_bar):
            batch_prompt = self.add_test_data_to_prompt(base_prompt, data[0])
            predictions.append(self.classify_batch(batch_prompt))
        predictions = flatten(predictions)
        if len(predictions) == len(test):
            results = self.evaluate(test.targets, predictions, self.scores)
            print(results)
        else:
            print(f"Error: predictions and test data do not match: pred: {len(predictions)} vs test:{len(test)}")

    def generate_base_prompt(self, dataset_type, samples, single_label = True):
        # spaghetti code but python 3.8 has no switch-case 
        if dataset_type == "twitter":
            if samples:
                return TWITTER_BASE_PROMPT + SAMPLES_STRING + samples
            return TWITTER_BASE_PROMPT
        
        if dataset_type == "goemotions":
            if single_label:
                if samples:
                    return GOEMOTIONS_SINGLE_BASE_PROMPT + SAMPLES_STRING + samples
                return GOEMOTIONS_SINGLE_BASE_PROMPT    
            if samples:
                return GOEMOTIONS_MULTI_BASE_PROMPT + SAMPLES_STRING + samples
        return GOEMOTIONS_MULTI_BASE_PROMPT 
                
    def add_test_data_to_prompt(self, prompt, test):
        # appends data to classify to base prompt
        for index, row in enumerate(test):
            prompt += (str(index) + '. ' + row + '\n')
        prompt += TERMINATOR_STRING
        return prompt

    def classify_batch(self, prompt):
        # classify batch of data
        # response is formatted as JSON
        response = self.model.complete(prompt).text
        emotions = self.extract_emotions(response)
        return list(emotions.values())
    
    def evaluate(self, targets, predictions, scores):
        # TODO: fix one hot encoding and handle 'other' case
        # evaluate the model
        ohe = OneHotEncoder()
        predictions = ohe.transform(predictions)
        predictions = pd.DataFrame(predictions, columns = ohe.categories_) # 'other' if emotion not in the allowed ones)
        scores = {name: score(targets, predictions) for name, score in scores.items()}
        return scores

    def extract_emotions(self, answers):
        answers[answers.find('{'):]
        return json.loads(answers)



class Llama_EmotionsData(Dataset):
    def __init__(self, dataframe) -> None:
        self.text = dataframe['text']
        self.targets = dataframe.drop(columns=['text']).to_numpy()

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        return self.text[index], self.targets[index]


def flatten(xss):
    return [x for xs in xss for x in xs]

In [6]:
llama3 = Llama3("ParametrisedLlama3", scores = {"accuracy": accuracy_score})
twitter_test_dataset = Llama_EmotionsData(twitter_test[:16])
llama3.predict("twitter", twitter_test_dataset, samples = SAMPLES, single_label = True, progress_bar = True)


  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:50<00:00, 25.32s/it]


NotFittedError: This OneHotEncoder instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.