## Run models locally and test them

In [None]:
from llama_cpp import Llama

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import pandas as pd
import json

In [None]:
# To load a model from a local file instead, use: llm = Llama(model_path=path_to_model)

# Load the Phi-3 mini instruct GGUF file matching "*q4.gguf" from the given repo id.
# NOTE(review): max_tokens does not look like a model-loading option (the
# completion calls pass their own max_tokens) - presumably ignored here; confirm.
llm = Llama.from_pretrained(
    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    filename="*q4.gguf",
    n_gpu_layers=-1,
    n_ctx=512,
    max_tokens=20,
    n_batch=512,
    n_threads=16,
    verbose=True,
    flash_attn=True,
    chat_format="chatml",
)

### r/Jokes dataset

In [None]:
# Sample joke used to try out the prompt
joke = "Why did the scarecrow win an award? Because he was outstanding in his field!"

# JSON schema of the expected answer: an object with a numeric "rating" between 0 and 4
schema = {
    "type": "object",
    "properties": {
        "rating": {
            "type": "number",
            "minimum": 0,
            "maximum": 4,
            "description": "The rating of the joke, from 0 to 4.",
        }
    },
}

# One-off chat completion; being the last expression of the cell, its result is displayed.
llm.create_chat_completion(
    messages=[
        {
            "role": "system",
            # json.dumps renders the schema as real JSON (double quotes); interpolating
            # the dict directly would show the model a Python repr with single quotes.
            "content": f"You are a joke evaluator that answers in JSON. Here's the json schema you must adhere to:\n{json.dumps(schema)}",
        },
        {"role": "user",
            "content": f""" Your task is to evaluate jokes based on their funniness on a scale from 0 to 4, 
                        where 0 represents the least funny and 4 represents the most funny. 
                        \n "{joke}" """},
    ],
    # Uncomment to enforce the schema (slows down the completion):
    # response_format={"type": "json_object", "schema": schema},
    temperature=0.2,
    top_p=0.9,
    max_tokens=10,
)

Wrap the rating prompt in a reusable function so it can be applied to a whole dataset.

In [None]:
# Sample joke used to smoke-test the rating function
joke = (
    "Why did the scarecrow win an award? "
    "Because he was outstanding in his field!"
)

# Schema of the expected answer: an object holding a numeric "rating" from 0 to 4
schema = dict(
    type="object",
    properties=dict(
        rating=dict(
            type="number",
            minimum=0,
            maximum=4,
            description="The rating of the joke, from 0 to 4.",
        ),
    ),
)

def rate_joke(joke: str) -> str:
    """Ask the local model to rate a joke and return its raw reply.

    The prompt is built from the module-level ``schema`` and sent through the
    module-level ``llm``.

    Args:
        joke: Text of the joke to evaluate.

    Returns:
        The content string of the first completion choice. It is neither
        parsed nor validated here, and generation is capped at 10 tokens.
    """
    # Serialise the schema as real JSON (double quotes); interpolating the dict
    # directly would show the model a Python repr with single quotes.
    schema_json = json.dumps(schema)

    completion = llm.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": f"You are a joke evaluator that answers in JSON. Here's the json schema you must adhere to:\n{schema_json}",
            },
            {
                "role": "user",
                "content": f""" Your task is to evaluate jokes based on their funniness on a scale from 0 to 4, where 0 represents the least funny and 4 represents the most funny.\n "{joke}" """,
            },
        ],
        # Uncomment to enforce the schema:
        # response_format={"type": "json_object", "schema": schema},
        temperature=0.2,
        top_p=0.9,
        max_tokens=10,
    )

    return completion['choices'][0]['message']['content']

# Smoke-test the function on the sample joke; the cell shows the returned string
rate_joke(joke)

In [None]:
# Load the data and keep only the joke text and its score class
data = pd.read_csv("../data/interim/ready_for_model.csv")
data = data[['joke_new', 'score_class']]
data = data.rename(columns={'joke_new': 'text', 'score_class': 'label'})

# Subsample to speed up testing
data = data.sample(frac=0.25, random_state=42)

# Normalise dtypes: text as generic objects, labels as integers
data["text"] = data["text"].astype("object")
data["label"] = data["label"].astype(int)


# Split the data: 80% train, then halve the remaining 20% into test and validation
train, test = train_test_split(data, test_size=0.2, random_state=42, shuffle=True, stratify=data["label"])
test, val = train_test_split(test, test_size=0.5, random_state=42, stratify=test["label"])

# Work on an explicit copy so that adding a column does not write into a slice
# of `data` (avoids pandas' SettingWithCopyWarning)
test = test.copy()

# Rate every test joke; the column holds the raw model response text
test['rating'] = test['text'].apply(rate_joke)

In [None]:
# Count how often each distinct value occurs in the rating column
test['rating'].value_counts()

In [None]:
# Count how many test jokes fall into each label
test['label'].value_counts()

In [None]:
# Work on a copy so the column assignments below never write into a slice of another frame
test = test.copy()

# Extract the first run of digits from each raw model response (NaN when there is none)
test['rating_n'] = test['rating'].str.extract(r'(\d+)', expand=False)

# Drop rows where the rating is missing (e.g. the model did not return a rating)
test = test.dropna(subset=['rating_n']).copy()

# Convert the extracted digits - not the raw response text - to an integer rating
test['rating'] = test['rating_n'].astype(int)


# Share of test jokes whose predicted rating equals the label (y_true first, then y_pred)
accuracy_score(test['label'], test['rating'])


In [None]:
# Count how often each rating occurs after the integer conversion
test['rating'].value_counts()

In [None]:
from sklearn.metrics import classification_report

# Text report comparing the labels (truth) with the model ratings (prediction)
print(
    classification_report(
        y_true=test['label'],
        y_pred=test['rating'],
    )
)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# The five rating classes. Passing them to confusion_matrix pins the matrix to
# 5x5 even when a class is absent or the model returned a number outside 0-4;
# otherwise the matrix size could differ from the number of display labels.
rating_classes = [0, 1, 2, 3, 4]

cm = confusion_matrix(test['label'], test['rating'], labels=rating_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=rating_classes)
disp.plot()
plt.show()

### AG News dataset

In [None]:
# Sample article used to smoke-test the classifier
title = "Fears for T N pension after talks"
description = (
    "Unions representing workers at Turner Newall say they are 'disappointed' "
    "after talks with stricken parent firm Federal Mogul."
)

# Schema of the expected answer: an object whose "topic" is one of four fixed strings
schema = dict(
    type="object",
    properties=dict(
        topic=dict(
            type="string",
            enum=["world", "sports", "business", "science/tech"],
            description="The topic of the news article.",
        ),
    ),
)

def rate_news(title: str, description: str) -> str:
    """Ask the local model to classify a news article and return its raw reply.

    The prompt is built from the module-level ``schema`` and sent through the
    module-level ``llm``.

    Args:
        title: Headline of the article.
        description: Short description of the article.

    Returns:
        The content string of the first completion choice. It is neither
        parsed nor validated here, and generation is capped at 10 tokens.
    """
    # Serialise the schema as real JSON (double quotes); interpolating the dict
    # directly would show the model a Python repr with single quotes.
    schema_json = json.dumps(schema)

    completion = llm.create_chat_completion(
        messages=[
            {
                "role": "system",
                "content": f"You are a classifying expert that answers in JSON. Here's the json schema you must adhere to:\n{schema_json}",
            },
            {
                "role": "user",
                # The description is wrapped in a matching pair of quotes, like the title.
                "content": f""" Your task is to classify news articles into one of the following topics: world, sports, business, or science/tech. Title:\n "{title}" \n Description:\n "{description}" """,
            },
        ],
        # Uncomment to enforce the schema:
        # response_format={"type": "json_object", "schema": schema},
        temperature=0.2,
        top_p=0.9,
        max_tokens=10,
    )

    return completion['choices'][0]['message']['content']

# Smoke-test the classifier on the sample article; the cell shows the returned string
rate_news(title, description)

In [None]:
# Load the AG News articles from the external data folder
data = pd.read_csv('../data/external/ag_news_data.csv')
# Preview the first rows
data.head()

In [None]:
# Sample the rows with a fixed seed; frac=1 keeps every row
data = data.sample(frac=1, random_state=42) # Adjust the fraction to speed up testing

In [None]:
# Numeric label -> topic name, numbered from 1 in this fixed order
label2class = dict(enumerate(('world', 'sports', 'business', 'science/tech'), start=1))
# Reverse lookup: topic name -> numeric label
class2label = dict(zip(label2class.values(), label2class.keys()))

In [None]:
# Classify every article from its Title and Description columns (one model call per row)
data['class'] = data.apply(
    lambda row: rate_news(row['Title'], row['Description']),
    axis=1,
)

In [None]:
# Count how often each distinct raw model response occurs
data['class'].value_counts()

In [None]:
# Trim leading/trailing whitespace from each response into a new working column
data['class_n'] = data['class'].apply(lambda response: response.strip())
# Inspect the distinct responses after trimming
data['class_n'].value_counts()

In [None]:
# Repair some common formatting issues so the responses parse as JSON

# Python-style single quotes -> JSON double quotes
data['class_n'] = data['class_n'].apply(lambda x: x.replace("'", "\""))

# Strip whatever part of the closing `"}` (and trailing whitespace) is present,
# then append it in full. This repairs complete responses, responses that lack
# the final brace, and responses cut off in the middle of the topic value.
data['class_n'] = data['class_n'].apply(lambda x: x.rstrip('"} \t\r\n') + '"}')
data['class_n'].value_counts()


In [None]:
# Use if JSON is not formatted correctly
# If the response mentions one of the class names, assign it to that class
def clean(x: object) -> str:
    """Return the first known topic name mentioned in a model response.

    Matching is case-insensitive, so "World" or "Science/Tech" are recognised.
    Topics are checked in the fixed order world, sports, business,
    science/tech, and the first one found wins.

    Args:
        x: Raw model response. Non-string values (e.g. NaN) count as no match.

    Returns:
        The matched topic name in lower case, or 'unknown' when none is found.
    """
    if not isinstance(x, str):
        return 'unknown'

    lowered = x.lower()
    for topic in ('world', 'sports', 'business', 'science/tech'):
        if topic in lowered:
            return topic
    return 'unknown'

# Reduce every response to a topic name ('unknown' when none is mentioned)
data['class_n'] = data['class_n'].apply(clean)
# Inspect how many responses fell into each topic
data['class_n'].value_counts()

In [None]:
# Use if JSON is formatted correctly
def get_class_text(json_string):
    """Return the "topic" value of a JSON response, or 'unknown'.

    Args:
        json_string: Model response expected to hold a JSON object with a
            "topic" key.

    Returns:
        The value stored under "topic". Returns 'unknown' when the input is
        not valid JSON, is not a string, or has no "topic" entry.
    """
    try:
        return json.loads(json_string)['topic']
    # JSONDecodeError: malformed JSON; TypeError: non-string input or a parsed
    # value that is not a mapping; KeyError: object without a "topic" key.
    # Anything else (e.g. KeyboardInterrupt) is deliberately left to propagate.
    except (json.JSONDecodeError, KeyError, TypeError):
        return 'unknown'

# Parse every response and keep only its topic ('unknown' when parsing fails)
data['class_n'] = data['class_n'].apply(get_class_text)

# Inspect how many responses fell into each topic
data['class_n'].value_counts()

In [None]:
# Map each predicted topic name to its numeric label via class2label
# (names that are not keys of class2label, such as 'unknown', presumably become NaN)
data['class_n'] = data['class_n'].map(class2label)

In [None]:
# Drop the rows whose predicted label is missing (NaN), i.e. unknown classes
data = data.dropna(subset=['class_n'])

In [None]:
# Count how many of the remaining articles carry each 'Class Index' value
data['Class Index'].value_counts()

In [None]:
from sklearn.metrics import classification_report

# Text report comparing the dataset's 'Class Index' (truth) with the predicted labels
print(
    classification_report(
        y_true=data['Class Index'],
        y_pred=data['class_n'],
    )
)