In [1]:
from llama_cpp import Llama
import os
import json
import pandas as pd
import httpx
from sklearn.metrics import accuracy_score

### Pre Process Input Data

In [12]:
# DEFINTE PATHS
LYRICS_DIR = "../processed_data/lyrics"
ANNOTATIONS_PATH = "../processed_data/CLEANED_cal500_annotations.csv"
MODEL_PATH_MISTRAL = "../models/mistral-7b-instruct-v0.2.Q3_K_M.gguf"
MODEL_PATH_LLAMA =  "../models/llama-2-7b-chat.Q4_0.gguf"

In [13]:
# Define a function to read lyrics from a folder
def read_files_in_folder(folder_path):
    file_contents = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if os.path.isfile(file_path):
            with open(file_path, "r") as file:
                content = file.read()
                file_contents.append(content)
    return pd.Series(file_contents)

In [14]:
annotations_to_use = [
    "Emotion-Angry_/_Agressive",
    "Emotion-Arousing_/_Awakening",
    "Emotion-Bizarre_/_Weird",
    "Emotion-Calming_/_Soothing",
    "Emotion-Carefree_/_Lighthearted",
    "Emotion-Cheerful_/_Festive",
    "Emotion-Emotional_/_Passionate",
    "Emotion-Exciting_/_Thrilling",
    "Emotion-Happy",
    "Emotion-Laid-back_/_Mellow",
    "Emotion-Light_/_Playful",
    "Emotion-Loving_/_Romantic",
    "Emotion-Pleasant_/_Comfortable",
    "Emotion-Positive_/_Optimistic",
    "Emotion-Powerful_/_Strong",
    "Emotion-Sad",
    "Emotion-Tender_/_Soft",
    "Emotion-Touching_/_Loving",
]

In [15]:
lyrics = read_files_in_folder(LYRICS_DIR)
annotations = pd.read_csv(ANNOTATIONS_PATH)
annotations = annotations[annotations_to_use]

### Initiate LLM

In [16]:
%%time
default_model = Llama(
    model_path=MODEL_PATH_MISTRAL,
    device="cuda",
    n_gpu_layers=-1,
    n_ctx=2048,
    verbose=False,
)

# default_model = Llama(
#     model_path=MODEL_PATH_LLAMA,
#     device="cuda",
#     n_gpu_layers=-1,
#     n_ctx=2048,
#     verbose=False,
# )

ggml_cuda_init: GGML_CUDA_FORCE_MMQ:   no
ggml_cuda_init: CUDA_USE_TENSOR_CORES: yes
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 3050 Laptop GPU, compute capability 8.6, VMM: yes


CPU times: user 4.95 s, sys: 825 ms, total: 5.78 s
Wall time: 22.6 s


In [17]:
prompt_template_default = """
[INST] 

<<SYS>>
You are a helpful assistant. 
Always answer the user's questions in brief and clear sentences. 
Keep your outputs short while being helpful.
<</SYS>>

%%%prompt%%%

[/INST]
"""

In [18]:
def query_llm(prompt, enforce_json=True, model=default_model):
    if enforce_json is True:
        response = model.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": """
                You are a sentiment analysis model. You are asked to analyze the sentiment of a song based on its lyrics.
                """,
                },
                {"role": "user", "content": lyrics[1]},
            ],
            response_format={
                "type": "json_object",
                "schema": {
                    "type": "object",
                    "properties": {
                        "Emotion-Angry_/_Agressive": {"type": "boolean"},
                        "Emotion-Arousing_/_Awakening": {"type": "boolean"},
                        "Emotion-Bizarre_/_Weird": {"type": "boolean"},
                        "Emotion-Calming_/_Soothing": {"type": "boolean"},
                        "Emotion-Carefree_/_Lighthearted": {"type": "boolean"},
                        "Emotion-Cheerful_/_Festive": {"type": "boolean"},
                        "Emotion-Emotional_/_Passionate": {"type": "boolean"},
                        "Emotion-Exciting_/_Thrilling": {"type": "boolean"},
                        "Emotion-Happy": {"type": "boolean"},
                        "Emotion-Laid-back_/_Mellow": {"type": "boolean"},
                        "Emotion-Light_/_Playful": {"type": "boolean"},
                        "Emotion-Loving_/_Romantic": {"type": "boolean"},
                        "Emotion-Pleasant_/_Comfortable": {"type": "boolean"},
                        "Emotion-Positive_/_Optimistic": {"type": "boolean"},
                        "Emotion-Powerful_/_Strong": {"type": "boolean"},
                        "Emotion-Sad": {"type": "boolean"},
                        "Emotion-Tender_/_Soft": {"type": "boolean"},
                        "Emotion-Touching_/_Loving": {"type": "boolean"},
                    },
                    "required": [
                        "Emotion-Angry_/_Agressive",
                        "Emotion-Arousing_/_Awakening",
                        "Emotion-Bizarre_/_Weird",
                        "Emotion-Calming_/_Soothing",
                        "Emotion-Carefree_/_Lighthearted",
                        "Emotion-Cheerful_/_Festive",
                        "Emotion-Emotional_/_Passionate",
                        "Emotion-Exciting_/_Thrilling",
                        "Emotion-Happy",
                        "Emotion-Laid-back_/_Mellow",
                        "Emotion-Light_/_Playful",
                        "Emotion-Loving_/_Romantic",
                        "Emotion-Pleasant_/_Comfortable",
                        "Emotion-Positive_/_Optimistic",
                        "Emotion-Powerful_/_Strong",
                        "Emotion-Sad",
                        "Emotion-Tender_/_Soft",
                        "Emotion-Touching_/_Loving",
                    ],
                },
            },
            temperature=0.7,
        )
        return response["choices"][0]["message"]["content"]
    else:
        response = model(
            prompt_template_default.replace("%%%prompt%%%", prompt),
            max_tokens=20000,
            temperature=0.7,
        )
        return response["choices"][0]["text"]

In [19]:
%%time
query_llm("Who is the Prime Minister of India? Answer in 1 sentence", False, default_model)

CPU times: user 6.23 s, sys: 2.19 s, total: 8.42 s
Wall time: 8.66 s


'The current Prime Minister of India is Narendra Modi (as of my knowledge up to 2021).'

### Testing Lyrics

In [20]:
lyric_testing_template = """
### Lyrics:

%%%prompt%%%

### Prompt:
Fill up the following JSON with 0 or 1 value with respect to the song lyrics. 
0 means the song does not contain the emotion/genre/usage and 1 means the song contains the emotion/genre/usage.
Make sure to fill up every JSON key wiht a value of 0 or 1.
If no lyrics are present, respond with an empty JSON object.

### JSON
{
    "Emotion-Angry_/_Agressive": ""
    "Emotion-Arousing_/_Awakening": ""
    "Emotion-Bizarre_/_Weird": ""
    "Emotion-Calming_/_Soothing": ""
    "Emotion-Carefree_/_Lighthearted": ""
    "Emotion-Cheerful_/_Festive": ""
    "Emotion-Emotional_/_Passionate": ""
    "Emotion-Exciting_/_Thrilling": ""
    "Emotion-Happy": ""
    "Emotion-Laid-back_/_Mellow": ""
    "Emotion-Light_/_Playful": ""
    "Emotion-Loving_/_Romantic": ""
    "Emotion-Pleasant_/_Comfortable": ""
    "Emotion-Positive_/_Optimistic": ""
    "Emotion-Powerful_/_Strong": ""
    "Emotion-Sad": ""
    "Emotion-Tender_/_Soft": ""
    "Emotion-Touching_/_Loving": ""
}
"""

In [21]:
def process_json_output(json_output):
    json_output = json.loads(json_output)
    json_output = {k: int(v) for k, v in json_output.items()}
    json_output = pd.Series(json_output)
    return json_output

def test_by_index(index):
    test_output = query_llm(lyrics[index], True, default_model)
    test_output = process_json_output(test_output)
    ground_truth = (annotations.iloc[index])

    print(ground_truth.compare(test_output))
    accuracy = accuracy_score(ground_truth, test_output)
    accuracy = round(accuracy, 3)
    return accuracy

### Manual Tests

In [22]:
%%time
print(test_by_index(1))

                              self  other
Emotion-Arousing_/_Awakening   1.0    0.0
Emotion-Exciting_/_Thrilling   1.0    0.0
0.889
CPU times: user 19.8 s, sys: 4.15 s, total: 24 s
Wall time: 24.1 s


In [23]:
%%time
print(test_by_index(500))

                                 self  other
Emotion-Angry_/_Agressive         0.0    1.0
Emotion-Carefree_/_Lighthearted   1.0    0.0
Emotion-Emotional_/_Passionate    0.0    1.0
Emotion-Happy                     1.0    0.0
Emotion-Light_/_Playful           1.0    0.0
Emotion-Positive_/_Optimistic     1.0    0.0
0.667
CPU times: user 18 s, sys: 1.34 s, total: 19.3 s
Wall time: 19.4 s


In [24]:
%%time
print(test_by_index(69))

                                self  other
Emotion-Bizarre_/_Weird          1.0    0.0
Emotion-Emotional_/_Passionate   0.0    1.0
Emotion-Exciting_/_Thrilling     1.0    0.0
Emotion-Powerful_/_Strong        1.0    0.0
Emotion-Sad                      1.0    0.0
0.722
CPU times: user 21.8 s, sys: 1.39 s, total: 23.2 s
Wall time: 23.2 s


In [25]:
%%time
print(test_by_index(43))

                                 self  other
Emotion-Angry_/_Agressive         0.0    1.0
Emotion-Carefree_/_Lighthearted   1.0    0.0
Emotion-Cheerful_/_Festive        1.0    0.0
Emotion-Happy                     1.0    0.0
Emotion-Pleasant_/_Comfortable    1.0    0.0
Emotion-Powerful_/_Strong         0.0    1.0
0.667
CPU times: user 16.1 s, sys: 997 ms, total: 17.1 s
Wall time: 17.1 s


In [26]:
%%time
print(test_by_index(43))

                                 self  other
Emotion-Angry_/_Agressive         0.0    1.0
Emotion-Carefree_/_Lighthearted   1.0    0.0
Emotion-Cheerful_/_Festive        1.0    0.0
Emotion-Happy                     1.0    0.0
Emotion-Pleasant_/_Comfortable    1.0    0.0
0.722
CPU times: user 18.6 s, sys: 1.22 s, total: 19.8 s
Wall time: 19.9 s


Intial Test Accuracy:
| index | llama2-7b | mistral-7b |
|-------|--------|---------|
|   1   |   83    |    88    |
|  500  |   27    |    66    |
|   69  |   77    |    72    |
|   43  |   72    |    66    |
|   99  |   38    |    72    |
|---|---|---|
| Avg: |   59.4  |    72.8    |

Hence, we can see llama2 is better than mistral in most cases, but mistral is very consistent across the board.