In [1]:
from llama_cpp import Llama
import os
import json
import pandas as pd
import httpx
from sklearn.metrics import accuracy_score

### Pre Process Input Data

In [5]:
# DEFINTE PATHS
LYRICS_DIR = "processed_data/lyrics"
ANNOTATIONS_PATH = "processed_data/CLEANED_cal500_annotations.csv"
MODEL_PATH_MISTRAL = "models/mistral-7b-instruct-v0.2.Q3_K_M.gguf"
MODEL_PATH_LLAMA =  "models/llama-2-7b-chat.Q4_0.gguf"
MODEL_PATH_PHI = "models/Phi-3-mini-4k-instruct-fp16.gguf"
TEST_OUTCOMES_CHROMA = "processed_data/y_test.csv"
TEST_PRED_CHROMA = "processed_data/y_pred_chroma.csv"

In [6]:
# Define a function to read lyrics from a folder
def read_files_in_folder(folder_path):
    file_contents = []
    file_names = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if os.path.isfile(file_path):
            with open(file_path, "r") as file:
                content = file.read()
                file_contents.append(content)
                file_names.append(filename.replace('.txt', ''))
    return pd.Series(file_contents, index=file_names)

In [7]:
lyrics = read_files_in_folder(LYRICS_DIR)
annotations = pd.read_csv(ANNOTATIONS_PATH)
annotations.rename(columns={'0': 'track_name'}, inplace=True)

In [8]:
annotations.head()

Unnamed: 0,track_name,Emotion-Angry_/_Agressive,NOT-Emotion-Angry_/_Agressive,Emotion-Arousing_/_Awakening,NOT-Emotion-Arousing_/_Awakening,Emotion-Bizarre_/_Weird,NOT-Emotion-Bizarre_/_Weird,Emotion-Calming_/_Soothing,NOT-Emotion-Calming_/_Soothing,Emotion-Carefree_/_Lighthearted,...,Genre-Best-World,Instrument_-_Acoustic_Guitar-Solo,Instrument_-_Electric_Guitar_(clean)-Solo,Instrument_-_Electric_Guitar_(distorted)-Solo,Instrument_-_Female_Lead_Vocals-Solo,Instrument_-_Harmonica-Solo,Instrument_-_Male_Lead_Vocals-Solo,Instrument_-_Piano-Solo,Instrument_-_Saxophone-Solo,Instrument_-_Trumpet-Solo
0,10cc-for_you_and_i,0,1,0,1,0,1,1,0,0,...,0,0,0,0,0,0,1,0,0,0
1,2pac-trapped,1,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
2,5th_dimension-one_less_bell_to_answer,0,0,0,0,0,1,1,0,0,...,0,1,0,0,0,0,0,0,0,0
3,a_tribe_called_quest-bonita_applebum,0,1,0,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,aaron_neville-tell_it_like_it_is,0,1,0,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
y_test = pd.read_csv(TEST_OUTCOMES_CHROMA).set_index('audio_path')
y_pred = pd.read_csv(TEST_PRED_CHROMA).set_index('audio_path')

In [7]:
y_pred.head()

Unnamed: 0_level_0,Emotion-Happy,Emotion-Positive_/_Optimistic,Emotion-Sad,Emotion-Emotional_/_Passionate,Emotion-Angry_/_Agressive,Emotion-Exciting_/_Thrilling,Emotion-Laid-back_/_Mellow,Emotion-Loving_/_Romantic,Emotion-Cheerful_/_Festive,Emotion-Bizarre_/_Weird
audio_path,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rage_against_the_machine-maggies_farm,0,0,0,0,0,1,0,0,0,0
blood_sweat_and_tears-sometimes_in_winter,0,0,0,0,0,0,0,0,0,0
roy_orbison-running_scared,0,0,0,0,0,0,1,0,0,0
creedence_clearwater_revival-travelin_band,1,0,0,0,0,1,0,0,0,0
built_to_spill-i_would_hurt_a_fly,0,0,0,0,0,0,0,0,0,0


In [8]:
annotations_to_use = [
    "Emotion-Happy",
    "Emotion-Positive_/_Optimistic",
    "Emotion-Sad",
    "Emotion-Emotional_/_Passionate",
    "Emotion-Angry_/_Agressive",
    "Emotion-Exciting_/_Thrilling",
    "Emotion-Laid-back_/_Mellow",
    "Emotion-Loving_/_Romantic",
    "Emotion-Cheerful_/_Festive",
    "Emotion-Bizarre_/_Weird",
]

annotations = annotations[annotations_to_use]

### Initiate LLM

In [13]:
%%time
default_model = Llama(
    model_path=MODEL_PATH_MISTRAL,
    device="cuda",
    n_gpu_layers=-1,
    n_ctx=2048,
    verbose=False,
)

# default_model = Llama(
#     model_path=MODEL_PATH_LLAMA,
#     device="cuda",
#     n_gpu_layers=-1,
#     n_ctx=2048,
#     verbose=False,
# )

# default_model = Llama(
#     model_path=MODEL_PATH_PHI,
#     device="cuda",
#     n_gpu_layers=-1,
#     n_ctx=2048,
#     verbose=False,
# )

CPU times: user 103 ms, sys: 458 ms, total: 560 ms
Wall time: 9.99 s


In [15]:
prompt_template_default = """
[INST]

<<SYS>>
You are a helpful assistant. 
Always answer the user's questions in brief and clear sentences. 
Keep your outputs short while being helpful.
<</SYS>>

%%%prompt%%%

[/INST]
"""

In [16]:
def query_llm(prompt, enforce_json=True, model=default_model):
    if enforce_json is True:
        response = model.create_chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": f"""
                You are a sentiment analysis model. 
                You are asked to analyze the sentiment of a song based on its lyrics.
                You have to predict the following emotions with a boolean value: 
                {", ".join(annotations_to_use)}
                """,
                },
                {"role": "user", "content": lyrics[1]},
            ],
            response_format={
                "type": "json_object",
                "schema": {
                    "type": "object",
                    "properties": dict.fromkeys(annotations_to_use, {"type": "boolean"}),
                    "required": annotations_to_use,
                },
            },
            temperature=0.7,
        )
        return response["choices"][0]["message"]["content"]
    else:
        response = model(
            prompt_template_default.replace("%%%prompt%%%", prompt),
            max_tokens=20000,
            temperature=0.7,
        )
        return response["choices"][0]["text"]

In [17]:
%%time
query_llm("Who is the Prime Minister of India? Answer in 1 sentence", False, default_model)

CPU times: user 2min 7s, sys: 7.44 s, total: 2min 14s
Wall time: 12.5 s


'Narendra Modi is the current Prime Minister of India (as of 2023).'

### Testing Lyrics

In [18]:
def process_json_output(json_output):
    json_output = json.loads(json_output)
    json_output = {k: int(v) for k, v in json_output.items()}
    json_output = pd.Series(json_output)
    return json_output

def test_by_index(index):
    test_output = query_llm(lyrics.iloc[index], True, default_model)
    test_output = process_json_output(test_output)
    ground_truth = (annotations.iloc[index])

    print(ground_truth.compare(test_output))
    accuracy = accuracy_score(ground_truth, test_output)
    accuracy = round(accuracy, 3)
    return accuracy

### Manual Tests

In [19]:
%%time
print(test_by_index(1))

  {"role": "user", "content": lyrics[1]},


                               self  other
Emotion-Positive_/_Optimistic   0.0    1.0
Emotion-Angry_/_Agressive       1.0    0.0
Emotion-Exciting_/_Thrilling    1.0    0.0
0.7
CPU times: user 15.5 s, sys: 3.69 s, total: 19.2 s
Wall time: 19.2 s


In [51]:
%%time
print(test_by_index(500))

  {"role": "user", "content": lyrics[1]},


                                self  other
Emotion-Emotional_/_Passionate   0.0    1.0
0.9
CPU times: user 9.09 s, sys: 1.06 s, total: 10.1 s
Wall time: 10.2 s


In [52]:
%%time
print(test_by_index(69))

  {"role": "user", "content": lyrics[1]},


                                self  other
Emotion-Positive_/_Optimistic    0.0    1.0
Emotion-Sad                      1.0    0.0
Emotion-Emotional_/_Passionate   0.0    1.0
Emotion-Angry_/_Agressive        1.0    0.0
Emotion-Exciting_/_Thrilling     1.0    0.0
Emotion-Cheerful_/_Festive       0.0    1.0
Emotion-Bizarre_/_Weird          1.0    0.0
0.3
CPU times: user 9.52 s, sys: 866 ms, total: 10.4 s
Wall time: 10.4 s


In [53]:
%%time
print(test_by_index(43))

  {"role": "user", "content": lyrics[1]},


                               self  other
Emotion-Happy                   1.0    0.0
Emotion-Positive_/_Optimistic   0.0    1.0
Emotion-Cheerful_/_Festive      1.0    0.0
0.7
CPU times: user 10.4 s, sys: 769 ms, total: 11.2 s
Wall time: 11.2 s


In [54]:
%%time
print(test_by_index(43))

  {"role": "user", "content": lyrics[1]},


                               self  other
Emotion-Happy                   1.0    0.0
Emotion-Positive_/_Optimistic   0.0    1.0
0.8
CPU times: user 8.36 s, sys: 679 ms, total: 9.03 s
Wall time: 9.04 s


Intial Test Accuracy:
| index | llama2-7b | mistral-7b |
|-------|--------|---------|
|   1   |   83    |    88    |
|  500  |   27    |    66    |
|   69  |   77    |    72    |
|   43  |   72    |    66    |
|   99  |   38    |    72    |
|---|---|---|
| Avg: |   59.4  |    72.8    |

Hence, we can see llama2 is better than mistral in most cases, but mistral is very consistent across the board.

## Comparision Testing

In [128]:
def test_data(X_test):
    """
    - Get dataframe
    - Apply function to return 
    """
    outputs = X_test.apply(lambda x: json.loads(query_llm(x, True, default_model)))
    # output_df = outputs.apply(pd.Series).astype(int)

    return outputs.apply(pd.Series).astype(int)

In [133]:
%%time
y_pred = test_data(X_test)
y_pred.to_csv('../processed_data/y_pred_lyrics.csv')
y_pred

  {"role": "user", "content": lyrics[1]},


CPU times: user 16min 51s, sys: 1min 5s, total: 17min 56s
Wall time: 18min


Unnamed: 0,Emotion-Happy,Emotion-Positive_/_Optimistic,Emotion-Sad,Emotion-Emotional_/_Passionate,Emotion-Angry_/_Agressive,Emotion-Exciting_/_Thrilling,Emotion-Laid-back_/_Mellow,Emotion-Loving_/_Romantic,Emotion-Cheerful_/_Festive,Emotion-Bizarre_/_Weird
electric_frankenstein-teenage_shutdown,0,1,0,1,0,0,0,0,0,0
pj_harvey-dry,0,0,1,1,0,0,0,0,0,0
curandero-aras,0,1,0,1,0,0,0,0,0,0
jewel-enter_from_the_east,0,1,0,1,0,0,0,0,0,0
kourosh_zolani-peaceful_planet,0,1,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
red_hot_chili_peppers-give_it_away,0,1,0,1,0,0,0,0,1,0
thin_lizzy-dont_believe_a_word,0,1,0,1,0,0,0,0,0,0
glen_bledsoe-p_pop,0,1,0,1,0,0,0,0,0,0
bots-take_the_power_back,0,0,1,1,0,0,0,0,0,0


In [147]:
y_pred.to_csv('../processed_data/y_pred_lyrics.csv')