In [1]:
from llama_cpp import Llama
# Small-context instance of the model (2048-token window); genre() uses it for
# prompts whose estimated size fits under its threshold.
fast_llm = Llama(
    model_path="/home/p.kuznetsov/script/test/mistral-7b-instruct-v0.2.Q5_K_M.gguf",
    main_gpu=1,       # the load log lists CUDA devices 0 and 1
    n_gpu_layers=-1,  # presumably "offload every layer" - confirm in llama-cpp-python docs
    n_threads=4,
    n_ctx=2048,       # keep the context window modest
    verbose=False,
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 2 CUDA devices:
  Device 0: NVIDIA GeForce RTX 2080 Ti, compute capability 7.5, VMM: yes
  Device 1: NVIDIA GeForce RTX 2080 Ti, compute capability 7.5, VMM: yes
llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /home/p.kuznetsov/script/test/mistral-7b-instruct-v0.2.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:  

In [2]:
from llama_cpp import Llama
# Second instance of the same model file with a wider 5600-token window;
# genre() switches to it when a prompt is too long for the small instance.
larger_llm = Llama(
    model_path="/home/p.kuznetsov/script/test/mistral-7b-instruct-v0.2.Q5_K_M.gguf",
    main_gpu=1,       # same device index as the small instance
    n_gpu_layers=-1,  # presumably "offload every layer" - confirm in llama-cpp-python docs
    n_threads=4,
    n_ctx=5600,       # wider window for long plot summaries
    verbose=False,
)

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /home/p.kuznetsov/script/test/mistral-7b-instruct-v0.2.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loade

In [3]:
import re
import json

def _count_tokens(llm, text):
    """Return how many tokens `text` occupies under `llm`'s own tokenizer."""
    return len(llm.tokenize(text.encode("utf-8")))


def _extract_genre(response):
    """Return the "genre" value from the first {...} object in `response`.

    Raises json.JSONDecodeError when the reply holds no parsable JSON object or
    the object has no "genre" key, so the caller can treat both as a format error.
    """
    # DOTALL lets the pattern span newlines: the prompt asks for "formatted" JSON,
    # which a model may pretty-print over several lines.
    match = re.search(r'{(.+?)}', response, re.DOTALL)
    if match is None:
        raise json.JSONDecodeError("No json found", response, 0)
    parsed = json.loads(match.group(0))
    if "genre" not in parsed:
        raise json.JSONDecodeError('No "genre" field found', response, 0)
    return parsed["genre"]


def genre(text, genres):
    """Ask the local LLM for the single predominant genre of a plot summary.

    Args:
        text: Plot summary to classify (a string).
        genres: Allowed genre labels; the answer must match one of them exactly.

    Returns:
        The matching label from `genres` (with any commas stripped) when the
        model produces one within 8 attempts; otherwise the raw text of the
        model's last reply, which callers can detect because it is not in `genres`.
    """
    genres_text = ", ".join(str(item) for item in genres)

    prompt = f"""Given a movie plot summary identify the single most predominant genre from the following list: {genres_text}.
    Your output should be a formatted json with the field "genre" set to the identified genre from the list. Include only one genre.
Example 1:
Input: In a dystopian future, a young hacker stumbles upon a government conspiracy and must risk everything to expose the truth. 
Output: {{"genre":"Thriller"}}
Example 2:
Input: A clumsy, aspiring chef inherits a struggling restaurant and, with the help of her quirky family, whips up delicious dishes and heartwarming community spirit.
Output: {{"genre":"Comedy"}}
Example 3:
Input: A seasoned detective investigates a series of gruesome murders and races against time to stop the killer before they strike again.
Output: {{"genre":"Crime Thriller"}}

You will output in JSON format, without any other text.
"""
    user_content = "Sentence:" + text

    # Measure the prompt with the model's own tokenizer (both instances load the
    # same model file) so the routing threshold reflects what actually fills the
    # context window.
    total_tokens = _count_tokens(fast_llm, prompt) + _count_tokens(fast_llm, user_content)
    llm = larger_llm if total_tokens > 1700 else fast_llm

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": user_content},
    ]
    response = ""
    for _ in range(8):
        resp = llm.create_chat_completion(
            messages=messages,
            temperature=0.01,
            max_tokens=100,
        )
        response = resp['choices'][0]['message']['content']
        try:
            predicted = _extract_genre(response)
        except json.JSONDecodeError:
            print("JSON Decode Error")
            print(response)
            correction = "Please follow the json format, do not include any other text."
        else:
            if isinstance(predicted, str) and predicted in genres:
                # NOTE(review): stripping commas is kept from the original; a list
                # entry that itself contains a comma would no longer match `genres`.
                return predicted.replace(",", "")
            print("Genre not in list")
            print(response)
            correction = f"Please select only one genre from the list with the exact wording: {genres_text}"
        # Record the model's own reply before the correction so the follow-up
        # refers to something in the conversation and the roles keep alternating.
        messages.append({"role": "assistant", "content": response})
        messages.append({"role": "user", "content": correction})
    # Every attempt failed: hand back the last raw reply.
    return response

In [4]:
import pandas as pd
# Labelled training data; later cells treat every column other than
# id/plot/title as a genre indicator column.
train = pd.read_csv(
    filepath_or_buffer='/home/p.kuznetsov/script/test/train_data.csv',
)

In [5]:
# Columns that carry identifiers or free text rather than a genre label.
columns_to_remove = ['id', 'plot', 'title']
column_names = train.columns

# Whatever remains, in the frame's original column order, is the list of genres.
genres = list(filter(lambda name: name not in columns_to_remove, column_names))

In [None]:
# Keep only the first 100 test rows, then add one indicator column per genre,
# initialised to 0.
test = pd.read_csv('test_features.csv').head(100)
for column in genres:
    test[column] = 0

In [None]:
import time

# Time only the model calls: one genre() prediction per plot summary.
start = time.time()
test['result'] = [genre(plot, genres) for plot in test['plot']]
end = time.time()

errors = 0


def update_column(row):
    """Set the predicted genre's indicator to 1 for this row of `test`.

    Rows whose 'result' is not one of `genres` are left untouched and counted
    in the module-level `errors` counter instead.
    """
    global errors
    predicted = row['result']
    if predicted not in genres:
        errors += 1
        return
    test.at[row.name, predicted] = 1


# Visit every row; the return value of apply() is not used.
test.apply(update_column, axis=1)

# The temporary 'result' column is deliberately kept for inspection;
# to drop it: test = test.drop('result', axis=1)

print(errors)
print(end - start)

In [8]:
from sklearn.metrics import f1_score

# Evaluate on a small labelled sample. It is bound to its own name so the full
# `train` frame stays available to other cells and to re-runs.
train_sample = train.head(10)

# Start from an all-zero integer indicator matrix that shares the sample's index,
# so predictions and ground truth are row-aligned with a numeric dtype.
y_pred = pd.DataFrame(0, index=train_sample.index, columns=genres)
for index, row in train_sample.iterrows():
    predicted = genre(row['plot'], genres)
    # genre() returns the raw model reply when it could not settle on a valid
    # label; leave such rows all-zero rather than adding a junk column for it.
    if predicted in genres:
        y_pred.at[index, predicted] = 1

score = f1_score(train_sample[genres], y_pred[genres], average='micro')
print(score)

Genre not in list
{"genre":"Thriller, Crime Fiction"}
Genre not in list
{"genre":"Mystery, Crime Thriller"}
JSON Decode Error
 Based on the given input, the genre would be: "Science Fiction".
0.2608695652173913
