In [12]:
from openai import OpenAI
from tensorflow.keras.datasets import imdb
import numpy as np
from sklearn.metrics import accuracy_score


# IMDB dataset preparation: fetch the word -> integer vocabulary, then invert
# it so that integer indices can be mapped back to words.
word_index = imdb.get_word_index()
reverse_word_index = dict((idx, word) for word, idx in word_index.items())


def decode_review(sequence):
    """Turn an integer-encoded review back into a space-separated string.

    The first element of ``sequence`` is dropped, and every remaining index is
    shifted down by 3 before being looked up in the module-level
    ``reverse_word_index``. Indices without an entry are rendered as ``"?"``.

    NOTE(review): presumably the dropped element is the start marker and the
    offset of 3 mirrors the default ``index_from`` of ``imdb.load_data`` --
    confirm against the Keras documentation.
    """
    words = []
    for token_id in sequence[1:]:
        words.append(reverse_word_index.get(token_id - 3, "?"))
    return " ".join(words)


# Load the IMDB reviews as integer sequences (vocabulary capped via num_words).
(train_x, train_y), (test_x, test_y) = imdb.load_data(num_words=100000)


# `reverse_word_index` is built once, right after `word_index` is fetched
# above; rebuilding the identical dictionary here is unnecessary.

# Point to the local server
client = OpenAI(base_url="http://127.0.0.1:1234/v1", api_key="lm-studio")


# API access to the model
def prompt_model(prompt):
    """Ask the chat model behind ``client`` to classify ``prompt``.

    Sends a fixed system instruction plus ``prompt`` as the user message to
    the module-level ``client`` with ``temperature=0`` and returns the text of
    the first choice.

    Args:
        prompt: The review text to classify.

    Returns:
        The reply text with surrounding whitespace removed. An empty string is
        returned when the response carries no text content (``None``), so
        callers can always treat the result as a string.
    """
    completion = client.chat.completions.create(
        model="llama-3-8b-lexi-uncensored",
        messages=[
            {
                "role": "system",
                "content": "Decide whether the entire prompt you receive is either a positive or a negative movie review. Only answer with a single digit, use '1' for positive or with '0' for negativ. Do not answer with anything else than '1' or '0'.",
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    content = completion.choices[0].message.content
    # `content` can be None; fall back to "" so downstream string handling
    # does not fail, and trim whitespace so a reply such as "1\n" is
    # returned as the bare digit.
    return (content or "").strip()


# Helper function to handle weird outputs
def convert_outputs(strings):
    """Convert raw model replies into integer labels.

    Every reply is reduced to the ``'1'`` and ``'0'`` characters it contains.
    If exactly one such character remains it becomes the label; otherwise the
    reply is skipped and its position is recorded.

    Args:
        strings: Iterable of model replies. ``None`` entries are tolerated and
            treated as unrecognized; other non-string values are converted
            with ``str`` before parsing.

    Returns:
        A tuple ``(result, skipped_indices)``. ``result`` holds the ``0``/``1``
        labels of the replies that could be parsed, in input order, and
        ``skipped_indices`` holds the positions in ``strings`` that were
        dropped. Callers comparing ``result`` against reference labels must
        drop the labels at ``skipped_indices`` to keep both aligned.
    """
    result = []
    skipped_indices = []

    for i, s in enumerate(strings):
        # A missing reply (None) is treated like an empty one instead of
        # raising TypeError when it is iterated below.
        text = "" if s is None else str(s)

        if text not in ("1", "0"):
            print(f"Weird format detected'{s}'.")

        # Filter out any characters that are not '1' or '0'
        cleaned = "".join(char for char in text if char in "10")

        # Accept the reply only if exactly one digit survived the filter
        if cleaned in ("1", "0"):
            result.append(int(cleaned))
        else:
            print(f"Warning: Unrecognized format '{s}', skipping.")
            skipped_indices.append(i)

    if skipped_indices:
        print(f"Skipped {len(skipped_indices)} outputs.")
    return result, skipped_indices

In [None]:
# Classify the first `n_eval` test reviews and score the predictions against
# the labels of those same test reviews.
n_eval = 50
x = [decode_review(seq) for seq in test_x[:n_eval]]
cats, ids = convert_outputs([prompt_model(review) for review in x])
# convert_outputs drops replies it cannot parse, so the labels at those
# positions are removed as well; otherwise predictions and labels would have
# different lengths and no longer line up.
skipped = set(ids)
labels = [label for i, label in enumerate(test_y[:n_eval]) if i not in skipped]
if cats:
    print(f"Accuracy: {accuracy_score(labels, cats)}")
else:
    print("Accuracy: n/a (no usable model outputs)")

[1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0]


In [40]:
# Display the training labels returned by imdb.load_data.
# NOTE(review): the reviews classified in this notebook are taken from test_x,
# so test_y (not train_y) is the matching label array.
train_y

array([1, 0, 0, ..., 0, 1, 0], dtype=int64)

In [24]:
# Print every entry of `cats` followed by blank lines so that long,
# multi-line model replies are visually separated from one another.
for entry in cats:
    print(entry, end="\n\n\n")

1.


0


0


I'll decide whether each sentence in the given text is a positive or negative movie review, and answer with '1' for positive or with '0' for negative. Here's the analysis:

1. "the scots excel at storytelling" - Positive (1)
2. "traditional sort many years after the event i can still see in my mind' eye an elderly lady" - Neutral
3. "my friend's mother retelling the battle of culloden she makes the characters come alive her passion is that of an eyewitness one to the events on the sodden heath a mile or so from where she lives" - Positive (1)
4. "of course it happened many years before she was born but you wouldn't guess from the way she tells it" - Positive (1)
5. "the same story is told in bars the length and breadth of scotland as i discussed it with a friend one night in mallaig" - Neutral
6. "a local cut in to give his version the discussion continued to closing time" - Positive (1)
7. "stories passed down like this become part of our being who doesn't remember the st

In [15]:
# Display the first element of the training label array.
train_y[0]

1

In [None]:

# NOTE(review): in the visible cells `completion` is only assigned as a local
# variable inside prompt_model, so on a fresh kernel this line raises
# NameError unless another cell defines it -- confirm or remove this cell.
print(completion.choices[0].message.content)
