In [1]:
##Functions and libraries
from openai import OpenAI
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
from scipy.spatial.distance import cosine
from tensorflow.keras.datasets import imdb
from fpdf import FPDF
from sklearn.metrics import accuracy_score

## Client, embedding model and fixed prompt texts shared by the functions below
# OpenAI-compatible client pointed at an endpoint on localhost, port 1234.
# NOTE(review): "lm-studio" looks like a placeholder key for a local LM Studio server — confirm.
client = OpenAI(base_url="http://127.0.0.1:1234/v1", api_key="lm-studio")
# Sentence-embedding model; used to embed the stored reviews and each incoming prompt for retrieval.
model = SentenceTransformer("all-MiniLM-L6-v2")

# NOTE(review): all three prompt texts spell "negativ" (missing "e"). They are left untouched here
# because they are sent to the model verbatim, so editing them changes the model input.
# System message used by prompt_model (plain prompting).
system_prompt =  "Decide whether the entire prompt you receive is either a positive or a negative movie review. Only answer with a single digit, use '1' for positive or with '0' for negativ. Do not answer with anything else than '1' or '0'."
# Opening of the RAG system message; integrate_knowledge appends the retrieved reviews to it.
system_prompt_RAG = "Decide whether the entire prompt you receive is either a positive or a negative movie review. Only answer with a single digit, use '1' for positive or with '0' for negativ. Do not answer with anything else than '1' or '0'. Use the ratings of the following similar reviews to make your decision. "
# Text placed in front of the review in the user message of both prompt_model and RAG_prompt_model.
user_prompt_stable =  "Decide whether the entire prompt you receive is either a positive or a negative movie review. Only answer with a single digit, use '1' for positive or with '0' for negativ. Do not answer with anything else than '1' or '0'. The review goes as follows: "

##Dataset
# Decode the reviews from the dataset
def decode_reviews(sequences, word_lookup=None, index_offset=3):
    """Turn integer-encoded reviews back into space-separated text.

    Args:
        sequences: Iterable of integer sequences. The first element of every
            sequence is dropped before decoding (presumably the start-of-review
            marker of the Keras IMDB encoding — confirm against the loader).
        word_lookup: Optional mapping from integer index to word. When omitted,
            the notebook-level ``reverse_word_index`` is used; it only has to
            exist by the time this function is *called*, not when it is defined.
        index_offset: Value subtracted from every index before the lookup.
            The default of 3 reproduces the original hard-coded behaviour.

    Returns:
        A list with one decoded string per input sequence. Indices that are
        missing from the lookup are rendered as "?".
    """
    if word_lookup is None:
        # Fall back to the global mapping so existing one-argument calls keep working.
        word_lookup = reverse_word_index

    return [
        " ".join(word_lookup.get(token - index_offset, "?") for token in sequence[1:])
        for sequence in sequences
    ]


##Normal Prompting procedure
# API access to the model
def prompt_model(prompt):
    """Classify a single review with the plain (non-RAG) prompt.

    The request carries ``system_prompt`` as the system message and
    ``user_prompt_stable`` followed by ``prompt`` as the user message, and is
    sent with temperature 0.

    Returns:
        The message content of the first choice in the completion.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_stable + prompt}

    response = client.chat.completions.create(
        model="llama-3-8b-lexi-uncensored",
        messages=[system_message, user_message],
        temperature=0,
    )
    first_choice = response.choices[0]
    return first_choice.message.content


##RAG Magic
# Create vector embeddings for the reviews
def encode_text_list(text_list):
    """Embed every text in ``text_list`` with the notebook's sentence model.

    All texts are handed to ``model.encode`` in one call so the model can
    process them in batches instead of running one forward pass per text.

    Args:
        text_list: Iterable of strings to embed.

    Returns:
        A list holding one embedding vector per input text, in input order.
        An empty input yields an empty list without calling the model.
    """
    texts = list(text_list)
    if not texts:
        return []

    # encode() on a list returns one row per text; list() splits the result
    # back into the per-text vectors that the callers iterate over.
    return list(model.encode(texts))


# return the indices of the top n relevant reviews for retrieval
def calculate_top_n_similarities(prompt, stored_embeddings, top_n=3):
    """Return the indices of the stored embeddings most similar to ``prompt``.

    Args:
        prompt: Text to embed with the notebook's sentence model.
        stored_embeddings: Sequence of embedding vectors to compare against.
        top_n: Maximum number of indices to return.

    Returns:
        A NumPy integer array of at most ``top_n`` indices into
        ``stored_embeddings``, ordered from most to least similar. The array is
        empty when ``top_n`` is not positive or nothing is stored.
    """
    # Guard first: with top_n == 0 the slice [-0:] would select *every* index,
    # and a negative top_n would select an arbitrary tail.
    if top_n <= 0 or len(stored_embeddings) == 0:
        return np.array([], dtype=np.intp)

    # Encode the new prompt
    prompt_embedding = model.encode(prompt)

    # 1 - cosine_distance = cosine_similarity; higher means more similar.
    similarities = [1 - cosine(prompt_embedding, emb) for emb in stored_embeddings]

    # argsort is ascending, so take the tail and reverse it to get best-first.
    ranked = np.argsort(similarities)
    return ranked[-top_n:][::-1]


# construct context query for the model
def integrate_knowledge(stored_reviews, top_n_indices, preamble=None):
    """Build the RAG system message from the retrieved reviews.

    Args:
        stored_reviews: Indexable collection of formatted review strings.
        top_n_indices: Indices into ``stored_reviews``, ordered from most to
            least similar.
        preamble: Optional text placed in front of the reviews. When omitted,
            the notebook-level ``system_prompt_RAG`` is used.

    Returns:
        The preamble followed by one "<ordinal> review, <review>" entry per
        index, with entries separated by a single space and surrounding
        whitespace stripped.
    """
    # Ordinal words for readability; ranks beyond the tenth fall back to "next".
    ordinals = [
        "The most similar",
        "The second most similar",
        "The third most similar",
        "The fourth most similar",
        "The fifth most similar",
        "The sixth most similar",
        "The seventh most similar",
        "The eighth most similar",
        "The ninth most similar",
        "The tenth most similar",
    ]
    if preamble is None:
        # Resolved at call time so existing two-argument calls keep working.
        preamble = system_prompt_RAG

    entries = []
    for rank, idx in enumerate(top_n_indices):
        ordinal_word = ordinals[rank] if rank < len(ordinals) else "next"
        entries.append(f"{ordinal_word} review, {stored_reviews[idx]}")

    # Join with a space: plain concatenation fused the closing period of one
    # review to the first word of the next entry.
    return (preamble + " ".join(entries)).strip()


# RAG-prompts for the model
def RAG_prompt_model(prompt):
    """Classify a single review with retrieved reviews as context.

    The three entries of ``stored_embeddings`` most similar to ``prompt`` are
    looked up, the matching texts from ``review_list`` are merged into the
    system message, and the request is sent with ``user_prompt_stable``
    followed by ``prompt`` as the user message at temperature 0.

    Returns:
        The message content of the first choice in the completion.
    """
    nearest_indices = calculate_top_n_similarities(prompt, stored_embeddings, top_n=3)
    rag_system_message = integrate_knowledge(review_list, nearest_indices)

    conversation = [
        {"role": "system", "content": rag_system_message},
        {"role": "user", "content": user_prompt_stable + prompt},
    ]
    reply = client.chat.completions.create(
        model="llama-3-8b-lexi-uncensored",
        messages=conversation,
        temperature=0,
    )
    return reply.choices[0].message.content


# Generate RAG document for AnythingLLM
def write_reviews_to_txt(reviews, ratings, output_filename="movie_reviews.txt"):
    """Write the reviews and their ratings to a UTF-8 text file.

    Each pair becomes one line of the form
    ``The review: "<review>" was rated "<rating>".`` followed by a blank line.

    Args:
        reviews: Sequence of review texts.
        ratings: Sequence of ratings, same length as ``reviews``.
        output_filename: Path of the file to (over)write.

    Raises:
        ValueError: If ``reviews`` and ``ratings`` differ in length.

    Any exception raised while writing is caught and reported with ``print``
    rather than propagated.
    """
    if len(reviews) != len(ratings):
        raise ValueError("The number of reviews and ratings must be the same.")

    try:
        with open(output_filename, "w", encoding="utf-8") as handle:
            # Entry line plus the blank separator line, emitted per pair.
            handle.writelines(
                f'The review: "{text}" was rated "{label}".\n\n'
                for text, label in zip(reviews, ratings)
            )
        print(f"Text file '{output_filename}' has been created successfully.")
    except Exception as err:
        print(f"Error saving text file: {err}")


# generate list of reviews and ratings
def store_reviews_with_ratings(reviews, ratings):
    """Pair every review with its rating in a single descriptive sentence.

    Args:
        reviews: Sequence of review texts.
        ratings: Sequence of ratings, same length as ``reviews``.

    Returns:
        A list of strings of the form
        ``the review: "<review>" was rated "<rating>".`` in input order.

    Raises:
        ValueError: If ``reviews`` and ``ratings`` differ in length.
    """
    if len(reviews) != len(ratings):
        raise ValueError("The number of text inputs and ratings must be the same.")

    return [
        f'the review: "{text}" was rated "{label}".'
        for text, label in zip(reviews, ratings)
    ]


# Working in bigger batches with progress feedback
def batch_prompt_model(prompts, mode="normal"):
    """Send every prompt to the model and collect the raw answers.

    Args:
        prompts: Iterable of review texts.
        mode: "normal" routes each prompt through ``prompt_model``, "RAG"
            through ``RAG_prompt_model``.

    Returns:
        List of model answers in prompt order.

    Raises:
        ValueError: When a prompt is about to be processed and ``mode`` is
            neither "normal" nor "RAG".

    A progress line is printed after every tenth completed prompt.
    """
    answers = []
    for done, text in enumerate(prompts, start=1):
        # The mode is validated per prompt, so an empty batch never raises.
        if mode not in ("normal", "RAG"):
            raise ValueError(f"Invalid mode '{mode}'.")
        ask = prompt_model if mode == "normal" else RAG_prompt_model
        answers.append(ask(text))
        if done % 10 == 0:
            print(f"Completed {done} prompts.")
    return answers


##Handling unusual outputs
# Helper function to handle weird outputs
def convert_outputs(strings):
    """Convert raw model answers into integer labels.

    Every answer is reduced to its '1' and '0' characters. If exactly one such
    character remains, it becomes the label; otherwise the answer is skipped.
    That covers answers without any '1'/'0' as well as ambiguous ones such as
    "10". Answers that are ``None`` or not strings are handled too instead of
    raising ``TypeError``.

    Args:
        strings: Iterable of model answers (normally ``str``; ``None`` allowed).

    Returns:
        A tuple ``(labels, skipped_indices)``: the list of 0/1 integers for the
        answers that could be converted, and the positions in ``strings`` of
        the answers that were skipped.

    Warnings and a summary of non-exact and skipped answers are printed.
    """
    result = []
    weird = 0
    skipped_indices = []

    for i, s in enumerate(strings):
        # Anything other than an exact "1"/"0" counts as a weird output.
        if s != "1" and s != "0":
            weird += 1

        # The API may return no content at all; treat that as empty text so the
        # answer is skipped rather than crashing the whole batch.
        text = "" if s is None else str(s)
        cleaned = "".join(char for char in text if char in "10")

        if cleaned == "1":
            result.append(1)
        elif cleaned == "0":
            result.append(0)
        else:
            print(f"Warning: Unrecognized format '{s}', skipping.")
            print("")
            skipped_indices.append(i)

    skipped = len(skipped_indices)
    if weird > 0:
        print(f"This batch query produced {weird} weird outputs.")
    if skipped > 0:
        print(
            f"Additionally, it skipped {skipped} outputs that did not contain 1 or 0 at all."
        )
    return result, skipped_indices


# delete the skipped indices from the list if needed
def clean_y(y, skipped_indices):
    """Drop the entries of ``y`` whose positions are listed in ``skipped_indices``.

    Args:
        y: Indexable sequence of true labels.
        skipped_indices: Positions to remove (as returned by ``convert_outputs``).

    Returns:
        ``y`` itself when nothing was skipped; otherwise a new list with the
        remaining labels in their original order. A warning is printed when
        entries are removed.
    """
    if len(skipped_indices) == 0:
        return y

    print(f"Warning: removing {len(skipped_indices)} skipped outputs from y.")
    # A set gives constant-time membership tests instead of scanning the list
    # of skipped positions once per label.
    dropped = set(skipped_indices)
    return [y[i] for i in range(len(y)) if i not in dropped]

  from tqdm.autonotebook import tqdm, trange







We will test normal, non-augmented prompting first.
The dataset contains movie reviews rated either positive or negative.

In [2]:
##Dataset
# IMDB dataset preparation
# Reverse the word index to create a mapping from integer indices to words
word_index = imdb.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}

# Load the dataset with a fixed shuffle seed. seed=None left the shuffle
# unseeded, so both the evaluated reviews and the RAG knowledge base changed
# on every run and reported accuracies could not be reproduced.
IMDB_SEED = 113
(train_x, train_y), (test_x, test_y) = imdb.load_data(num_words=100000, seed=IMDB_SEED)

# Number of test reviews to evaluate; x holds their decoded text.
set_size = 10
x = decode_reviews(test_x[:set_size])

In [3]:
# Run the plain (non-RAG) prompt on the selected subset and score it.
# Answers that could not be converted to 0/1 are removed from the labels too,
# so the accuracy only covers the converted answers.
raw_outputs = batch_prompt_model(x)
cats, ids = convert_outputs(raw_outputs)
valid_test_y = clean_y(test_y[:set_size], ids)
standard_accuracy = accuracy_score(valid_test_y, cats)
print(f"Accuracy of standard prompting: {standard_accuracy}")

Completed 10 prompts.
Accuracy of standard prompting: 1.0


Now we will test the simple RAG implementation set up in this script.

In [5]:
# Build the RAG knowledge base from the first RAG_size training reviews:
# label words, formatted review sentences, and one embedding per sentence.
RAG_size = 1000
train_y_RAG = ["positive" if label == 1 else "negative" for label in train_y]
rag_review_texts = decode_reviews(train_x[:RAG_size])
rag_review_labels = train_y_RAG[:RAG_size]
review_list = store_reviews_with_ratings(rag_review_texts, rag_review_labels)
stored_embeddings = encode_text_list(review_list)

In [6]:
# Run the RAG prompt on the same subset and score it; as above, answers that
# could not be converted to 0/1 are excluded from the accuracy.
raw_outputs = batch_prompt_model(x, mode="RAG")
cats, ids = convert_outputs(raw_outputs)
valid_test_y = clean_y(test_y[:set_size], ids)
rag_accuracy = accuracy_score(valid_test_y, cats)
print(f"Accuracy of RAG prompting: {rag_accuracy}")

Completed 10 prompts.
Accuracy of RAG prompting: 1.0
