# Execute GPT requests with RAG

- TODO: Format the different function calls in ./tools/templates
- TODO: Preprocess data from the non-movie datasets
- TODO: Move the GPT call into a separate module so it can be swapped out for a local LLM
- TODO: Add better evaluation metrics

In [1]:
! pip install openai langchain -q

In [37]:
import sys
sys.path.append('tools')

from rows import extract_rows
from preferences import format_preferences

from sklearn.metrics import f1_score, recall_score, roc_auc_score

import os
import json
import csv

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved in the notebook; fall back to the placeholder when it is unset or empty.
opai_api_key = os.environ.get("OPENAI_API_KEY") or "YOUR-OPENAI-KEY"

In [38]:
# Retrieve samples at random from formatted dataset.
# sample_size is the count handed to extract_rows (imported from tools/rows).
# Later cells index `rows[0]` and iterate over `rows`, treating each element
# as one user's record.
sample_size = 500
rows = extract_rows(sample_size)

In [39]:
# Show the title and text of every review under the first key of rows[0]
first = [*rows[0].values()][0]

for review in first:
    print(f"Title: {review['title']}", f"Review: {review['reviewText']}", sep="\n")

Title: Black Rain VHS
Review: A great movie starring Michael Douglas and Andy Garcia as two New York Detectives that witnessed a Japanese criminal murder a Japanese man in New York, and was selected to return the prisoner to Japan.  Upon arrival a fake Japanese police officer took the prisoner which both men swore to track the killer down in Japan.  Garcia was killed by some of the thugs during their quest for the location of the killer.  Ken Takakura was the Japanese liaison and worked with the detectives to find the killer.  Kate Capshaw was a gorgeous Amerikan lady that worked in a expensive Japanese club that helped the boys in their search for missing plates for printing Amerikan counterfit dollars and brought a climax of a large shootout and capture of the killer.
Excellent movie.
Title: War
Review: Hero is a FBI po-leese man whose  asian partner gets murdered along with his family and burned with his home, after gettin' too close to the big assassins.  Got between two Asian gang

In [40]:
# Build the prompt inputs for the first sampled user and pretty-print them as JSON.
# The inference cell passes this same structure to chain.invoke and reads its
# 'truth' and 'target' keys; presumably it also carries the 'likes' and
# 'dislikes' fields the prompt template expects — confirm in tools/preferences.
prefs = format_preferences(rows[0])
print(json.dumps(prefs, indent=4))

## Define model and function calling
**Only run one of the below cells**

In [42]:
from langchain.prompts.chat import ChatPromptTemplate
from langchain_community.chat_models import ChatOpenAI

In [43]:
# Prediction + explanation chain: prompt template piped into the chat model,
# which is bound to a single function schema.
# The misspelled identifiers ("reccomendation", "reccomend") are kept exactly
# because the inference cell reads the result through those keys.

prompt = ChatPromptTemplate.from_template(
    "You are a reccomender system. "
    "User Liked {likes}, User Disliked {dislikes}. "
    "Will the user like {target}?"
)
model = ChatOpenAI(openai_api_key=opai_api_key, model_name="gpt-3.5-turbo")

# Schema with two required fields: a boolean verdict and a free-text rationale.
functions = [
    dict(
        name="reccomendation",
        description="A reccomender system",
        parameters=dict(
            type="object",
            properties=dict(
                reccomend=dict(
                    type="boolean",
                    description="Whether the user will like the target",
                ),
                explanation=dict(
                    type="string",
                    description="Step-by-step rationale for your reccomendation, including user demographics. 3 sentences",
                ),
            ),
            required=["reccomend", "explanation"],
        ),
    )
]
chain = prompt | model.bind(functions=functions, function_call=dict(name="reccomendation"))

### Run inference and evaluate

In [48]:
# Accumulators for the evaluation run; each successful user appends one entry to all four.
pred = []          # boolean 'reccomend' value parsed from the model's function call
truth = []         # prefs['truth'] for the same user
title = []         # prefs['target'], the item being predicted
explanations = []  # raw JSON arguments string returned by the function call

evaluated = 0

# Stop after this many successfully evaluated users; set to None to run every
# sampled user. Kept at 1 to preserve the single-user smoke test.
max_evaluated = 1

# Run inference on the chain
for user in rows:
    try:
        prefs = format_preferences(user)
    except Exception as e:
        # Skip this user. Without `continue` the chain would be invoked with
        # stale `prefs` left over from an earlier cell or iteration, and the
        # result would be logged against the wrong user.
        print(f"Error: {e} for user {user}")
        continue

    response = chain.invoke(prefs)

    # The function-call arguments arrive as a JSON string.
    raw_arguments = response.additional_kwargs['function_call']['arguments']
    prediction = json.loads(raw_arguments)

    # Log response
    pred.append(prediction['reccomend'])
    truth.append(prefs['truth'])
    title.append(prefs['target'])
    explanations.append(raw_arguments)

    print(json.dumps(prefs, indent=4))
    print(f"pred: {prediction['reccomend']}, {prediction['explanation']}")
    evaluated += 1

    if max_evaluated is not None and evaluated >= max_evaluated:
        break


{
    "likes": [
        "Black Rain VHS",
        "War",
        "Deadwood: Season 2",
        "Chaos",
        "The Tom Selleck Western Collection",
        "Jericho - The Complete Series",
        "The Tom Selleck Western Collection",
        "Jericho - The Complete Series",
        "Star Trek",
        "Blue Bloods: Season 1",
        "13",
        "Band of Brothers(Elite SC/BD+DCExp12-21)",
        "Jesse Stone: Lost in Paradise"
    ],
    "dislikes": [
        "Revolver (2005) (Region 2)",
        "Crank 2 [Theatical Release] [Theatrical Release]",
        "Born to Battle"
    ],
    "target": "Star Trek XI",
    "truth": true
}
pred: True, Based on the user's liking for similar action and drama movies, as well as their interest in TV series like 'Star Trek' and 'Jericho', it is likely that they will enjoy 'Star Trek XI'.


## WIP

In [None]:
# NOTE: scikit-learn metrics take the ground truth first, i.e. (y_true, y_pred),
# so `truth` must precede `pred`. These need more than one evaluated user, and
# roc_auc_score needs both classes present in `truth`.
# f1 = f1_score(truth, pred)
# recall = recall_score(truth, pred)
# auc = roc_auc_score(truth, pred)

In [None]:
# print(f1, recall, auc)

In [None]:
# # Combine collected data into a list of tuples
# data = list(zip(pred, truth, title, explanations))

# # Define the file name for the CSV
# csv_filename = 'data_output.csv'

# # Save data to a CSV file
# with open(csv_filename, 'w', newline='') as csvfile:
#     csv_writer = csv.writer(csvfile)
    
#     # Write header
#     csv_writer.writerow(['Prediction', 'Truth', 'Title', 'Explanation'])
    
#     # Write rows
#     csv_writer.writerows(data)

# print(f"Data saved to {csv_filename}")