# Experimenting with Self-Reflection Mechanism & Recommendation Systems


## Git Dependencies

In [1]:
!git clone github.com/AsafAvr/RefRec.git

Cloning into 'RefRec'...
remote: Enumerating objects: 96, done.[K
remote: Counting objects: 100% (44/44), done.[K
remote: Compressing objects: 100% (36/36), done.[K
remote: Total 96 (delta 12), reused 26 (delta 3), pack-reused 52[K
Receiving objects: 100% (96/96), 49.63 MiB | 11.40 MiB/s, done.
Resolving deltas: 100% (16/16), done.
Updating files: 100% (52/52), done.


In [None]:
%cd /content/RefRec
!pip install -q -r requirements.txt

# Outline
- Load validation dataset - Movielens-1M
- Load recommendation model that was finetuned on specific embedding spce (TBC)
- Create a self-reflection mechanism on the validation dataset
  - Predict the next sequence for a specific user by prompting the llm with the data about the user
    - Generate prompt to encode specific features
  - reflect on the prediction vs the ground truth
  - suggest a better feature and reflect on why the mistake happened
  - continue to the next sequence

## Imports

In [3]:
device = 'cuda'

In [4]:
from pathlib import Path
cur_path = Path.cwd()
cur_path

PosixPath('/content/RefRec')

In [48]:
klg_folder = Path.cwd().joinpath('knowledge_encoding')
rs_folder = Path.cwd().joinpath('RS')
ml1m_folder = Path.cwd().joinpath('data').joinpath('ml-1m')
preprocess_folder = Path.cwd().joinpath('preprocess')

In [49]:
import sys
sys.path.append(klg_folder.as_posix())
sys.path.append(rs_folder.as_posix())
sys.path.append(preprocess_folder.as_posix())
from lm_encoding import get_text_data_loader
from pre_utils import GENDER_MAPPING, AGE_MAPPING, OCCUPATION_MAPPING

In [7]:
import numpy as np
import pandas as pd
import torch
import torch.utils.data as Data
import re

from dataset import AmzDataset
from main_ctr import eval
from utils import load_json
from sklearn.metrics import log_loss, roc_auc_score

In [8]:
class ShortDataLoader:
    def __init__(self, dataloader, num_batches):
        self.dataloader = dataloader
        self.num_batches = num_batches

    def __iter__(self):
        count = 0
        for batch in self.dataloader:
            if count >= self.num_batches:
                break
            yield batch
            count += 1

    def __len__(self):
        return min(len(self.dataloader), self.num_batches)

from torch.utils.data import Dataset
class DictDataset(Dataset):
    def __init__(self, data_list):
        self.data_list = data_list

    def __getitem__(self, index):
        return self.data_list[index]

    def __len__(self):
        return len(self.data_list)

## Loading Train & Test datasets


In [9]:
model_path = rs_folder.joinpath('model').joinpath('ml-1m').joinpath('ctr').joinpath('DIN').joinpath('DIN.pt').as_posix()
inference_model = torch.load(model_path,map_location=torch.device(device))
train_set = AmzDataset(ml1m_folder.joinpath('proc_data').as_posix(), 'train', 'ctr', 5, True, 'bert_avg') #test
train_loader = Data.DataLoader(dataset=train_set, batch_size=2048, shuffle=False)

In [10]:
len(train_set)

751436

In [11]:
test_set = AmzDataset(ml1m_folder.joinpath('proc_data').as_posix(), 'test', 'ctr', 5, True, 'bert_avg') #test
len(test_set)

92024

In [12]:
user_vec_dict = load_json(ml1m_folder.joinpath('proc_data').joinpath('bert_avg_augment.hist'))
list(user_vec_dict.keys())[:10]

['2179', '3813', '2108', '778', '4232', '2903', '2449', '5286', '4235', '3020']

In [13]:
item_vec_dict = load_json(ml1m_folder.joinpath('proc_data').joinpath('bert_avg_augment.item'))
list(item_vec_dict.keys())[:10]

['1', '2', '4', '3', '5', '6', '7', '9', '8', '11']

## Extracting metadata for LLM

In [None]:
hist_loader, hist_idxes, item_loader, item_idxes = get_text_data_loader(ml1m_folder.joinpath('knowledge').as_posix(), 1)

In [19]:
def get_user_hist(user_vec,hist_idxes):
    user_vec = user_vec.tolist()
    for key, v in user_vec_dict.items():
        if v == user_vec:
            user_index = hist_idxes.index(key)
            return key,user_index,hist_loader.dataset[user_index]
    return None

def get_item_desc(item_vec, item_idxes):
    item_vec = item_vec.tolist()
    for key,v in item_vec_dict.items():
        if v == item_vec:
            item_index = item_idxes.index(key)
            return key,item_index, item_loader.dataset[item_index]
    return None

In [45]:
user_prompt_dict = load_json(ml1m_folder.joinpath('proc_data').joinpath('prompt.hist').as_posix())
item_prompt_dict = load_json(ml1m_folder.joinpath('proc_data').joinpath('prompt.item').as_posix())
datamap = load_json(ml1m_folder.joinpath('proc_data').joinpath('datamaps.json').as_posix())
sequence_data = load_json(ml1m_folder.joinpath('proc_data').joinpath('sequential_data.json').as_posix())
train_test_split = load_json(ml1m_folder.joinpath('proc_data').joinpath('train_test_split.json').as_posix())
id2user = datamap['id2user']
id2item = datamap['id2item']

In [21]:
data_list = []
for i in range(10000):
    data = train_set[i]
    user_key, user_idx, user_hist = get_user_hist(data['hist_aug_vec'], hist_idxes)
    item_key, item_idx, item_desc = get_item_desc(data['item_aug_vec'], item_idxes)
    data_list.append({'test_index': i, 'user_idx': user_idx, 'user_key': user_key, 'user_hist': user_hist, 'item_idx': item_idx,'item_key': item_key, 'item_desc': item_desc })
df_train = pd.DataFrame(data_list)

In [22]:
data_list = []
for i in range(10000):
    data = test_set[i]
    user_key, user_idx, user_hist = get_user_hist(data['hist_aug_vec'], hist_idxes)
    item_key, item_idx, item_desc = get_item_desc(data['item_aug_vec'], item_idxes)
    data_list.append({'test_index': i, 'user_idx': user_idx, 'user_key': user_key, 'user_hist': user_hist, 'item_idx': item_idx,'item_key': item_key, 'item_desc': item_desc })
df_test = pd.DataFrame(data_list)

In [35]:
data.keys()
for key in data.keys():
  if key != 'hist_aug_vec' and key != 'item_aug_vec':
    print(key,data[key])

iid tensor(297)
aid tensor([4])
lb tensor(1)
hist_iid_seq tensor([ 292,  980,  427, 1277,  107])
hist_aid_seq tensor([[4],
        [4],
        [3],
        [5],
        [3]])
hist_rate_seq tensor([2, 5, 4, 4, 5])
hist_seq_len tensor(5)


In [23]:
df_train.loc[0]

test_index                                                    0
user_idx                                                   4130
user_key                                                   2336
user_hist     Given a male user who is aged 35-44 and a trad...
item_idx                                                   3440
item_key                                                   3508
item_desc     The Outlaw Josey Wales is a 1976 American West...
Name: 0, dtype: object

In [28]:
id2user[df_train['user_key'][0]]

'4593'

In [29]:
user_prompt_dict[id2user[df_train['user_key'][0]]]

'Given a male user who is aged 35-44 and a tradesman/craftsman, this user\'s movie viewing history over time is listed below. "Honey, I Shrunk the Kids (1989)", 1 stars; "The Parent Trap (1998)", 3 stars; "Home Alone (1990)", 3 stars; "Ghost (1990)", 3 stars; "Aladdin (1992)", 4 stars; "Waiting to Exhale (1995)", 2 stars; "Father of the Bride Part II (1995)", 3 stars; "Ghosts of Mississippi (1996)", 3 stars; "The Insider (1999)", 3 stars; "Roger & Me (1989)", 4 stars; "The Purple Rose of Cairo (1985)", 2 stars; "Michael (1996)", 2 stars; "Doctor Zhivago (1965)", 5 stars; "Fanny and Alexander (1982)", 4 stars; "The Trip to Bountiful (1985)", 4 stars; "Dances with Wolves (1990)", 2 stars; "Batman (1989)", 2 stars; "The Silence of the Lambs (1991)", 4 stars; "Pretty Woman (1990)", 3 stars; "Tender Mercies (1983)", 4 stars; "And the Band Played On (1993)", 4 stars; "Evita (1996)", 3 stars; "Blue Velvet (1986)", 3 stars; "Little Big Man (1970)", 3 stars; "Kolya (1996)", 5 stars; "Dogma (199

In [25]:
item_prompt_dict[df_train['item_key'][0]]

'Introduce movie The Outlaw Josey Wales (1976) and describe its attributes (including but not limited to genre, director/cast, country, character, plot/theme, mood/tone, critical acclaim/award, production quality, and soundtrack).'

In [35]:
len(df_train)

10000

In [36]:
len(df_test)

92024

In [54]:
question = 'Analyze user\'s preferences on movies (consider factors like genre, director/actors, time ' \
                    'period/country, character, plot/theme, mood/tone, critical acclaim/award, production quality, ' \
                    'and soundtrack). Provide clear explanations based on relevant details from the user\'s movie ' \
                    'viewing history and other pertinent factors.'

In [60]:
lm_hist_idx = train_test_split['lm_hist_idx']
itemid2title = datamap['itemid2title']
user2attribute = datamap['user2attribute']

for uid, item_rating in sequence_data.items():
    user = id2user[uid]
    item_seq, rating_seq = item_rating
    cur_idx = lm_hist_idx[uid]
    hist_item_seq = item_seq[:cur_idx]
    hist_rating_seq = rating_seq[:cur_idx]
    history_texts = []
    movie_history = []
    for iid, rating in zip(hist_item_seq, hist_rating_seq):
        tmp = '"{}", {} stars; '.format(itemid2title[str(iid)], int(rating))
        movie_history.append({'title': itemid2title[str(iid)], 'rating': rating})
        history_texts.append(tmp)
    break
# uid, item_seq, rating_seq
# history_texts
len(movie_history)

30

In [53]:
gender, age, occupation = user2attribute[uid]
user_text = 'Given a {} user who is aged {} and {}, this user\'s movie viewing history over time' \
            ' is listed below. '.format(GENDER_MAPPING[gender], AGE_MAPPING[age],
                                        OCCUPATION_MAPPING[occupation])
user_text

"Given a female user who is aged under 18 and a K-12 student, this user's movie viewing history over time is listed below. "

In [77]:
def return_user_details(uid):
    user = id2user[uid]
    item_seq, rating_seq = sequence_data[uid]
    cur_idx = lm_hist_idx[uid]
    hist_item_seq = item_seq[:cur_idx]
    hist_rating_seq = rating_seq[:cur_idx]
    history_texts = []
    movie_history = []
    for iid, rating in zip(hist_item_seq, hist_rating_seq):
        movie_history.append({'title': itemid2title[str(iid)], 'rating': rating})
    gender, age, occupation = user2attribute[uid]
    gender = GENDER_MAPPING[gender]
    age = AGE_MAPPING[age]
    occupation = OCCUPATION_MAPPING[occupation]
    return gender, age, occupation, movie_history


## Identifying Classification Mistakes

In [None]:
train_loader = Data.DataLoader(dataset=train_set, batch_size=256, shuffle=False)
test_loader = Data.DataLoader(dataset=test_set, batch_size=256, shuffle=False)

In [None]:
auc, ll, loss, eval_time, labels, preds = eval(inference_model, train_loader) #short_loader
train_data['labels'] = labels
train_data['preds'] = preds

In [None]:
auc, ll, loss, eval_time, labels, preds = eval(inference_model, train_loader) #short_loader
print("train loss: %.5f, inference time: %.5f, auc: %.5f, logloss: %.5f" % (loss, eval_time, auc, ll))

preds_r = [1 if pred[0] > 0.5 else 0 for pred in preds]

mistake_indexes = [i for i, (label, pred) in enumerate(zip(labels, preds_r)) if label != pred]
mistake_indexes[:10]

In [20]:
data_list = []
for i in mistake_indexes:
    data = train_set[i]
    user_idx, user_hist = get_user_hist(data['hist_aug_vec'], hist_idxes)
    item_idx, item_desc = get_item_desc(data['item_aug_vec'], item_idxes)
    label = labels[i]
    pred = preds[i][0]
    data_list.append({'test_index': i, 'user_idx': user_idx, 'user_hist': user_hist, 'item_idx': item_idx, 'item_desc': item_desc, 'label': label, 'pred': pred})

df_results = pd.DataFrame(data_list)
df_results


Unnamed: 0,test_index,user_idx,user_hist,item_idx,item_desc,label,pred
0,4,2336,Given a male user who is aged 35-44 and a trad...,2922,Hang 'em High is a classic Western film releas...,0,0.774568
1,18,1034,Given a female user who is aged 35-44 and an a...,924,2001: A Space Odyssey is a science fiction fil...,0,0.716184
2,24,1034,Given a female user who is aged 35-44 and an a...,1608,Air Force One is a 1997 action-thriller film d...,0,0.559843
3,29,1034,Given a female user who is aged 35-44 and an a...,3217,"""A Star is Born"" is a 1937 American drama film...",1,0.384421
4,31,1034,Given a female user who is aged 35-44 and an a...,1617,L.A. Confidential is a neo-noir crime film dir...,0,0.821938
...,...,...,...,...,...,...,...
209075,751414,4736,Given a male user who is aged 18-24 and a writ...,1792,"""U.S. Marshals"" is a 1998 American action thri...",1,0.199561
209076,751426,4736,Given a male user who is aged 18-24 and a writ...,1060,Swingers is a 1996 American comedy-drama film ...,0,0.554905
209077,751427,4736,Given a male user who is aged 18-24 and a writ...,1061,Sleepers is a 1996 American legal crime drama ...,1,0.470843
209078,751428,4736,Given a male user who is aged 18-24 and a writ...,531,"Sure, I'd be happy to introduce the movie Secr...",0,0.554470


In [21]:
# df_results = pd.read_csv('/content/train_preds_labels.csv')
# df_results.to_csv('train_preds_labels.csv')

## Encoding the text to vectors with BERT

In [22]:
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import DataLoader
from lm_encoding import inference

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased',  trust_remote_code=True)
encoding_model = AutoModel.from_pretrained('bert-base-uncased',  trust_remote_code=True).half().cuda()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

### Validating the encoder

In [23]:
items = df_results['item_desc'].tolist()
item_loader = DataLoader(items[:4], 1, shuffle=False)

In [24]:
users = df_results['user_hist'].tolist()
user_loader = DataLoader(users[:1], 1, shuffle=False)

In [25]:
new_item_vec = inference(encoding_model, tokenizer, item_loader, 'bert', 'avg')
len(new_item_vec)

100%|██████████| 4/4 [00:01<00:00,  2.27it/s]


4

In [26]:
new_user_vec = inference(encoding_model, tokenizer, user_loader, 'bert', 'avg')
len(new_user_vec)

100%|██████████| 1/1 [00:00<00:00, 32.37it/s]


1

In [27]:
n_item_vec = [i for i in new_item_vec]
len(n_item_vec)

4

In [28]:
for i in range(4):
    a = np.array(item_vec_dict[str(df_results['item_idx'][i])])
    b = np.array(new_item_vec[i])
    print(np.linalg.norm(a - b))

0.003536781410996269
0.003649090919045039
0.0034479879603599322
0.0040963562191398675


In [29]:
for i in range(1):
    a = np.array(user_vec_dict[str(df_results['user_idx'][i])])
    b = np.array(new_user_vec[i])
    print(np.linalg.norm(a - b))

0.003975717616505557


## Loading LLM Model

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

torch.random.manual_seed(0)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")

messages = [
    {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
    {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
    {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
]

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}

output = pipe(messages, **generation_args)
print(output[0]['generated_text'])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/3.35k [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-128k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


 To solve the equation 2x + 3 = 7, you need to isolate the variable x. Here are the steps:

1. Subtract 3 from both sides of the equation to get rid of the +3 on the left side. This gives you: 2x = 7 - 3, which simplifies to 2x = 4.

2. Now, divide both sides of the equation by 2 to solve for x. This gives you: x = 4 / 2, which simplifies to x = 2.

So, the solution to the equation 2x + 3 = 7 is x = 2.


In [11]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from langchain_community.llms import HuggingFaceHub
# llm = HuggingFacePipeline(pipeline=pipe)
import os
os.environ['HUGGINGFACEHUB_API_TOKEN'] = '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'
llm = HuggingFaceHub(
    repo_id="microsoft/Phi-3-mini-128k-instruct",
    task="text-generation",
    model_kwargs={
        "max_new_tokens": 512,
        "top_k": 30,
        "temperature": 0.1,
        "repetition_penalty": 1.03,
    },
)

template = """Question: {question}
Answer:"""

prompt = PromptTemplate(
    input_variables=["question"],
    template=template
)

question = "Solve 2x + 3 = 7 equation?"

llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.run(question)

HfHubHTTPError: 429 Client Error: Too Many Requests for url: https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-128k-instruct (Request ID: -r5BpaegCTw4SvrAyAyqA)

Rate limit reached. You reached free usage limit (reset hourly). Please subscribe to a plan at https://huggingface.co/pricing to use the API at this rate

## Creating a Simple Reflexion Mechanism

In [55]:
import datetime
import os
from langchain_groq import ChatGroq

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field, ValidationError
from langchain_openai import ChatOpenAI
from langsmith import traceable

from collections import defaultdict
from typing import List

from langchain.output_parsers.openai_tools import (
    JsonOutputToolsParser,
    PydanticToolsParser,
)
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage
from langgraph.prebuilt.tool_executor import ToolExecutor, ToolInvocation

from dotenv import load_dotenv
load_dotenv(Path.cwd().joinpath('.env'))


True

In [61]:
def create_analysis_prompt(gender, age, occupation, movie_history):
    """
    Constructs a prompt for analyzing user preferences in movie recommendations,
    given limited data (only titles and ratings).

    Parameters:
    - gender (str): The gender of the user.
    - age (int): The age of the user.
    - occupation (str): The occupation of the user.
    - movie_history (list of dicts): A list of movies watched by the user,
                                     each entry is a dict with 'title' and 'rating'.

    Returns:
    - str: A detailed prompt for analysis.
    """
    prompt = f"Analyze the movie preferences of a {age}-year-old {gender} who works as a {occupation}. "
    prompt += "Consider their movie rating history and infer preferences from the following aspects:\n\n"

    # Titles and ratings summary
    titles_ratings = ', '.join([f"{movie['title']} (rated {movie['rating']}/5)" for movie in movie_history])
    prompt += f"- Movies and ratings: {titles_ratings}\n"

    # Encourage consideration of aspects where detailed data is not available
    prompt += "- Infer genres and subgenres from the movie titles.\n"
    prompt += "- Deduce potential favorite directors and actors based on common movie industry knowledge.\n"
    prompt += "- Assume preferences for film eras or years based on the release dates of the listed movies.\n"
    prompt += "- Consider common themes and motifs in these movies, such as love, conflict, adventure, etc.\n"
    prompt += "- Analyze the critical reception versus user ratings by considering how these types of films are generally received.\n"
    prompt += "- Predict preferences in sequels or franchises if any of the movies belong to such categories.\n"
    prompt += "- Consider how demographics (like age and gender) might influence the preferences inferred from the titles and ratings provided.\n"

    return prompt


In [95]:
def llm_revised_prompt(prompt_text,llm):
    system = "You are a helpful assistant in the movie recommendation domain."
    human = "{text}"
    prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

    chain = prompt | llm
    initial = chain.invoke({"text": prompt_text}).content
    return initial

In [None]:
uid_results = {}

['1', '2', '3', '4', '5']

In [113]:
# Configure the LLM
llm = ChatOpenAI(api_key=os.getenv('OPENAI_API_KEY'), model='gpt-3.5-turbo-0125')

# Main loop for self-reflection based on user movie ratings
for uid in list(sequence_data.keys())[:5]:
    print(uid)
    gender, age, occupation, user_movie_history  = return_user_details(uid)
    iterations = 5
    results = []
    first = True
    for i in range(iterations,0,-1):
        # Create a slice of the movie history for the current iteration
        current_history = user_movie_history[:-i]  # Items 0 through i+5

        # Generate the analysis prompt
        if first:
            prompt = create_analysis_prompt(gender, age, occupation, current_history)
            # Ask the LLM for analysis
            analysis = llm_revised_prompt(prompt,llm)
            first = False

        # Predict the rating for the next movie
        answer_according_to_analysis = llm_revised_prompt(f"""According to this analysis: {analysis}
                                                            What would be the rating for: {user_movie_history[-i]['title']}?
                                                            Answer with a single number between 1 and 5 - mark it with * before and after * the rating, like this
                                                            The rating will be: *5* """,llm)
        extract_rating = re.search(r"\*(\d+)\*", answer_according_to_analysis)
        next_movie_rating = int(extract_rating.group(1))

        # Check if the prediction matches the actual rating
        actual_rating = user_movie_history[-i]['rating']
        print(next_movie_rating,actual_rating)
        # Provide feedback and refine if necessary
        if next_movie_rating != actual_rating:
            feedback_prompt = f"""According to this analysis: {analysis}.
                                  The model predicted {next_movie_rating} but the expected rating was {actual_rating}.
                                  Please adjust your analysis.
                                  Remember here are the guidelines:
                                  {prompt}"""
            new_analysis = llm_revised_prompt(feedback_prompt,llm)
            results.append((analysis, answer_according_to_analysis, next_movie_rating, actual_rating,new_analysis))
            analysis = new_analysis
        else:
            print("No feedback needed.")
            results.append((analysis, answer_according_to_analysis, next_movie_rating, actual_rating,None))

    print(results)
    uid_results[uid] = results

1
4 3
3 3
No feedback needed.
4 3
3 3
No feedback needed.
4 4
No feedback needed.
[("Based on the movie ratings provided by the under 18-year-old female K-12 student, we can infer the following preferences:\n\nGenres and Subgenres:\n1. Drama: Music of the Heart, The Insider, Fried Green Tomatoes, The Unbearable Lightness of Being, Dead Again, Men Don't Leave, Felicia's Journey, The Thin Red Line\n2. Thriller: Ghost, The Bone Collector, The Silence of the Lambs, Mortal Thoughts\n3. Action: The Rock, Eraser, Air Force One\n4. Romance: Fried Green Tomatoes, Raise the Red Lantern, Edward Scissorhands, Playing by Heart\n5. Crime: L.A. Confidential, Donnie Brasco\n6. Mystery: Dead Again, Spellbound\n7. Horror: Mimic, Poison Ivy\n8. Historical Drama: Raise the Red Lantern, The Thin Red Line\n9. Biographical Drama: The Insider, Music of the Heart\n10. Psychological Thriller: The Silence of the Lambs\n\nFavorite Directors and Actors:\n- Potential favorite directors could include Steven Spielber

In [116]:
uid_results['2'][0][0]

'Based on the movie preferences of the 56+-year-old male who works as self-employed, we can infer the following:\n\nGenres and Subgenres:\n1. Classic Films: The majority of the movies listed are classic films released between the 1930s and 1990s, indicating a preference for timeless cinema.\n2. War Films: Movies like "The Bridge on the River Kwai" and "Dances with Wolves" suggest an interest in war-themed films.\n3. Comedy: Comedies like "Duck Soup," "Young Frankenstein," "Some Like It Hot," and "Animal House" indicate a preference for comedy films.\n4. Drama: The inclusion of movies like "Gone with the Wind," "It\'s a Wonderful Life," and "Mr. Smith Goes to Washington" suggests a liking for dramatic storytelling.\n5. Mystery/Thriller: Movies like "Chinatown," "Rear Window," and "Witness" indicate an attraction to mystery and thriller genres.\n\nFavorite Directors and Actors:\n- Favorite Directors: Given the selection of classic films, the individual may appreciate directors like Alfre

In [117]:
uid_results['2'][-1][-1]

'Based on the movie preferences and ratings provided by the 56+-year-old male who works as self-employed, here is an adjusted analysis:\n\nGenres and Subgenres:\n1. Classic Films: The individual has a strong affinity for classic films spanning different decades, evident from movies like "Casablanca," "Gone with the Wind," "It\'s a Wonderful Life," and "The Wizard of Oz."\n2. War Films: The inclusion of movies like "The Bridge on the River Kwai," "Dances with Wolves," and "Mr. Smith Goes to Washington" indicates an interest in war-themed or politically charged films.\n3. Comedy: Comedies such as "Duck Soup," "Young Frankenstein," "Some Like It Hot," and "Animal House" are among the favorites, highlighting a preference for humor and witty storytelling.\n4. Mystery/Thriller: Movies like "Chinatown," "Rear Window," and "Witness" suggest a liking for mystery and thriller genres with engaging plots.\n\nFavorite Directors and Actors:\n- Favorite Directors: It is likely that the individual app

In [119]:
#save dict to jason
# import json
# with open('uid_results.json', 'w') as f:
#     json.dump(uid_results, f)
# uid_results

In [97]:
answer_according_to_analysis = llm_revised_prompt(f"""According to this analysis: {analysis}
                                                    What would be the rating for: {user_movie_history[-i]['title']}?
                                                    Answer with a single number between 1 and 5 - mark it with * before and after * the rating, like this
                                                    The rating will be: *5* """,llm)
extract_rating = re.search(r"\*(\d+)\*", answer_according_to_analysis)
next_movie_rating = int(extract_rating.group(1))

# Check if the prediction matches the actual rating
actual_rating = user_movie_history[i+6]['rating']

In [99]:
extract_rating.group(1)

'4'

In [100]:
user_movie_history[i+6]['rating']

4

In [38]:
parser = JsonOutputToolsParser(return_id=True)

In [39]:
actor_prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are expert researcher.
            Current time: {time}

            1. {first_instruction}
            2. Reflect and critique your answer. Be severe to maximize improvement.
            3. Recommend search queries to research information and improve your answer.""",
        ),
            MessagesPlaceholder(variable_name="messages"),
            ("system", "Answer the user's question above using the required format."),
        ]
).partial(
    time=lambda: datetime.datetime.now().isoformat(),
)


class Reflection(BaseModel):
    missing: str = Field(description="Critique of what is missing.")
    superfluous: str = Field(description="Critique of what is superfluous")


class AnswerQuestion(BaseModel):
    """Answer the question."""

    answer: str = Field(description="~250 word detailed answer to the question.")
    reflection: Reflection = Field(description="Your reflection on the initial answer.")
    search_queries: List[str] = Field(
        description="1-3 search queries for researching improvements to address the critique of your current answer."
    )

In [40]:
# llm = ChatGroq(groq_api_key = os.getenv('GROQ_API_KEY'),model = 'llama3-8b-8192')
llm = ChatOpenAI(api_key=os.getenv('OPENAI_API_KEY'), model='gpt-3.5-turbo-0125') # gpt-4-turbo-2024-04-09
initial_answer_chain = actor_prompt_template.partial(
    first_instruction="Provide a detailed answer."
) | llm.bind_tools(tools=[AnswerQuestion], tool_choice="AnswerQuestion")
validator = PydanticToolsParser(tools=[AnswerQuestion])

In [41]:
class ResponderWithRetries:
    def __init__(self, runnable, validator):
        self.runnable = runnable
        self.validator = validator

    @traceable
    def respond(self, state: List[BaseMessage]):
        response = []
        for attempt in range(3):
            try:
                response = self.runnable.invoke({"messages": state})
                self.validator.invoke(response)
                return response
            except ValidationError as e:
                state = state + [HumanMessage(content=repr(e))]
        return response

first_responder = ResponderWithRetries(
    runnable=initial_answer_chain, validator=validator
)

In [36]:
round(df_results['pred'][0])

1

In [37]:
def generate_prompt_from_df(row):
    user_hist = row['user_hist']
    item_desc = row['item_desc']
    real_label = row['label']
    pred = round(row['pred'])
    prompt =    f"""User has the following history: {user_hist}.
                    Item has the following description: {item_desc}.
                    A mistake was made in the model's prediction.
                    The real label is {real_label} and the model predicted {pred}.
                    How would you rephrase the user history and item description to improve the model's prediction?
                    Answer formst should like this:
                    Revised User History: <revised user history> \n\n
                    Revised Item Description: <revised item description>
                    use two lines at the end of user history and at the end of item description.
                    Remember that your new rephrasing should help the model to predict better next time but without overfitting"""
    return prompt

In [38]:
# def generate_prompt_from_df(df_restuls,idx):
#     user_hist = df_restuls['user_hist'][idx]
#     item_desc = df_restuls['item_desc'][idx]
#     real_label = df_restuls['label'][idx]
#     pred = df_restuls['pred'][idx]

#     prompt =    f"""Task:Rephrase the user history and item description to better match the prediction to the actual label. Assume that the model's prediction is either a match or a mismatch to the label, and modify the descriptions to improve the fit between user preferences (as inferred from the user history) and the characteristics of the movie (as described in the item description).

#                 Output Format:
#                 Revised User History:
#                 Try to rephrase the user history to better reflect the user's preferences.
#                 You can adjust the length, tone, and content of the user history to better align with the user's likely interests.
#                 Finish the Revised User History with ;
#                 Revised Item Description:
#                 The item description should be rephrased to highlight aspects of the movie that are more aligned with the user's adjusted preferences.
#                 Key elements to focus on might include genre, notable performances, thematic elements, and any particular production features.
#                 Finish the Revised User Item Description with ;

#                 Here is the user history: {user_hist}.
#                 Here is the item description: {item_desc}.
#                 A mistake was made in the model's prediction.
#                 The real label is {real_label} and the model predicted {pred}.
#                 How would you rephrase the user history and item description to improve the model's prediction?

#                 """
#     return prompt

In [40]:
def extract_revised_prompt(initial):
  revised_prompt = initial.content + ";"
  # String containing the revised User History and Revised Item Description
  # Extracting the revised User History
  user_history = re.search(r"Revised User History:\n(.*?)\n\n", revised_prompt, re.DOTALL)
  if user_history:
      user_history = user_history.group(1).strip()

  # Extracting the Revised Item Description
  item_description = re.search(r"Revised Item Description:\n(.*?);", revised_prompt, re.DOTALL)
  if item_description:
      item_description = item_description.group(1).strip()

  # Printing the extracted information
  # print("Revised User History:", user_history)
  # print("Revised Item Description:", item_description)
  return user_history, item_description


In [41]:
def reflect_prompt(row):
    prompt = generate_prompt_from_df(row)
    initial = llm_revised_prompt(prompt)
    user_history, item_description = extract_revised_prompt(initial)
    return user_history, item_description, initial.content

In [42]:
df_results['user_idx'].value_counts()

user_idx
1680    618
1941    465
2909    444
3841    415
2907    353
       ... 
2510      1
3104      1
4228      1
4129      1
2336      1
Name: count, Length: 5382, dtype: int64

In [43]:
sub_df = df_results.iloc[:30]
sub_df['user_reflect'] = ""
sub_df['item_reflect'] = ""
sub_df['init_context'] = ""


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['user_reflect'] = ""
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['item_reflect'] = ""
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['init_context'] = ""


In [44]:
u_list =[]
i_list = []
for i, row in sub_df.iterrows():
    user_history, item_description, init_context = reflect_prompt(row)
    sub_df.loc[i,'user_reflect'] = user_history
    sub_df.loc[i,'item_reflect'] = item_description
    sub_df.loc[i,'init_context'] = init_context


In [45]:
sub_df_clean = sub_df.dropna()
sub_df_clean.reset_index(drop=True, inplace=True)
sub_df_clean

Unnamed: 0,test_index,user_idx,user_hist,item_idx,item_desc,label,pred,user_reflect,item_reflect,init_context
0,29,1034,Given a female user who is aged 35-44 and an a...,3217,"""A Star is Born"" is a 1937 American drama film...",1,0.384421,Given a female user who is aged 35-44 and an a...,A Star is Born is a 1937 American drama film t...,Revised User History:\nGiven a female user who...
1,34,1034,Given a female user who is aged 35-44 and an a...,2278,Ronin is a 1998 thriller movie directed by Joh...,0,0.514711,Given a female user who is aged 35-44 and an a...,Ronin is a 1998 thriller movie directed by Joh...,Revised User History:\n\nGiven a female user w...
2,39,1034,Given a female user who is aged 35-44 and an a...,949,East of Eden is a classic drama film released ...,0,0.663418,Given a female user who is aged 35-44 and an a...,East of Eden is a timeless classic drama film ...,Revised User History:\nGiven a female user who...
3,59,1034,Given a female user who is aged 35-44 and an a...,3408,Erin Brockovich is a biographical drama film t...,1,0.47359,A 35-44-year-old academic/educator with a dive...,Erin Brockovich is a biographical drama film t...,Here are the revised user history and item des...
4,60,1034,Given a female user who is aged 35-44 and an a...,2617,The Mummy is a 1999 American action-adventure ...,1,0.270938,Given a female user who is aged 35-44 and an a...,The Mummy is a 1999 American action-adventure ...,Revised User History:\nGiven a female user who...
5,65,1034,Given a female user who is aged 35-44 and an a...,2640,Superman is a 1978 superhero film directed by ...,1,0.380954,Given a female user who is aged 35-44 and an a...,Superman is a 1978 superhero film directed by ...,Revised User History:\nGiven a female user who...
6,79,1034,Given a female user who is aged 35-44 and an a...,2861,For Love of the Game is a romantic sports dram...,1,0.425896,Given a female user who is aged 35-44 and an a...,For Love of the Game is a romantic sports dram...,Here are the revised user history and item des...
7,84,1034,Given a female user who is aged 35-44 and an a...,477,What's Love Got to Do with It? is a 1993 biogr...,0,0.540011,This user is a 35-44 year old academic/educato...,"""What's Love Got to Do with It?"" is a 1993 bio...",Here are the revised user history and item des...
8,86,1034,Given a female user who is aged 35-44 and an a...,2100,Splash is a romantic comedy film released in 1...,1,0.355477,Given a female user who is aged 35-44 and an a...,Splash is a charming romantic comedy film rele...,Revised User History:\nGiven a female user who...
9,87,1034,Given a female user who is aged 35-44 and an a...,3844,Steel Magnolias is a 1989 American comedy-dram...,1,0.434559,Given a female user who is aged 35-44 and an a...,Steel Magnolias is a 1989 American comedy-dram...,Here are the revised user history and item des...


In [46]:
sub_df.loc[0,'user_reflect']

In [47]:
sub_df.loc[0,'user_hist']

'Given a male user who is aged 35-44 and a tradesman/craftsman, this user\'s movie viewing history over time is listed below. Last of the Mohicans, The (1992), 4 stars; Unforgiven (1992), 5 stars; Pale Rider (1985), 5 stars; Gattaca (1997), 2 stars; Legends of the Fall (1994), 5 stars; Milk Money (1994), 3 stars; Kelly\'s Heroes (1970), 4 stars; One Flew Over the Cuckoo\'s Nest (1975), 4 stars; Star Wars: Episode V - The Empire Strikes Back (1980), 3 stars; Raiders of the Lost Ark (1981), 4 stars. Based on the user\'s movie viewing history, it seems that he enjoys action and adventure movies with strong male leads. The user has given high ratings to movies like "Unforgiven," "Pale Rider," and "Legends of the Fall," which all feature rugged, tough male protagonists. The user also enjoys classic war movies like "Kelly\'s Heroes" and "Raiders of the Lost Ark," both of which have strong male leads.\n\nHowever, the user also seems to appreciate movies with more complex themes and characters

In [48]:
new_user_history = sub_df_clean['user_reflect'].tolist()
new_item_description = sub_df_clean['item_reflect'].tolist()

In [49]:
data_l = DataLoader(new_user_history,2, shuffle=False)
new_user_vec = inference(encoding_model, tokenizer, data_l, 'bert', 'avg')

100%|██████████| 9/9 [00:00<00:00, 37.81it/s]


In [50]:
data_l = DataLoader(new_item_description,2, shuffle=False)
new_item_vec = inference(encoding_model, tokenizer, data_l, 'bert', 'avg')

100%|██████████| 9/9 [00:00<00:00, 59.03it/s]


In [51]:
l = []
cnt= 0
for item in train_set:
    l.append(item)
    cnt+=1
    if cnt>200:
        break
train_dataset = DictDataset(l)

In [52]:
train_dataset[18]['hist_aug_vec'][:10]

tensor([-0.4031,  0.0147,  0.1539, -0.0502,  0.4482,  0.1345,  0.3013,  0.1704,
        -0.1854,  0.3938])

In [53]:
for idx, row in sub_df_clean.iterrows():
    train_idx = row['test_index']
    print(train_idx)
    train_dataset[train_idx]['hist_aug_vec'] = 0
    train_dataset[train_idx]['hist_aug_vec'] = torch.tensor(new_user_vec[idx])
    train_dataset[train_idx]['item_aug_vec'] = 0
    train_dataset[train_idx]['item_aug_vec'] = torch.tensor(new_item_vec[idx])


29
34
39
59
60
65
79
84
86
87
88
90
94
96
97
99
100
105


In [54]:
train_dataset[18]['hist_aug_vec'][:10]

tensor([-0.4031,  0.0147,  0.1539, -0.0502,  0.4482,  0.1345,  0.3013,  0.1704,
        -0.1854,  0.3938])

In [55]:
test_loader = Data.DataLoader(dataset=test_dataset, batch_size=2, shuffle=False)
short_test = ShortDataLoader(test_loader, num_batches=100)
auc, ll, loss, eval_time, labels, preds = eval(inference_model, short_test)

In [56]:
labels[18], preds[18]

(0, [0.708030104637146])

In [57]:
sub_df['test_index'].to_list()

[4,
 18,
 24,
 29,
 31,
 34,
 39,
 54,
 57,
 59,
 60,
 64,
 65,
 69,
 74,
 79,
 84,
 85,
 86,
 87,
 88,
 90,
 94,
 95,
 96,
 97,
 99,
 100,
 105,
 107]

In [58]:
# prompt: take from list only the indexes from another list

def get_indexes(list1, list2):
  return [e for i, e in enumerate(list1) if i in list2]


In [59]:
new_labels = [v for i , v in enumerate(labels) if i in sub_df_clean['test_index'].to_list()]
new_preds = [v[0] for i , v  in enumerate(preds) if i in sub_df_clean['test_index'].to_list()]
new_labels

[1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1]

In [60]:
new_preds

[0.4336511194705963,
 0.4532131254673004,
 0.7217369079589844,
 0.6745705604553223,
 0.31332603096961975,
 0.4512385129928589,
 0.40330809354782104,
 0.5837703347206116,
 0.3552521765232086,
 0.506752073764801,
 0.38986751437187195,
 0.5957568287849426,
 0.5156009793281555,
 0.5262630581855774,
 0.5289294719696045,
 0.4980555474758148,
 0.6564141511917114,
 0.4624698758125305]

In [61]:
sub_df_clean['new label'] = new_labels
sub_df_clean['new pred'] = new_preds
sub_df_clean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df_clean['new label'] = new_labels
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df_clean['new pred'] = new_preds


Unnamed: 0,test_index,user_idx,user_hist,item_idx,item_desc,label,pred,user_reflect,item_reflect,init_context,new label,new pred
0,29,1034,Given a female user who is aged 35-44 and an a...,3217,"""A Star is Born"" is a 1937 American drama film...",1,0.384421,Given a female user who is aged 35-44 and an a...,A Star is Born is a 1937 American drama film t...,Revised User History:\nGiven a female user who...,1,0.433651
1,34,1034,Given a female user who is aged 35-44 and an a...,2278,Ronin is a 1998 thriller movie directed by Joh...,0,0.514711,Given a female user who is aged 35-44 and an a...,Ronin is a 1998 thriller movie directed by Joh...,Revised User History:\n\nGiven a female user w...,0,0.453213
2,39,1034,Given a female user who is aged 35-44 and an a...,949,East of Eden is a classic drama film released ...,0,0.663418,Given a female user who is aged 35-44 and an a...,East of Eden is a timeless classic drama film ...,Revised User History:\nGiven a female user who...,0,0.721737
3,59,1034,Given a female user who is aged 35-44 and an a...,3408,Erin Brockovich is a biographical drama film t...,1,0.47359,A 35-44-year-old academic/educator with a dive...,Erin Brockovich is a biographical drama film t...,Here are the revised user history and item des...,1,0.674571
4,60,1034,Given a female user who is aged 35-44 and an a...,2617,The Mummy is a 1999 American action-adventure ...,1,0.270938,Given a female user who is aged 35-44 and an a...,The Mummy is a 1999 American action-adventure ...,Revised User History:\nGiven a female user who...,1,0.313326
5,65,1034,Given a female user who is aged 35-44 and an a...,2640,Superman is a 1978 superhero film directed by ...,1,0.380954,Given a female user who is aged 35-44 and an a...,Superman is a 1978 superhero film directed by ...,Revised User History:\nGiven a female user who...,1,0.451239
6,79,1034,Given a female user who is aged 35-44 and an a...,2861,For Love of the Game is a romantic sports dram...,1,0.425896,Given a female user who is aged 35-44 and an a...,For Love of the Game is a romantic sports dram...,Here are the revised user history and item des...,1,0.403308
7,84,1034,Given a female user who is aged 35-44 and an a...,477,What's Love Got to Do with It? is a 1993 biogr...,0,0.540011,This user is a 35-44 year old academic/educato...,"""What's Love Got to Do with It?"" is a 1993 bio...",Here are the revised user history and item des...,0,0.58377
8,86,1034,Given a female user who is aged 35-44 and an a...,2100,Splash is a romantic comedy film released in 1...,1,0.355477,Given a female user who is aged 35-44 and an a...,Splash is a charming romantic comedy film rele...,Revised User History:\nGiven a female user who...,1,0.355252
9,87,1034,Given a female user who is aged 35-44 and an a...,3844,Steel Magnolias is a 1989 American comedy-dram...,1,0.434559,Given a female user who is aged 35-44 and an a...,Steel Magnolias is a 1989 American comedy-dram...,Here are the revised user history and item des...,1,0.506752


In [62]:
y_true = sub_df_clean['label'].to_list()
y_pred = sub_df_clean['pred'].to_list()
log_loss(y_true, y_pred)

0.9047640904842554

In [63]:

y_true = sub_df_clean['label'].to_list()
y_pred = sub_df_clean['new pred'].to_list()

log_loss(y_true, y_pred)


0.8367030164913698