In [110]:
!git clone github.com/AsafAvr/RefRec.git

fatal: destination path 'RefRec' already exists and is not an empty directory.


In [111]:
%cd /content/RefRec
!pip install -r requirements.txt

/content/RefRec


# Outline
- Load validation dataset - Movielens-1M
- Load recommendation model that was finetuned on specific embedding spce (TBC)
- Create a self-reflection mechanism on the validation dataset
  - Predict the next sequence for a specific user by prompting the llm with the data about the user
    - Generate prompt to encode specific features
  - reflect on the prediction vs the ground truth
  - suggest a better feature and reflect on why the mistake happened
  - continue to the next sequence

## Imports

In [112]:
device = 'cuda'

In [113]:
from pathlib import Path
cur_path = Path.cwd()
cur_path

PosixPath('/content/RefRec')

In [114]:
klg_folder = Path.cwd().joinpath('knowledge_encoding')
rs_folder = Path.cwd().joinpath('RS')
ml1m_folder = Path.cwd().joinpath('data').joinpath('ml-1m')

In [115]:
import sys
sys.path.append(klg_folder.as_posix())
sys.path.append(rs_folder.as_posix())
from lm_encoding import get_text_data_loader

In [116]:
import numpy as np
import pandas as pd
import torch
import torch.utils.data as Data
import re

from dataset import AmzDataset
from main_ctr import eval
from utils import load_json
from sklearn.metrics import log_loss, roc_auc_score

In [117]:
class ShortDataLoader:
    def __init__(self, dataloader, num_batches):
        self.dataloader = dataloader
        self.num_batches = num_batches

    def __iter__(self):
        count = 0
        for batch in self.dataloader:
            if count >= self.num_batches:
                break
            yield batch
            count += 1

    def __len__(self):
        return min(len(self.dataloader), self.num_batches)

from torch.utils.data import Dataset
class DictDataset(Dataset):
    def __init__(self, data_list):
        self.data_list = data_list

    def __getitem__(self, index):
        return self.data_list[index]

    def __len__(self):
        return len(self.data_list)

## Loading Test Set
- Identifying Classification mistakes

In [174]:
model_path = rs_folder.joinpath('model').joinpath('ml-1m').joinpath('ctr').joinpath('DIN').joinpath('DIN.pt').as_posix()
inference_model = torch.load(model_path,map_location=torch.device(device))
test_set = AmzDataset(ml1m_folder.joinpath('proc_data').as_posix(), 'train', 'ctr', 5, True, 'bert_avg') #test
test_loader = Data.DataLoader(dataset=test_set, batch_size=2048, shuffle=False)
metric_scope = [1, 3, 5, 7]

In [175]:
len(test_set)

751436

In [120]:
short_test = ShortDataLoader(test_loader, num_batches=10)
len(short_test)

10

In [176]:
auc, ll, loss, eval_time, labels, preds = eval(inference_model, test_loader) #test_loader
print("test loss: %.5f, test time: %.5f, auc: %.5f, logloss: %.5f" % (loss, eval_time, auc, ll))

test loss: 0.54620, test time: 309.62119, auc: 0.78947, logloss: 0.54619


In [122]:
preds_r = [1 if pred[0] > 0.5 else 0 for pred in preds]
mistake_indexes = [i for i, (label, pred) in enumerate(zip(labels, preds_r)) if label != pred]
mistake_indexes[:10]

[4, 18, 24, 29, 31, 34, 39, 54, 57, 59]

In [123]:
preds[:5] , labels[:5]

([[0.7851797938346863],
  [0.6139041185379028],
  [0.6520099639892578],
  [0.7986049056053162],
  [0.7801293730735779]],
 [1, 1, 1, 1, 0])

In [124]:
user_vec_dict = load_json(ml1m_folder.joinpath('proc_data').joinpath('bert_avg_augment.hist'))
list(user_vec_dict.keys())[:10]

['2179', '3813', '2108', '778', '4232', '2903', '2449', '5286', '4235', '3020']

In [125]:
item_vec_dict = load_json(ml1m_folder.joinpath('proc_data').joinpath('bert_avg_augment.item'))
list(item_vec_dict.keys())[:10]

['1', '2', '4', '3', '5', '6', '7', '9', '8', '11']

## Extracting metadata for LLM

In [126]:
hist_loader, hist_idxes, item_loader, item_idxes = get_text_data_loader(ml1m_folder.joinpath('knowledge').as_posix(), 2)

chatgpt.hist 1 Given a male user who is aged 35-44 and an executive/managerial, this user's movie viewing history over time is listed below. Bridge on the River Kwai, The (1957), 5 stars; Chinatown (1974), 4 stars; Duck Soup (1933), 5 stars; Toy Story (1995), 5 stars; Arsenic and Old Lace (1944), 4 stars; Dances with Wolves (1990), 5 stars; Cool Hand Luke (1967), 5 stars; Young Frankenstein (1974), 5 stars; High Noon (1952), 5 stars; Rear Window (1954), 5 stars; Being There (1979), 5 stars; Some Like It Hot (1959), 5 stars; Casablanca (1942), 5 stars; Wizard of Oz, The (1939), 5 stars; Gone with the Wind (1939), 5 stars; Moonstruck (1987), 5 stars; It's a Wonderful Life (1946), 5 stars; Mr. Smith Goes to Washington (1939), 5 stars; 39 Steps, The (1935), 5 stars; Malcolm X (1992), 5 stars; Witness (1985), 4 stars; Animal House (1978), 4 stars; Do the Right Thing (1989), 5 stars; Frankenstein (1931), 5 stars; Modern Times (1936), 4 stars; War of the Worlds, The (1953), 5 stars; Hollywood

In [127]:
key = '4810'
index = hist_idxes.index(key)
print(index)

hist_loader.dataset[index]

4160


"Given a male user who is aged 18-24 and a programmer, this user's movie viewing history over time is listed below. Honey, I Shrunk the Kids (1989), 1 stars; Negotiator, The (1998), 4 stars; Terminator 2: Judgment Day (1991), 3 stars; Jumanji (1995), 2 stars; Heat (1995), 5 stars; Insider, The (1999), 5 stars; Last of the Mohicans, The (1992), 3 stars; Michael (1996), 3 stars; Batman (1989), 3 stars; Jackie Chan's First Strike (1996), 4 stars; Rocketeer, The (1991), 3 stars; Maltese Falcon, The (1941), 4 stars; Messenger: The Story of Joan of Arc, The (1999), 3 stars; 2001: A Space Odyssey (1968), 3 stars; Jewel of the Nile, The (1985), 2 stars; Romancing the Stone (1984), 3 stars; Saint, The (1997), 5 stars; Sleepy Hollow (1999), 2 stars; World Is Not Enough, The (1999), 2 stars; Anaconda (1997), 1 stars; Titanic (1953), 3 stars; Waterworld (1995), 2 stars; Lost in Space (1998), 3 stars; Teenage Mutant Ninja Turtles (1990), 1 stars; Payback (1999), 3 stars; Red Dawn (1984), 3 stars; R

In [128]:
def get_user_hist(user_vec,hist_idxes):
    user_vec = user_vec.tolist()
    for key, v in user_vec_dict.items():
        if v == user_vec:
            user_index = hist_idxes.index(key)
            return key, hist_loader.dataset[user_index]
    return None

def get_item_desc(item_vec, item_idxes):
    item_vec = item_vec.tolist()
    for key,v in item_vec_dict.items():
        if v == item_vec:
            item_index = item_idxes.index(key)
            return key, item_loader.dataset[item_index]
    return None

In [129]:
data_list = []
for i in mistake_indexes:
    data = test_set[i]
    user_idx, user_hist = get_user_hist(data['hist_aug_vec'], hist_idxes)
    item_idx, item_desc = get_item_desc(data['item_aug_vec'], item_idxes)
    label = labels[i]
    pred = preds[i][0]
    data_list.append({'test_index': i, 'user_idx': user_idx, 'user_hist': user_hist, 'item_idx': item_idx, 'item_desc': item_desc, 'label': label, 'pred': pred})

df_results = pd.DataFrame(data_list)
df_results


Unnamed: 0,test_index,user_idx,user_hist,item_idx,item_desc,label,pred
0,4,2336,Given a male user who is aged 35-44 and a trad...,2922,Hang 'em High is a classic Western film releas...,0,0.780129
1,18,1034,Given a female user who is aged 35-44 and an a...,924,2001: A Space Odyssey is a science fiction fil...,0,0.717540
2,24,1034,Given a female user who is aged 35-44 and an a...,1608,Air Force One is a 1997 action-thriller film d...,0,0.551879
3,29,1034,Given a female user who is aged 35-44 and an a...,3217,"""A Star is Born"" is a 1937 American drama film...",1,0.386345
4,31,1034,Given a female user who is aged 35-44 and an a...,1617,L.A. Confidential is a neo-noir crime film dir...,0,0.825905
...,...,...,...,...,...,...,...
807,2540,550,Given a male user who is aged 45-49 and a farm...,2599,Election is a satirical comedy-drama film rele...,0,0.800991
808,2541,550,Given a male user who is aged 45-49 and a farm...,1948,Tom Jones is a British comedy-drama film relea...,0,0.708786
809,2547,550,Given a male user who is aged 45-49 and a farm...,1028,Mary Poppins is a classic musical movie releas...,0,0.813044
810,2555,550,Given a male user who is aged 45-49 and a farm...,2926,Hairspray is a musical comedy film directed by...,0,0.712927


In [177]:
# df_results = pd.read_csv('/content/train_preds_labels.csv')
# df_results.to_csv('train_preds_labels.csv')

## Encoding the text to vectors with BERT

In [131]:
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import DataLoader
from lm_encoding import inference

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased',  trust_remote_code=True)
encoding_model = AutoModel.from_pretrained('bert-base-uncased',  trust_remote_code=True).half().cuda()

### Validating the encoder

In [132]:
items = df_results['item_desc'].tolist()
item_loader = DataLoader(items[:4], 1, shuffle=False)

In [133]:
users = df_results['user_hist'].tolist()
user_loader = DataLoader(users[:1], 1, shuffle=False)

In [134]:
new_item_vec = inference(encoding_model, tokenizer, item_loader, 'bert', 'avg')
len(new_item_vec)

100%|██████████| 4/4 [00:00<00:00, 21.40it/s]


4

In [135]:
new_user_vec = inference(encoding_model, tokenizer, user_loader, 'bert', 'avg')
len(new_user_vec)

100%|██████████| 1/1 [00:00<00:00, 29.97it/s]


1

In [136]:
n_item_vec = [i for i in new_item_vec]
len(n_item_vec)

4

In [137]:
for i in range(4):
    a = np.array(item_vec_dict[str(df_results['item_idx'][i])])
    b = np.array(new_item_vec[i])
    print(np.linalg.norm(a - b))

0.003536781410996269
0.003649090919045039
0.0034479879603599322
0.0040963562191398675


In [138]:
for i in range(1):
    a = np.array(user_vec_dict[str(df_results['user_idx'][i])])
    b = np.array(new_user_vec[i])
    print(np.linalg.norm(a - b))

0.003975717616505557


## Creating a Simple Reflexion Mechanism

In [139]:
import datetime
import os
from langchain_groq import ChatGroq

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field, ValidationError
from langchain_openai import ChatOpenAI
from langsmith import traceable

from collections import defaultdict
from typing import List

from langchain.output_parsers.openai_tools import (
    JsonOutputToolsParser,
    PydanticToolsParser,
)
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage
from langgraph.prebuilt.tool_executor import ToolExecutor, ToolInvocation

from dotenv import load_dotenv
load_dotenv(Path.cwd().joinpath('.env'))


True

In [140]:
parser = JsonOutputToolsParser(return_id=True)

In [141]:
actor_prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are expert researcher.
            Current time: {time}

            1. {first_instruction}
            2. Reflect and critique your answer. Be severe to maximize improvement.
            3. Recommend search queries to research information and improve your answer.""",
        ),
            MessagesPlaceholder(variable_name="messages"),
            ("system", "Answer the user's question above using the required format."),
        ]
).partial(
    time=lambda: datetime.datetime.now().isoformat(),
)


class Reflection(BaseModel):
    missing: str = Field(description="Critique of what is missing.")
    superfluous: str = Field(description="Critique of what is superfluous")


class AnswerQuestion(BaseModel):
    """Answer the question."""

    answer: str = Field(description="~250 word detailed answer to the question.")
    reflection: Reflection = Field(description="Your reflection on the initial answer.")
    search_queries: List[str] = Field(
        description="1-3 search queries for researching improvements to address the critique of your current answer."
    )

In [142]:
llm = ChatGroq(groq_api_key = os.getenv('GROQ_API_KEY'),model = 'llama3-8b-8192')
# llm = ChatOpenAI(api_key=os.getenv('OPENAI_API_KEY'), model='gpt-4-turbo-2024-04-09')

initial_answer_chain = actor_prompt_template.partial(
    first_instruction="Provide a detailed answer."
) | llm.bind_tools(tools=[AnswerQuestion], tool_choice="AnswerQuestion")
validator = PydanticToolsParser(tools=[AnswerQuestion])

In [143]:
class ResponderWithRetries:
    def __init__(self, runnable, validator):
        self.runnable = runnable
        self.validator = validator

    @traceable
    def respond(self, state: List[BaseMessage]):
        response = []
        for attempt in range(3):
            try:
                response = self.runnable.invoke({"messages": state})
                self.validator.invoke(response)
                return response
            except ValidationError as e:
                state = state + [HumanMessage(content=repr(e))]
        return response

first_responder = ResponderWithRetries(
    runnable=initial_answer_chain, validator=validator
)

In [144]:
df_results.iloc[0]['user_hist']

'Given a male user who is aged 35-44 and a tradesman/craftsman, this user\'s movie viewing history over time is listed below. Last of the Mohicans, The (1992), 4 stars; Unforgiven (1992), 5 stars; Pale Rider (1985), 5 stars; Gattaca (1997), 2 stars; Legends of the Fall (1994), 5 stars; Milk Money (1994), 3 stars; Kelly\'s Heroes (1970), 4 stars; One Flew Over the Cuckoo\'s Nest (1975), 4 stars; Star Wars: Episode V - The Empire Strikes Back (1980), 3 stars; Raiders of the Lost Ark (1981), 4 stars. Based on the user\'s movie viewing history, it seems that he enjoys action and adventure movies with strong male leads. The user has given high ratings to movies like "Unforgiven," "Pale Rider," and "Legends of the Fall," which all feature rugged, tough male protagonists. The user also enjoys classic war movies like "Kelly\'s Heroes" and "Raiders of the Lost Ark," both of which have strong male leads.\n\nHowever, the user also seems to appreciate movies with more complex themes and characters

In [145]:
round(df_results['pred'][0])

1

In [146]:
def generate_prompt_from_df(row):
    user_hist = row['user_hist']
    item_desc = row['item_desc']
    real_label = row['label']
    pred = round(row['pred'])
    prompt =    f"""User has the following history: {user_hist}.
                    Item has the following description: {item_desc}.
                    A mistake was made in the model's prediction.
                    The real label is {real_label} and the model predicted {pred}.
                    How would you rephrase the user history and item description to improve the model's prediction?
                    Answer formst should like this:
                    Revised User History: <revised user history> \n\n
                    Revised Item Description: <revised item description>
                    use two lines at the end of user history and at the end of item description.
                    Remember that your new rephrasing should help the model to predict better next time but without overfitting"""
    return prompt

In [147]:
# def generate_prompt_from_df(df_restuls,idx):
#     user_hist = df_restuls['user_hist'][idx]
#     item_desc = df_restuls['item_desc'][idx]
#     real_label = df_restuls['label'][idx]
#     pred = df_restuls['pred'][idx]

#     prompt =    f"""Task:Rephrase the user history and item description to better match the prediction to the actual label. Assume that the model's prediction is either a match or a mismatch to the label, and modify the descriptions to improve the fit between user preferences (as inferred from the user history) and the characteristics of the movie (as described in the item description).

#                 Output Format:
#                 Revised User History:
#                 Try to rephrase the user history to better reflect the user's preferences.
#                 You can adjust the length, tone, and content of the user history to better align with the user's likely interests.
#                 Finish the Revised User History with ;
#                 Revised Item Description:
#                 The item description should be rephrased to highlight aspects of the movie that are more aligned with the user's adjusted preferences.
#                 Key elements to focus on might include genre, notable performances, thematic elements, and any particular production features.
#                 Finish the Revised User Item Description with ;

#                 Here is the user history: {user_hist}.
#                 Here is the item description: {item_desc}.
#                 A mistake was made in the model's prediction.
#                 The real label is {real_label} and the model predicted {pred}.
#                 How would you rephrase the user history and item description to improve the model's prediction?

#                 """
#     return prompt

In [148]:
def llm_revised_prompt(prompt_text):
    system = "You are a helpful assistant."
    human = "{text}"
    prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

    chain = prompt | llm
    initial = chain.invoke({"text": prompt_text})
    return initial

In [149]:
def extract_revised_prompt(initial):
  revised_prompt = initial.content + ";"
  # String containing the revised User History and Revised Item Description
  # Extracting the revised User History
  user_history = re.search(r"Revised User History:\n(.*?)\n\n", revised_prompt, re.DOTALL)
  if user_history:
      user_history = user_history.group(1).strip()

  # Extracting the Revised Item Description
  item_description = re.search(r"Revised Item Description:\n(.*?);", revised_prompt, re.DOTALL)
  if item_description:
      item_description = item_description.group(1).strip()

  # Printing the extracted information
  # print("Revised User History:", user_history)
  # print("Revised Item Description:", item_description)
  return user_history, item_description


In [150]:
def reflect_prompt(row):
    prompt = generate_prompt_from_df(row)
    initial = llm_revised_prompt(prompt)
    user_history, item_description = extract_revised_prompt(initial)
    return user_history, item_description, initial.content

In [179]:
df_results['user_idx'].value_counts()

user_idx
3464    125
4193    101
1034     90
727      84
1294     81
973      68
1193     61
5761     51
150      33
2811     22
550      20
344      12
2001     11
2229     10
5474      9
2460      7
4076      6
1906      5
2673      5
4081      4
2296      2
4874      2
4393      2
2336      1
Name: count, dtype: int64

In [151]:
sub_df = df_results.iloc[:30]
sub_df['user_reflect'] = ""
sub_df['item_reflect'] = ""
sub_df['init_context'] = ""


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['user_reflect'] = ""
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['item_reflect'] = ""
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['init_context'] = ""


In [152]:
u_list =[]
i_list = []
for i, row in sub_df.iterrows():
    user_history, item_description, init_context = reflect_prompt(row)
    sub_df.loc[i,'user_reflect'] = user_history
    sub_df.loc[i,'item_reflect'] = item_description
    sub_df.loc[i,'init_context'] = init_context


In [153]:
sub_df_clean = sub_df.dropna()
sub_df_clean.reset_index(drop=True, inplace=True)
sub_df_clean

Unnamed: 0,test_index,user_idx,user_hist,item_idx,item_desc,label,pred,user_reflect,item_reflect,init_context
0,4,2336,Given a male user who is aged 35-44 and a trad...,2922,Hang 'em High is a classic Western film releas...,0,0.780129,Given a male user who is aged 35-44 and a trad...,Hang 'em High is a 1967 Western film directed ...,Revised User History:\nGiven a male user who i...
1,18,1034,Given a female user who is aged 35-44 and an a...,924,2001: A Space Odyssey is a science fiction fil...,0,0.71754,Given a female user who is aged 35-44 and an a...,2001: A Space Odyssey is a groundbreaking scie...,Here is the revised user history and item desc...
2,29,1034,Given a female user who is aged 35-44 and an a...,3217,"""A Star is Born"" is a 1937 American drama film...",1,0.386345,User is a 35-44 year old academic/educator who...,"""A Star is Born"" is a 1937 American drama film...",Revised User History:\nUser is a 35-44 year ol...
3,31,1034,Given a female user who is aged 35-44 and an a...,1617,L.A. Confidential is a neo-noir crime film dir...,0,0.825905,A female user aged 35-44 and an academic/educa...,L.A. Confidential is a critically acclaimed ne...,Revised User History:\nA female user aged 35-4...
4,34,1034,Given a female user who is aged 35-44 and an a...,2278,Ronin is a 1998 thriller movie directed by Joh...,0,0.502834,Given a female user who is aged 35-44 and an a...,Ronin is a 1998 thriller movie directed by Joh...,Here is the revised user history and item desc...
5,57,1034,Given a female user who is aged 35-44 and an a...,978,"The Blue Angel (Blaue Engel, Der) is a 1930 Ge...",0,0.727337,"As a 35-44 year old academic/educator, this us...","""The Blue Angel"" is a 1930 German drama film d...",Here are the revised user history and item des...
6,60,1034,Given a female user who is aged 35-44 and an a...,2617,The Mummy is a 1999 American action-adventure ...,1,0.272996,This user's movie viewing history reveals a di...,The Mummy is a 1999 American action-adventure ...,Revised User History:\nThis user's movie viewi...
7,65,1034,Given a female user who is aged 35-44 and an a...,2640,Superman is a 1978 superhero film directed by ...,1,0.386297,"As a 35-44 year old academic/educator, this us...","In this 1978 superhero classic, Christopher Re...",Revised User History:\nAs a 35-44 year old aca...
8,69,1034,Given a female user who is aged 35-44 and an a...,1873,Les Misérables (1998) is a French film adaptat...,0,0.62509,This user is a 35-44-year-old academic/educato...,Les Misérables is a critically acclaimed Frenc...,Revised User History:\n\nThis user is a 35-44-...
9,74,1034,Given a female user who is aged 35-44 and an a...,3483,"""The Road to El Dorado"" is an animated adventu...",1,0.278205,Given a female user who is aged 35-44 and an a...,The Road to El Dorado is an animated adventure...,Here are the revised user history and item des...


In [154]:
sub_df.loc[0,'user_reflect']

"Given a male user who is aged 35-44 and a tradesman/craftsman, this user's movie viewing history over time is listed below. Last of the Mohicans, The (1992), 4 stars; Unforgiven (1992), 5 stars; Pale Rider (1985), 5 stars; Gattaca (1997), 2 stars; Legends of the Fall (1994), 5 stars; Milk Money (1994), 3 stars; Kelly's Heroes (1970), 4 stars; One Flew Over the Cuckoo's Nest (1975), 4 stars; Star Wars: Episode V - The Empire Strikes Back (1980), 3 stars; Raiders of the Lost Ark (1981), 4 stars. This user tends to enjoy action-packed movies with strong male leads, often set in historical or adventurous settings. He also appreciates movies with complex themes and characters, exploring deeper issues and emotions."

In [155]:
sub_df.loc[0,'user_hist']

'Given a male user who is aged 35-44 and a tradesman/craftsman, this user\'s movie viewing history over time is listed below. Last of the Mohicans, The (1992), 4 stars; Unforgiven (1992), 5 stars; Pale Rider (1985), 5 stars; Gattaca (1997), 2 stars; Legends of the Fall (1994), 5 stars; Milk Money (1994), 3 stars; Kelly\'s Heroes (1970), 4 stars; One Flew Over the Cuckoo\'s Nest (1975), 4 stars; Star Wars: Episode V - The Empire Strikes Back (1980), 3 stars; Raiders of the Lost Ark (1981), 4 stars. Based on the user\'s movie viewing history, it seems that he enjoys action and adventure movies with strong male leads. The user has given high ratings to movies like "Unforgiven," "Pale Rider," and "Legends of the Fall," which all feature rugged, tough male protagonists. The user also enjoys classic war movies like "Kelly\'s Heroes" and "Raiders of the Lost Ark," both of which have strong male leads.\n\nHowever, the user also seems to appreciate movies with more complex themes and characters

In [156]:
new_user_history = sub_df_clean['user_reflect'].tolist()
new_item_description = sub_df_clean['item_reflect'].tolist()

In [157]:
data_l = DataLoader(new_user_history,2, shuffle=False)
new_user_vec = inference(encoding_model, tokenizer, data_l, 'bert', 'avg')

100%|██████████| 11/11 [00:00<00:00, 53.55it/s]


In [158]:
data_l = DataLoader(new_item_description,2, shuffle=False)
new_item_vec = inference(encoding_model, tokenizer, data_l, 'bert', 'avg')

100%|██████████| 11/11 [00:00<00:00, 65.59it/s]


In [159]:
l = []
cnt= 0
for item in test_set:
    l.append(item)
    cnt+=1
    if cnt>200:
        break
test_dataset = DictDataset(l)

In [160]:
test_dataset[18]['hist_aug_vec'][:10]

tensor([-0.4031,  0.0147,  0.1539, -0.0502,  0.4482,  0.1345,  0.3013,  0.1704,
        -0.1854,  0.3938])

In [161]:
for idx, row in sub_df_clean.iterrows():
    test_idx = row['test_index']
    print(test_idx)
    test_dataset[test_idx]['hist_aug_vec'] = 0
    test_dataset[test_idx]['hist_aug_vec'] = torch.tensor(new_user_vec[idx])
    test_dataset[test_idx]['item_aug_vec'] = 0
    test_dataset[test_idx]['item_aug_vec'] = torch.tensor(new_item_vec[idx])


4
18
29
31
34
57
60
65
69
74
79
85
86
87
88
90
95
96
97
105
107
111


In [162]:
test_dataset[18]['hist_aug_vec'][:10]

tensor([-0.3225,  0.0623,  0.1450, -0.0188,  0.4778,  0.1110,  0.3804,  0.1305,
        -0.2793,  0.4114])

In [163]:
test_loader = Data.DataLoader(dataset=test_dataset, batch_size=2, shuffle=False)
short_test = ShortDataLoader(test_loader, num_batches=100)
auc, ll, loss, eval_time, labels, preds = eval(inference_model, short_test)

In [164]:
labels[18], preds[18]

(0, [0.6602370142936707])

In [165]:
sub_df['test_index'].to_list()

[4,
 18,
 24,
 29,
 31,
 34,
 39,
 54,
 57,
 59,
 60,
 64,
 65,
 69,
 74,
 79,
 84,
 85,
 86,
 87,
 88,
 90,
 94,
 95,
 96,
 97,
 99,
 105,
 107,
 111]

In [166]:
# prompt: take from list only the indexes from another list

def get_indexes(list1, list2):
  return [e for i, e in enumerate(list1) if i in list2]


In [167]:
new_labels = [v for i , v in enumerate(labels) if i in sub_df_clean['test_index'].to_list()]
new_preds = [v[0] for i , v  in enumerate(preds) if i in sub_df_clean['test_index'].to_list()]
new_labels

[0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1]

In [168]:
new_preds

[0.7745561003684998,
 0.6602370142936707,
 0.6377848386764526,
 0.758130669593811,
 0.48564252257347107,
 0.6651780009269714,
 0.3463522791862488,
 0.44794902205467224,
 0.6877689361572266,
 0.34976911544799805,
 0.6094719767570496,
 0.631099283695221,
 0.23065820336341858,
 0.6297541856765747,
 0.44829216599464417,
 0.6686490774154663,
 0.6434418559074402,
 0.4604402482509613,
 0.5197166800498962,
 0.5047423243522644,
 0.6996059417724609,
 0.5078151822090149]

In [169]:
sub_df_clean['new label'] = new_labels
sub_df_clean['new pred'] = new_preds
sub_df_clean

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df_clean['new label'] = new_labels
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df_clean['new pred'] = new_preds


Unnamed: 0,test_index,user_idx,user_hist,item_idx,item_desc,label,pred,user_reflect,item_reflect,init_context,new label,new pred
0,4,2336,Given a male user who is aged 35-44 and a trad...,2922,Hang 'em High is a classic Western film releas...,0,0.780129,Given a male user who is aged 35-44 and a trad...,Hang 'em High is a 1967 Western film directed ...,Revised User History:\nGiven a male user who i...,0,0.774556
1,18,1034,Given a female user who is aged 35-44 and an a...,924,2001: A Space Odyssey is a science fiction fil...,0,0.71754,Given a female user who is aged 35-44 and an a...,2001: A Space Odyssey is a groundbreaking scie...,Here is the revised user history and item desc...,0,0.660237
2,29,1034,Given a female user who is aged 35-44 and an a...,3217,"""A Star is Born"" is a 1937 American drama film...",1,0.386345,User is a 35-44 year old academic/educator who...,"""A Star is Born"" is a 1937 American drama film...",Revised User History:\nUser is a 35-44 year ol...,1,0.637785
3,31,1034,Given a female user who is aged 35-44 and an a...,1617,L.A. Confidential is a neo-noir crime film dir...,0,0.825905,A female user aged 35-44 and an academic/educa...,L.A. Confidential is a critically acclaimed ne...,Revised User History:\nA female user aged 35-4...,0,0.758131
4,34,1034,Given a female user who is aged 35-44 and an a...,2278,Ronin is a 1998 thriller movie directed by Joh...,0,0.502834,Given a female user who is aged 35-44 and an a...,Ronin is a 1998 thriller movie directed by Joh...,Here is the revised user history and item desc...,0,0.485643
5,57,1034,Given a female user who is aged 35-44 and an a...,978,"The Blue Angel (Blaue Engel, Der) is a 1930 Ge...",0,0.727337,"As a 35-44 year old academic/educator, this us...","""The Blue Angel"" is a 1930 German drama film d...",Here are the revised user history and item des...,0,0.665178
6,60,1034,Given a female user who is aged 35-44 and an a...,2617,The Mummy is a 1999 American action-adventure ...,1,0.272996,This user's movie viewing history reveals a di...,The Mummy is a 1999 American action-adventure ...,Revised User History:\nThis user's movie viewi...,1,0.346352
7,65,1034,Given a female user who is aged 35-44 and an a...,2640,Superman is a 1978 superhero film directed by ...,1,0.386297,"As a 35-44 year old academic/educator, this us...","In this 1978 superhero classic, Christopher Re...",Revised User History:\nAs a 35-44 year old aca...,1,0.447949
8,69,1034,Given a female user who is aged 35-44 and an a...,1873,Les Misérables (1998) is a French film adaptat...,0,0.62509,This user is a 35-44-year-old academic/educato...,Les Misérables is a critically acclaimed Frenc...,Revised User History:\n\nThis user is a 35-44-...,0,0.687769
9,74,1034,Given a female user who is aged 35-44 and an a...,3483,"""The Road to El Dorado"" is an animated adventu...",1,0.278205,Given a female user who is aged 35-44 and an a...,The Road to El Dorado is an animated adventure...,Here are the revised user history and item des...,1,0.349769


In [170]:
y_true = sub_df_clean['label'].to_list()
y_pred = sub_df_clean['pred'].to_list()
log_loss(y_true, y_pred)

1.0689344950927078

In [171]:

y_true = sub_df_clean['label'].to_list()
y_pred = sub_df_clean['new pred'].to_list()

log_loss(y_true, y_pred)


0.930473731275092