In [1]:
# Pin the kernel to a single GPU: enumerate devices in PCI bus order and expose
# only device 0 to CUDA. Must run before any CUDA context is created.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments
import torch
import os
import pandas as pd
from tqdm import tqdm
from datasets import Dataset
from fuzzywuzzy import fuzz
import evaluate
import csv
import numpy as np
import torch.nn.functional as F
from sklearn.metrics import top_k_accuracy_score, ndcg_score
import random

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def get_first_five_words(sentence, num_words=10):
    """Return the leading words of ``sentence`` joined by single spaces.

    NOTE: despite the (historical) function name, the default keeps the first
    10 whitespace-separated words, not 5. The default is preserved because the
    training targets and candidate lists in this notebook are built with it.

    Args:
        sentence: Text to shorten; split on arbitrary whitespace.
        num_words: Maximum number of leading words to keep (default 10).

    Returns:
        A string made of at most ``num_words`` words separated by one space.
        An empty or whitespace-only input yields ``""``.
    """
    words = sentence.split()  # Split on any run of whitespace
    return " ".join(words[:num_words])  # Keep only the leading words

In [4]:
# Load the training split (one JSON object per line) and lowercase every
# ground-truth product name in place.
input_file = "/u/spa-d4/grad/mfe261/Projects/MobileConvRec/dataset/amazon_beauty/splits/train.jsonl"
df_recommender_train = pd.read_json(input_file, lines=True)
# Each cell of the column is a dict, so mutating it updates the frame directly.
for product in df_recommender_train["recommended_product"]:
    product["product_name"] = product["product_name"].lower()

In [5]:
# Inspect the loaded training split (pandas truncates the display).
df_recommender_train

Unnamed: 0,user_id,user_previous_interactions,recommended_product,negative_recommended_product,turns
0,AGWDYYVVWM3DC3CASUZKXK67G6IA,[],{'product_name': 'burts bees marshmallow cream...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
1,AFIJLAW3HIOMRUFSWNH54IJ3XQAA,[],{'product_name': 'crest whitestrips premium - ...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
2,AGZZXSMMS4WRHHJRBUJZI4FZDHKQ,[],"{'product_name': 'norelco 8894xl', 'parent_asi...","[{'product_name': 'Sunsonic Electric Razor, Re...","[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
3,AHNSZCP3JIOZZVYXFJRGQEKRSTFA,[],{'product_name': 'crest whitestrips dental whi...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
4,AGWDYYVVWM3DC3CASUZKXK67G6IA,[{'product_name': 'BURTS BEES Marshmallow Crea...,{'product_name': 'komenuka bijin all-natural e...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
...,...,...,...,...,...
6439,AEUAQ6BAUEBP2C754LR3ILYHP54Q,[{'product_name': 'Hair Straightener Flat Iron...,{'product_name': '10pcs nail tips clips for qu...,[{'product_name': '20pcs Nail Tips Clips for Q...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
6440,AHPGHDFIU3BUB3RQBP56RQQA7W4Q,[{'product_name': '8 PCS 40ML Mini Spray Bottl...,{'product_name': 'happygo jade roller for wome...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
6441,AGRKJRPX6I5DQPODOC4YJ7CT4XWA,"[{'product_name': 'DEROL Lip Plumper, Lip Plum...",{'product_name': 'purple shampoo and condition...,[{'product_name': 'Purple Shampoo and Conditio...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
6442,AHV6QCNBJNSGLATP56JAWJ3C4G2A,[{'product_name': 'Scrub Angel Organic Arabica...,{'product_name': 'little moon essentials tropi...,[{'product_name': 'Smooth Lavender Sugar Scrub...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."


In [6]:
# Load the validation split (JSON lines) and lowercase every ground-truth
# product name in place.
input_file = "/u/spa-d4/grad/mfe261/Projects/MobileConvRec/dataset/amazon_beauty/splits/val.jsonl"
df_recommender_validation = pd.read_json(input_file, lines=True)
# The column holds dict objects; editing them mutates the frame's contents.
for product in df_recommender_validation["recommended_product"]:
    product["product_name"] = product["product_name"].lower()

In [7]:
# Inspect the loaded validation split (pandas truncates the display).
df_recommender_validation

Unnamed: 0,user_id,user_previous_interactions,recommended_product,negative_recommended_product,turns
0,AGYVC7KVHP2AWM7BDCEYNHFA6F3Q,[{'product_name': 'MD Complete Bright & Health...,{'product_name': 'nohj secret repair 3d-ampoul...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
1,AFATNFWGYVVMEZWGDNLDUXRBRFLQ,[{'product_name': '16 Jars Chrome Nail Powder ...,{'product_name': '12pcs floral boho headbands ...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
2,AFDYIK3FNPY2JFBQYUWC6GSBMIRQ_2,[{'product_name': 'Kaneles Long Wavy Dark Brow...,{'product_name': 'headband wig straight human ...,[{'product_name': 'Kinky Straight Headband Wig...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
3,AEID4BJMMM6LCEUTIHBUM5H4E3BQ,"[{'product_name': 'Bold Men, Mens Hair Brush F...","{'product_name': 'lip plumper set, natural lip...",[{'product_name': 'Lip Plumper Set Natural Lip...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
4,AEMRAZPNN2NBBUDV4YGGYMGPFC6A,[{'product_name': 'Cleancult Soap Dispenser wi...,{'product_name': 'coppertone kids clear sunscr...,[{'product_name': 'Under Eye Ultraviolet Prote...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
...,...,...,...,...,...
1375,AHXDPC5UCJ24S7IIS3CRJDCTOJRA,[{'product_name': 'Deep Wave Wig Natural Human...,{'product_name': 'vtaozi lace front wigs human...,[{'product_name': 'ZILING Straight Lace Front ...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
1376,AGCPEDYIF2QXKNLBIOOEKEHJ5V2A,[{'product_name': 'Gold Collagen Under Eye Pat...,{'product_name': 'cocosolis grow hair growth s...,[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
1377,AFTLUVGQWKW6XSQ5TB6UER5Q263A,[{'product_name': 'Stainless Steel Gua Sha Scr...,"{'product_name': 'osrsr cryo sticks,unbreakabl...",[],"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."
1378,AE3UKETTR77J4LM2ZE4AEUC4L6KA,[{'product_name': 'RBA’s Firming Serum to Redu...,{'product_name': 'skinesque wake up and makeup...,[{'product_name': 'Premium black caviar facial...,"[{'turn': 1, 'is_rec': False, 'user_accept_rec..."


In [8]:
# Start from the pretrained flan-t5-base checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path = "google/flan-t5-base")
# Register the two speaker tags as special tokens so each is a single token.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", additional_special_tokens=["computer:", "human:"])
# Resize the embedding matrix to the tokenizer's (now larger) vocabulary.
model.resize_token_embeddings(len(tokenizer))
# Value written over pad positions in the labels below; -100 is the default
# ignore_index of PyTorch's cross-entropy loss.
IGNORE_INDEX = -100



In [9]:
# Build (prompt, target) pairs for training: the prompt is the dialogue up to
# the first COMPUTER turn that mentions the ground-truth product, and the
# target is the shortened, lowercased product name.
prompt_train, recommend_train = [], []
not_founds = 0

for _, row in tqdm(df_recommender_train.iterrows(), total=len(df_recommender_train)):
    recommended = get_first_five_words(row["recommended_product"]["product_name"])
    prompt_parts = []

    for turn in row["turns"]:
        if "COMPUTER" in turn:
            computer = turn["COMPUTER"]
            # Fuzzy substring match: this turn is the recommendation itself.
            if fuzz.partial_ratio(recommended, computer.lower()) >= 95:
                prompt_parts.append("computer: I would recommend the ")
                prompt_train.append("".join(prompt_parts))
                recommend_train.append(recommended)
                break
            prompt_parts.append("computer: " + computer + "\n")

        if "HUMAN" in turn:
            prompt_parts.append("human: " + turn["HUMAN"] + "\n")
    else:
        # Loop finished without a break: no turn mentioned the product.
        not_founds += 1

print(f"Could not find {not_founds}")
print(f"len(prompt_train): {len(prompt_train)}")
print(f"len(recommend_train): {len(recommend_train)}")

# Tokenize to fixed lengths here; data_collator trims each batch later.
prompt_encodings = tokenizer(
    prompt_train, padding='max_length', max_length=1024, truncation=True, return_tensors='pt'
)
recommend_encodings = tokenizer(
    recommend_train, padding='max_length', max_length=32, truncation=True, return_tensors='pt'
)

# Overwrite pad ids in the targets (in place) so they are excluded from the loss.
labels = recommend_encodings['input_ids']
labels.masked_fill_(labels == tokenizer.pad_token_id, IGNORE_INDEX)

dataset = dict(
    input_ids=prompt_encodings['input_ids'],
    attention_mask=prompt_encodings['attention_mask'],
    labels=labels,
)
dataset_train = Dataset.from_dict(dataset)

  0%|          | 0/6444 [00:00<?, ?it/s]

100%|██████████| 6444/6444 [01:09<00:00, 93.13it/s] 


Could not find 196
len(prompt_train): 6248
len(recommend_train): 6248


In [10]:
# Build (prompt, target) pairs for validation with the same recipe as the
# training cell: dialogue up to the first COMPUTER turn that mentions the
# ground-truth product, followed by the recommendation cue.
prompt_validation, recommend_validation = [], []
not_founds = 0

for _, row in tqdm(df_recommender_validation.iterrows(), total=len(df_recommender_validation)):
    recommended = get_first_five_words(row["recommended_product"]["product_name"])
    prompt_parts = []

    for turn in row["turns"]:
        if "COMPUTER" in turn:
            computer = turn["COMPUTER"]
            # Fuzzy substring match: this turn is the recommendation itself.
            if fuzz.partial_ratio(recommended, computer.lower()) >= 95:
                prompt_parts.append("computer: I would recommend the ")
                prompt_validation.append("".join(prompt_parts))
                recommend_validation.append(recommended)
                break
            prompt_parts.append("computer: " + computer + "\n")

        if "HUMAN" in turn:
            prompt_parts.append("human: " + turn["HUMAN"] + "\n")
    else:
        # Loop finished without a break: no turn mentioned the product.
        not_founds += 1

print(f"Could not find {not_founds}")
print(f"len(prompt_validation): {len(prompt_validation)}")
print(f"len(recommend_validation): {len(recommend_validation)}")

# Tokenize to fixed lengths here; data_collator trims each batch later.
prompt_encodings = tokenizer(
    prompt_validation, padding='max_length', max_length=1024, truncation=True, return_tensors='pt'
)
recommend_encodings = tokenizer(
    recommend_validation, padding='max_length', max_length=32, truncation=True, return_tensors='pt'
)

# Overwrite pad ids in the targets (in place) so they are excluded from the loss.
labels = recommend_encodings['input_ids']
labels.masked_fill_(labels == tokenizer.pad_token_id, IGNORE_INDEX)

dataset = dict(
    input_ids=prompt_encodings['input_ids'],
    attention_mask=prompt_encodings['attention_mask'],
    labels=labels,
)
dataset_validation = Dataset.from_dict(dataset)

  0%|          | 0/1380 [00:00<?, ?it/s]

100%|██████████| 1380/1380 [00:15<00:00, 91.54it/s] 


Could not find 64
len(prompt_validation): 1316
len(recommend_validation): 1316


In [11]:
def data_collator(batch):
    """Collate samples into tensors trimmed to the longest sequence in the batch.

    Args:
        batch: Iterable of dicts with 'input_ids', 'attention_mask' and
            'labels' entries (equal-length sequences per key).

    Returns:
        Dict of tensors. Encoder inputs are cut to the largest attention-mask
        sum in the batch; labels are cut to the largest count of entries that
        differ from IGNORE_INDEX.
    """
    input_ids = [sample['input_ids'] for sample in batch]
    attention_mask = [sample['attention_mask'] for sample in batch]
    labels = [sample['labels'] for sample in batch]

    # Number of real (non-padding) encoder tokens in the longest sample.
    max_encoder_len = max(sum(mask_row) for mask_row in attention_mask)
    # Number of supervised decoder tokens in the longest target.
    max_decoder_len = max(
        sum(1 for token in label_row if token != IGNORE_INDEX) for label_row in labels
    )

    collated = {}
    collated['input_ids'] = torch.tensor(input_ids)[:, :max_encoder_len]
    collated['attention_mask'] = torch.tensor(attention_mask)[:, :max_encoder_len]
    collated['labels'] = torch.tensor(labels)[:, :max_decoder_len]
    return collated

In [12]:
# Training configuration. The best checkpoint (lowest eval_loss) is reloaded
# when training ends.
training_args = TrainingArguments(
    output_dir="/u/spa-d4/grad/mfe261/Projects/MobileConvRec/models/new_models/amazon_beauty/T5_recommender",
    num_train_epochs=5,
    # logging_steps=500,
    # logging_dir=self.cfg.logging_dir,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    save_strategy="steps",
    evaluation_strategy="steps",
    # Floats below 1 are read as a fraction of the total training steps, so
    # saving/evaluation happens every 30% of the run (steps 780/1560/2340 in
    # the log below).
    save_steps=0.3,
    eval_steps=0.3,
    save_total_limit=3,  # keep at most three checkpoints on disk
    gradient_accumulation_steps=3,  # 4 samples x 3 accumulation steps per optimizer update
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=100,
    weight_decay=0.01,
    # dataloader_drop_last=True,
    disable_tqdm=False,
    push_to_hub=False
)

# Wire up the model, datasets and the custom batch-trimming collator.
trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset_train,
        eval_dataset=dataset_validation,
        data_collator=data_collator,
    )

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [13]:
# Fine-tune, then write the final model to training_args.output_dir.
# Pass resume_from_checkpoint=True to train() to continue an interrupted run.
trainer.train()
# NOTE(review): only the model is saved here; the evaluation section rebuilds
# the tokenizer from the base checkpoint with the same special tokens.
trainer.save_model()

Step,Training Loss,Validation Loss
780,3.4901,2.935113
1560,2.7605,2.860232
2340,2.6297,2.851538


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


# Load the model and test it on the test dataset

In [14]:
# Release cached GPU memory held by PyTorch's allocator. Tensors that are still
# referenced (e.g. the training `model`) are not freed by this call.
torch.cuda.empty_cache()

In [15]:
# Load the test split (JSON lines) and lowercase every ground-truth product
# name in place.
input_file = "/u/spa-d4/grad/mfe261/Projects/MobileConvRec/dataset/amazon_beauty/splits/test.jsonl"
df_recommender_test = pd.read_json(input_file, lines=True)
# The column holds dict objects; editing them mutates the frame's contents.
for product in df_recommender_test["recommended_product"]:
    product["product_name"] = product["product_name"].lower()

In [16]:
apps_training_path = "/u/spa-d4/grad/mfe261/Projects/MobileConvRec/dataset/amazon_beauty/beauty_df.csv"

# Collect the catalogue of shortened, lowercased product titles.
# newline='' is what the csv module expects so that line breaks embedded in
# quoted fields are parsed correctly.
with open(apps_training_path, 'r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    unique_titles = {get_first_five_words(row["title"].lower()) for row in csv_reader}

# Deduplicate and sort so the list is identical across kernel restarts
# (plain set iteration order depends on the string hash seed).
all_apps = sorted(unique_titles)

In [None]:
# Show the catalogue size and a short preview instead of dumping every title.
len(all_apps), all_apps[:5]

In [18]:
# Load the full product catalogue as a DataFrame for inspection.
df_app = pd.read_csv(apps_training_path)

In [19]:
# Count rows whose `categories` string is longer than two characters
# (presumably anything other than an empty "[]" — TODO confirm column format).
df_app["categories"].apply(lambda x: len(x)>2).sum()

np.int64(0)

In [20]:
# Total number of catalogue rows, for comparison with the count above.
len(df_app["categories"])

4986

In [None]:
# NOTE(review): both names are already imported in the import cell at the top.
import pandas as pd
from fuzzywuzzy import fuzz

# Ensure that you've already defined 'apps_training_path' to point to your data file
# NOTE(review): 'book_id' and 'genres' look carried over from a book dataset;
# other cells read a 'categories' column from this same file — confirm these
# columns exist before running (read_csv raises if a usecols entry is missing).
cols = ['title', 'book_id', 'genres']
# This rebinds df_app, replacing the full catalogue frame loaded earlier.
df_app = pd.read_csv(apps_training_path, usecols=cols)
df_app = df_app.drop_duplicates(subset='book_id', keep='first')
# Lowercase every string cell. NOTE(review): DataFrame.applymap is deprecated
# in recent pandas releases in favour of DataFrame.map.
df_apps = df_app.applymap(lambda x: x.lower() if isinstance(x, str) else x)

def _add_titles(candidate_apps, df_source, limit):
    """Add titles from ``df_source`` (in row order) until the set holds ``limit`` items."""
    for title in df_source['title']:
        if len(candidate_apps) >= limit:
            break
        candidate_apps.add(title)


def filter_candidate_apps(rec_app_name, num_candidates=25):
    """Collect up to ``num_candidates`` titles similar to the recommended item.

    Reads the module-level ``df_apps`` frame (columns 'title' and 'genres').
    Candidates are gathered in three passes, stopping as soon as the quota is met:
      1. items whose genre string equals that of the recommended item;
      2. items matching progressively shorter genre strings, obtained by
         removing one randomly chosen genre at a time;
      3. any item of the catalogue, in row order.

    NOTE(review): nothing guarantees that the recommended item itself ends up
    in the result when its genre group is larger than the quota — confirm the
    caller adds it if a ground-truth entry is required.

    Args:
        rec_app_name: Title of the recommended item (matched case-insensitively
            against the lowercased 'title' column).
        num_candidates: Maximum number of titles to return (default 25).

    Returns:
        A list of unique titles (arbitrary order), or ``[]`` when the title is
        not present in ``df_apps``.
    """
    candidate_apps = set()  # Using a set to avoid duplicates
    df_rec_app = df_apps[df_apps['title'] == rec_app_name.lower()]

    if df_rec_app.empty:
        print("No matching app found.")
        return []

    recommended_app_category = df_rec_app['genres'].iloc[0]

    # Pass 1: exact same genre string.
    _add_titles(candidate_apps, df_apps[df_apps['genres'] == recommended_app_category], num_candidates)

    # Pass 2: relax the genre string one randomly removed genre at a time.
    # A missing (non-string) genre cannot be relaxed, so skip to the fallback.
    if len(candidate_apps) < num_candidates and isinstance(recommended_app_category, str):
        genre_list = [genre.strip() for genre in recommended_app_category.split(",") if genre.strip()]
        while genre_list and len(candidate_apps) < num_candidates:
            genre_list.remove(random.choice(genre_list))
            relaxed_category = ", ".join(genre_list)
            _add_titles(candidate_apps, df_apps[df_apps['genres'] == relaxed_category], num_candidates)

    # Pass 3: fall back to the whole catalogue.
    _add_titles(candidate_apps, df_apps, num_candidates)

    return list(candidate_apps)  # Converting back to list if needed for downstream processes

In [21]:
def candidate_creator(row, num_negatives=24):
    """Build a shuffled candidate list: random negatives plus the true item.

    Reads the module-level ``all_apps`` catalogue. The NumPy global RNG is
    seeded with the row's index label so that every row gets a reproducible
    candidate list.

    Args:
        row: DataFrame row with a ``recommended_product`` dict holding
            ``product_name``; ``row.name`` is used as the random seed.
        num_negatives: Number of negative items to sample (default 24).

    Returns:
        Object-dtype NumPy array of ``num_negatives + 1`` names with the true
        item inserted at a random position.
    """
    target = get_first_five_words(row["recommended_product"]["product_name"])
    np.random.seed(row.name)
    # setdiff1d returns the sorted unique catalogue without the target, so the
    # sample does not depend on the ordering of `all_apps`.
    selected_values = np.random.choice(np.setdiff1d(all_apps, [target]), num_negatives, replace=False)
    random_position = np.random.randint(0, len(selected_values) + 1)

    # Cast to object first: inserting into a fixed-width unicode array would
    # silently truncate a target longer than the longest catalogue entry.
    return np.insert(selected_values.astype(object), random_position, target)

df_recommender_test['candidate'] = df_recommender_test.apply(candidate_creator, axis=1)

In [22]:
# Build the evaluation inputs: for every test dialogue keep the prompt up to
# the recommendation turn, the ground-truth name, the candidate list and the
# position of the ground truth inside that list.
prompt_test = []
recommend_test = []
candidate_books = []
true_candidate_indexes = []
not_founds = 0
for _, row in df_recommender_test.iterrows():
    recommended = get_first_five_words(row["recommended_product"]["product_name"])
    candidates = row["candidate"].tolist()

    # Locate the ground truth among the candidates. Without this check a
    # missing target would silently reuse the index of the previous row.
    matches = [index for index, candidate_book in enumerate(candidates) if candidate_book == recommended]
    if not matches:
        not_founds += 1
        continue
    true_candidate_index = matches[-1]

    prompt = ""
    found = False

    for turn in row["turns"]:
        # Guard against turns without a COMPUTER utterance, as in the
        # train/validation cells.
        if "COMPUTER" in turn:
            computer = turn["COMPUTER"]

            # Fuzzy substring match: this turn is the recommendation itself.
            if fuzz.partial_ratio(recommended, computer.lower()) >= 95:
                prompt += "computer: I would recommend the "
                prompt_test.append(prompt)
                recommend_test.append(recommended)
                candidate_books.append(candidates)
                true_candidate_indexes.append(true_candidate_index)
                found = True
                break
            else:
                prompt += "computer: "+ computer + "\n"

        if "HUMAN" in turn:
            human = turn["HUMAN"]
            prompt += "human: " + human + "\n"

    if not found:
        not_founds += 1

print(f"Could not find {not_founds}")
print(f"Number of prompt: {len(prompt_test)}")
print(f"Number of generations: {len(recommend_test)}")
print(f"Number of candidate apps: {len(candidate_books)}")
print(f"Number of true candidate indexes: {len(true_candidate_indexes)}")

Could not find 68
Number of prompt: 1314
Number of generations: 1314
Number of candidate apps: 1314
Number of true candidate indexes: 1314


In [23]:
# Spot-check the prompt built for one test dialogue.
prompt_test[10]

'computer: Hey! What brings you by today?\nhuman: I’m searching for liquid eyeliner. Can you suggest something?\ncomputer: Could you share some more details about it?\nhuman: I\'m expecting it to be long-lasting.\ncomputer: Do you have a favorite store or brand?\nhuman: I am interested in items from LuminaGlow.\ncomputer: Are you interested in popular products?\nhuman: Popular items do not catch my eye.\ncomputer: What type of product quality do you have in mind?\nhuman: Product quality is absolutely important.\ncomputer: I suggest trying out "INGRINC Lying Silkworm Eyeliner Pen Shiny Matte Rotatable Highlighter Makeup Eyeshadow Pencil Brighten Bottom-Eyelid Silkworm Makeup Eyeshadow Eyeliner Pen for Women Girls (5)" based on your interests.\nhuman: I\'ve heard that some eyeliner pens can be quite challenging to apply. Is this one easy to use?\ncomputer: According to a number of user reviews, this eyeliner pen can indeed be challenging to apply effectively.\nhuman: Hmm, I am not sure. 

In [24]:
# Ground-truth (shortened, lowercased) product name for the same dialogue.
recommend_test[10]

'luminaglow liquid dual eyeliner waterproof vegan black smudge proof cruelty-free'

In [22]:
# Candidate list (ground truth plus sampled negatives) for the same dialogue.
candidate_books[10]

['lucyhairwig synthetic lace front wig ombre blonde heat resistant fiber hair wigs body wave glueless lace front blonde wigs for women',
 'temotei 2 in 1 delicate shampoo 13.5oz',
 'fragrantshare makeup brushes professional organizer foundation brush for liquid makeup travel 9pcs sets - odorless fiber hair - light blue',
 '240 pieces extra long ballerina press on nails glitter acrylic coffin false nails full cover fake nails tips with a crystal nail file for nail art salon diy decoration (glitter)',
 'mifengda 5 pieces crystal rhinestones hair barrettes fashion girls small crystal hairpins flower butterfly french clip vintage hair clips bridal wedding hairpins jewelry accessory for women or girls',
 'bvendano 10×10gm face paint kit one stroke split cake water based palette with 3 professional brushes body paint makeup birthday party halloween',
 'brazilian body wave human hair lace frontal wigs 13x4x1 t part lace front wigs for black women natural hairline 18 inches',
 'jiaufmi makeup 

In [25]:
# Reload the fine-tuned weights from the training output directory and switch
# to inference mode on the GPU.
model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path = "/u/spa-d4/grad/mfe261/Projects/MobileConvRec/models/new_models/amazon_beauty/T5_recommender")
model.eval()
model = model.to('cuda')
# The tokenizer is rebuilt from the base checkpoint with the same two special
# tokens that were added before training.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", additional_special_tokens=["computer:", "human:"])
# NOTE(review): presumably a no-op because the saved model was already resized
# before training — confirm.
model.resize_token_embeddings(len(tokenizer))
# Same label-ignore value as in the training section.
IGNORE_INDEX = -100



In [33]:
def chunk(list_of_elements, batch_size):
  """Yield consecutive slices of ``list_of_elements`` holding at most ``batch_size`` items."""
  # Split the data into batches; the last one may be shorter.
  yield from (
    list_of_elements[start:start+batch_size]
    for start in range(0, len(list_of_elements), batch_size)
  )

def evaluate_recommender(prompt_test, recommend_test, model, tokenizer, batch_size=8, threshold=70):
  """Generate a recommendation per prompt and flag fuzzy matches with the truth.

  Args:
    prompt_test: List of dialogue prompts.
    recommend_test: List of ground-truth product names, aligned with the prompts.
    model: Seq2seq model used for beam-search generation.
    tokenizer: Tokenizer matching ``model``.
    batch_size: Number of prompts generated per forward pass.
    threshold: A prediction counts as correct when ``fuzz.ratio`` between the
      decoded text and the ground truth is strictly greater than this value.

  Returns:
    List of 0/1 flags, one per prompt, in input order.
  """
  # Run on whatever device the model lives on instead of assuming 'cuda'.
  device = model.device
  prompt_batches = list(chunk(prompt_test, batch_size))
  generation_batches = list(chunk(recommend_test, batch_size))

  correctly_predicted = []
  for prompt_batch, generation_batch in tqdm(zip(prompt_batches, generation_batches), total = len(generation_batches)):

    # NOTE(review): padding every batch to 1024 tokens is kept to reproduce the
    # reported numbers; padding to the longest prompt would be cheaper.
    inputs = tokenizer(prompt_batch, max_length=1024, truncation=True, padding="max_length", return_tensors="pt")

    generations_predicted = model.generate(input_ids=inputs["input_ids"].to(device), attention_mask=inputs["attention_mask"].to(device),
                            max_new_tokens=32,
                            num_beams=8,
                            eos_token_id=tokenizer.eos_token_id,
                            pad_token_id=tokenizer.pad_token_id,
                            bos_token_id=tokenizer.bos_token_id) # length_penalty < 1.0 favours shorter outputs, > 1.0 longer ones

    decoded_generations = [tokenizer.decode(generation, skip_special_tokens=True, clean_up_tokenization_spaces=True) for generation in generations_predicted]

    correctly_predicted.extend(
      1 if fuzz.ratio(predicted, ground_truth) > threshold else 0
      for predicted, ground_truth in zip(decoded_generations, generation_batch)
    )

  return correctly_predicted

In [35]:
# Free-form generation metric: share of test prompts whose generated text
# fuzzy-matches the ground-truth name (fuzz.ratio > 70).
correctly_predicted = evaluate_recommender(prompt_test, recommend_test, model, tokenizer, batch_size=8, threshold=70)
success_rate = sum(correctly_predicted) / len(correctly_predicted)
print("success_rate: ", success_rate)

100%|██████████| 165/165 [03:24<00:00,  1.24s/it]

success_rate:  0.03881278538812785





In [28]:
def chunk(list_of_elements, batch_size):
  """Yield successive batches of at most ``batch_size`` items from ``list_of_elements``."""
  # Start offsets of every batch; the final batch may be shorter.
  batch_starts = range(0, len(list_of_elements), batch_size)
  for start in batch_starts:
    stop = start + batch_size
    yield list_of_elements[start:stop]
    
def convert_to_sublists(numbers, sublist_size):
    """Split ``numbers`` into consecutive sublists of at most ``sublist_size`` items."""
    sublists = []
    for start in range(0, len(numbers), sublist_size):
        sublists.append(numbers[start:start+sublist_size])
    return sublists

def recommender_rank(prompts, candidate_apps, model, tokenizer, batch_size=8):
  """Score every candidate of every prompt by its mean token log-probability.

  Each (prompt, candidate) pair is run through the model with the candidate
  text, prefixed by the pad token, supplied as decoder input. The score is the
  average log-probability the model assigns to the candidate's non-pad tokens.

  Args:
    prompts: List of dialogue prompts.
    candidate_apps: List (aligned with ``prompts``) of candidate-name lists.
      All lists are assumed to have the same length as the first one.
    model: Seq2seq model to score with.
    tokenizer: Tokenizer matching ``model``.
    batch_size: Number of (prompt, candidate) pairs per forward pass.

  Returns:
    List with one list of float scores per prompt, ordered like the candidates.
    Empty input yields ``[]``.
  """
  if not candidate_apps:
    return []

  model.eval()
  # Run on whatever device the model lives on instead of assuming "cuda".
  device = model.device
  encoder_max_length = 1024
  decoder_max_length = 32
  prompts_tokenized = tokenizer(prompts, max_length=encoder_max_length, truncation=True, padding="max_length", return_tensors="pt")

  input_ids_decoder = []
  attention_mask_decoder = []
  input_ids_encoder = []
  attention_mask_encoder  = []
  for index, candidate_app_elements in enumerate(candidate_apps):
    # Prefix each candidate with the pad token so the decoder input starts with it.
    candidate_app_elements = [tokenizer.pad_token+element for element in candidate_app_elements]
    candidate_apps_tokenized = tokenizer(candidate_app_elements, max_length=decoder_max_length, truncation=True, padding="max_length", return_tensors="pt")
    for candidate_app_index in range(len(candidate_app_elements)):
      input_ids_decoder.append(candidate_apps_tokenized["input_ids"][candidate_app_index])
      attention_mask_decoder.append(candidate_apps_tokenized["attention_mask"][candidate_app_index])
      # The same encoded prompt is repeated once per candidate.
      input_ids_encoder.append(prompts_tokenized["input_ids"][index])
      attention_mask_encoder.append(prompts_tokenized["attention_mask"][index])

  input_ids_encoder_batches = list(chunk(input_ids_encoder, batch_size))
  attention_mask_encoder_batches = list(chunk(attention_mask_encoder, batch_size))
  input_ids_decoder_batches = list(chunk(input_ids_decoder, batch_size))
  attention_mask_decoder_batches = list(chunk(attention_mask_decoder, batch_size))

  scores = []
  for input_ids_encoder_batch, attention_mask_encoder_batch, input_ids_decoder_batch, attention_mask_decoder_batch in tqdm(zip(input_ids_encoder_batches, attention_mask_encoder_batches, input_ids_decoder_batches, attention_mask_decoder_batches), total = len(input_ids_encoder_batches)):
    decoder_input_ids = torch.stack(input_ids_decoder_batch).to(device)
    decoder_attention_mask = torch.stack(attention_mask_decoder_batch).to(device)
    input_ids = torch.stack(input_ids_encoder_batch).to(device)
    attention_mask = torch.stack(attention_mask_encoder_batch).to(device)
    with torch.no_grad():
      model_output = model(decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask,
                           input_ids=input_ids, attention_mask=attention_mask)

    # Position t of the logits predicts token t+1: drop the last position's
    # logits and the leading start token so predictions and targets line up.
    logprobs = F.log_softmax(model_output["logits"], dim=-1)[:, :-1, :]
    output_tokens = decoder_input_ids[:, 1:]

    tokens_logprobs = torch.gather(logprobs, 2, output_tokens[:, :, None]).squeeze(-1).to(torch.float32)

    # Exclude padding positions from the average (vectorised; a per-token
    # Python loop over GPU tensors is needlessly slow).
    mask = output_tokens != tokenizer.pad_token_id

    score = (tokens_logprobs * mask).sum(-1) / mask.sum(-1)
    scores.extend(score.to('cpu').tolist())

  # Regroup the flat score list into one row per prompt.
  scores = convert_to_sublists(scores, len(candidate_apps[0]))

  return scores

In [29]:
# Score every (prompt, candidate) pair; one row of scores per test prompt.
scores = recommender_rank(prompt_test, candidate_books, model, tokenizer, batch_size=8)

100%|██████████| 4107/4107 [15:25<00:00,  4.44it/s]


Sampled Candidates

In [30]:
# Top-k accuracy for k = 1..10. Passing `labels` explicitly ties column i of
# `scores` to candidate position i; without it scikit-learn infers the classes
# from the true indexes and fails if some position never occurs as the truth.
candidate_labels = np.arange(len(candidate_books[0]))
[top_k_accuracy_score(true_candidate_indexes, scores, k=k, labels=candidate_labels) for k in range(1, 11)]

[np.float64(0.4368340943683409),
 np.float64(0.563165905631659),
 np.float64(0.6377473363774734),
 np.float64(0.6948249619482496),
 np.float64(0.7404870624048706),
 np.float64(0.7663622526636226),
 np.float64(0.7945205479452054),
 np.float64(0.8249619482496194),
 np.float64(0.8462709284627092),
 np.float64(0.8645357686453576)]

In [31]:
# One-hot relevance rows for NDCG: 1 at the ground-truth position, 0 elsewhere.
num_candidates = len(candidate_books[0])
true_relevance = [
    [int(position == index) for position in range(num_candidates)]
    for index in true_candidate_indexes
]

In [32]:
# NDCG@k for k = 1..10 over the sampled candidate lists.
[ndcg_score(true_relevance, scores, k=k) for k in range(1, 11)]

[np.float64(0.4368340943683409),
 np.float64(0.5165405929169422),
 np.float64(0.5538313082898493),
 np.float64(0.578413303613673),
 np.float64(0.5960778153595425),
 np.float64(0.6052947440974975),
 np.float64(0.6146808425246918),
 np.float64(0.6242840351209088),
 np.float64(0.6306986773420591),
 np.float64(0.6359784001971803)]

Similar Candidates

In [None]:
# NOTE(review): this section re-uses `scores` and `true_candidate_indexes` as
# they are; no cell shown here rebuilds them for "similar" candidates, so the
# result repeats the sampled-candidate numbers unless they are recomputed first.
# `labels` is passed explicitly so column i of `scores` maps to candidate
# position i even if some position never occurs as the ground truth.
candidate_labels = np.arange(len(candidate_books[0]))
[top_k_accuracy_score(true_candidate_indexes, scores, k=k, labels=candidate_labels) for k in range(1, 11)]

In [44]:
# One-hot relevance rows for NDCG: 1 at the ground-truth position, 0 elsewhere.
num_candidates = len(candidate_books[0])
true_relevance = [
    [int(position == index) for position in range(num_candidates)]
    for index in true_candidate_indexes
]

In [None]:
# NDCG@k for k = 1..10, computed from the current `true_relevance` and `scores`.
[ndcg_score(true_relevance, scores, k=k) for k in range(1, 11)]