In [1]:
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments
import torch
import os
import pandas as pd
from tqdm import tqdm
from datasets import Dataset
from fuzzywuzzy import fuzz
import evaluate
import csv
import numpy as np
import torch.nn.functional as F
from sklearn.metrics import top_k_accuracy_score, ndcg_score

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def is_approximate_substring(substring, string, threshold=70):
    for i in range(len(string) - len(substring) + 1):
        window = string[i:i+len(substring)]
        similarity_ratio = fuzz.ratio(substring, window)
        if similarity_ratio >= threshold:
            return True
    return False

In [4]:
conversation_training_path = "/u/amo-d1/grad/sma340/project/llmrank/MobileConvRec-Main/dialogs/training"

user_id = []
previous_interactions = []
recommended_app_name = []
turns = []
recommend_indexes = []


# List all files in the folder
files = os.listdir(conversation_training_path)

# Loop through each file in the directory
for filename in tqdm(files):
    # Construct the full file path
    file_path = os.path.join(conversation_training_path, filename)

    # Check if the current item is a file
    if os.path.isfile(file_path):
        # Open and read the file
        with open(file_path, 'r') as file:
            # Read the content of the file
            file_content = file.read().lower()
            
            # finding the user id
            index_1 = file_content.find("user's previous interactions")
            user_id.append(file_content[9:index_1].rstrip('\n'))
            
            # finding the User's Previous Interactions
            index_2 = file_content.find("recommended app name:")
            previous_interactions_arr = file_content[index_1+29:index_2].rstrip('\n').split("app name:")
            previous_interactions_arr_filtered = []
            for previous_interaction in previous_interactions_arr[1:]:
                previous_interactions_arr_filtered.append(previous_interaction[:previous_interaction.find(" | ")])
            if len(previous_interactions_arr_filtered) > 0:
                previous_interactions.append(",".join(previous_interactions_arr_filtered))
            else:
                previous_interactions.append(None)
            
            # finding recommended app name
            index_3 = file_content[index_2:].find("package name")
            recommended = file_content[index_2+22:index_2+index_3-3].rstrip('\n')
            recommended_app_name.append(recommended)
            
            # finding each turns
            dialog_turns = []
            dialog_index = 0
            COMPUTER_index = file_content.find("computer:")
            file_content = file_content[COMPUTER_index:]
            found_recommender = False
            while True:
                HUMAN_index = file_content.find("human:")
                if HUMAN_index == -1:
                    break
                turn = file_content[:HUMAN_index].rstrip('\n') # computer dialog
                if (recommended in turn) and not found_recommender:
                    recommend_indexes.append(dialog_index)
                    found_recommender = True
                dialog_turns.append(turn)
                dialog_index +=1
                file_content = file_content[HUMAN_index:]
                
                COMPUTER_index = file_content.find("computer:")
                turn = file_content[:COMPUTER_index].rstrip('\n') # human dialog
                dialog_turns.append(turn)
                dialog_index +=1
                file_content = file_content[COMPUTER_index:]
                
            if not found_recommender: # approximately finding the recommender turn
                for i, dialog_turn in enumerate(dialog_turns):
                    if is_approximate_substring(recommended, dialog_turn):
                        recommend_indexes.append(i)
                        found_recommender = True
                        break
                    
            if not found_recommender:
                recommend_indexes.append(-1)
                        
            turns.append(dialog_turns)

print(len(user_id))
print(len(previous_interactions))
print(len(recommended_app_name))
print(len(recommend_indexes))
df_recommender_train = pd.DataFrame({"user_id": user_id, "previous_interactions":previous_interactions, "recommended_app_name":recommended_app_name, "turns": turns, "recommend_indexes":recommend_indexes})
print(f"\nnumber of rows: {len(df_recommender_train)}")

100%|██████████| 8720/8720 [00:28<00:00, 300.74it/s]

8720
8720
8720
8720

number of rows: 8720





In [5]:
df_recommender_train = df_recommender_train[(df_recommender_train["recommend_indexes"] != -1) & (df_recommender_train["turns"].apply(lambda x: len(x) > 0))]

In [13]:
apps_training_path = "/u/amo-d1/grad/sma340/project/llmrank/MobileConvRec-Main/data/master_app_data_V1_true.csv"

all_apps = []
with open(apps_training_path, 'r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        all_apps.append(row["app_name"].lower())

In [14]:
def fix_recommended_apps_names(row):
    if row["recommended_app_name"] not in all_apps:
        for app in all_apps:
            if fuzz.ratio(row["recommended_app_name"], app) > 80:
                return app
        return "uno!™"
    else:
        return row["recommended_app_name"]

df_recommender_train['recommended_app_name'] = df_recommender_train.apply(fix_recommended_apps_names, axis=1)

NameError: name 'df_recommender_train' is not defined

In [None]:
cols = ['app_name','app_type','app_category']
df_app = pd.read_csv(apps_training_path, usecols=cols)
df_apps = df_app.applymap(lambda x: x.lower() if isinstance(x, str) else x)
candidate_apps = []
def filter_candidate_apps(rec_app_name):
    
    df_rec_app=df_apps[df_apps['app_name'] == rec_app_name]
    recommended_app_type = df_rec_app['app_type'].iloc[0]
    recommended_app_category = df_rec_app['app_category'].iloc[0]
    category_filtered = df_apps[df_apps['app_category'] == recommended_app_category]
    df_apps_filter_1 = df_apps[df_apps['app_category'] != recommended_app_category]
    #print(recommended_app_type, recommended_app_category)
    #print(len(category_filtered))
    
    max_ratio = 0
    best_match = None
    for index, row in category_filtered.iterrows():
        if len(candidate_apps) >= 25:
            break
        if fuzz.ratio(row["app_type"], recommended_app_type) > 30:
            app = row['app_name']
            
            candidate_apps.append(app)
    
    for index, row in df_apps_filter_1.iterrows():
        if len(candidate_apps) >= 25:
            break
        ratio = fuzz.ratio(row["app_type"], recommended_app_type)
        if ratio > max_ratio:
            max_ratio = ratio
            app = row['app_name']
            best_match = app
            candidate_apps.append(best_match)
    
    apps_to_remove_2 = set(candidate_apps)
    df_apps_filter_2 = df_apps[~df_apps['app_name'].isin(apps_to_remove_2)]

    for index, row in df_apps_filter_1.iterrows():
        if len(candidate_apps) >= 25:
            break
        app = row['app_name']
        candidate_apps.append(app)

    
     
    return candidate_apps

  df_apps = df_app.applymap(lambda x: x.lower() if isinstance(x, str) else x)


In [9]:
def candidate_creator(row):
    selected_values = np.random.choice(np.setdiff1d(filter_candidate_apps(row["recommended_app_name"]), [row["recommended_app_name"]]), 24, replace=False)
    random_position = np.random.randint(0, len(selected_values) + 1)
    
    return np.insert(selected_values, random_position, row["recommended_app_name"]) 

df_recommender_train['candidate_apps'] = df_recommender_train.apply(candidate_creator, axis=1)

In [10]:
df_recommender_train["recommended_app_name"][874] 

'six pack in 30 days'

In [11]:
df_recommender_train["turns"][874][df_recommender_train["recommend_indexes"][874]]

'computer: alright, then i recommend giving the six pack in 30 days app a try.'

In [12]:
df_recommender_train["candidate_apps"][874]

array(['cut the rope', 'sweet escapes: design a bakery with puzzle games',
       'bad piggies', 'fishdom', 'dop 4: draw one part',
       'math | riddles and puzzles maths games',
       'sudoku - classic sudoku puzzle', 'lily’s garden', 'flow free',
       'magic jigsaw puzzles - game hd', 'drop the number® : merge game',
       'six pack in 30 days', 'lemmings', 'sweet fruit candy',
       'bubble shooter: panda pop!', 'the room: old sins',
       'antistress - relaxation toys', 'bubble shooter rainbow',
       'my home design - modern city', 'disney frozen free fall games',
       'candy crush jelly saga', 'bad piggies hd',
       'angry birds dream blast', 'pokémon café remix',
       'human: fall flat'], dtype='<U48')

In [13]:
conversation_validation_path = "/u/amo-d1/grad/sma340/project/llmrank/MobileConvRec-Main/dialogs/validation"

user_id = []
previous_interactions = []
recommended_app_name = []
turns = []
recommend_indexes = []


# List all files in the folder
files = os.listdir(conversation_validation_path)

# Loop through each file in the directory
for filename in tqdm(files):
    # Construct the full file path
    file_path = os.path.join(conversation_validation_path, filename)

    # Check if the current item is a file
    if os.path.isfile(file_path):
        # Open and read the file
        with open(file_path, 'r') as file:
            # Read the content of the file
            file_content = file.read().lower()
            
            # finding the user id
            index_1 = file_content.find("user's previous interactions")
            user_id.append(file_content[9:index_1].rstrip('\n'))
            
            # finding the User's Previous Interactions
            index_2 = file_content.find("recommended app name:")
            previous_interactions_arr = file_content[index_1+29:index_2].rstrip('\n').split("app name:")
            previous_interactions_arr_filtered = []
            for previous_interaction in previous_interactions_arr[1:]:
                previous_interactions_arr_filtered.append(previous_interaction[:previous_interaction.find(" | ")])
            if len(previous_interactions_arr_filtered) > 0:
                previous_interactions.append(",".join(previous_interactions_arr_filtered))
            else:
                previous_interactions.append(None)
            
            # finding recommended app name
            index_3 = file_content[index_2:].find("package name")
            recommended = file_content[index_2+22:index_2+index_3-3].rstrip('\n')
            recommended_app_name.append(recommended)
            
            # finding each turns
            dialog_turns = []
            dialog_index = 0
            COMPUTER_index = file_content.find("computer:")
            file_content = file_content[COMPUTER_index:]
            found_recommender = False
            while True:
                HUMAN_index = file_content.find("human:")
                if HUMAN_index == -1:
                    break
                turn = file_content[:HUMAN_index].rstrip('\n') # computer dialog
                if (recommended in turn) and not found_recommender:
                    recommend_indexes.append(dialog_index)
                    found_recommender = True
                dialog_turns.append(turn)
                dialog_index +=1
                file_content = file_content[HUMAN_index:]
                
                COMPUTER_index = file_content.find("computer:")
                turn = file_content[:COMPUTER_index].rstrip('\n') # human dialog
                dialog_turns.append(turn)
                dialog_index +=1
                file_content = file_content[COMPUTER_index:]
                
            if not found_recommender: # approximately finding the recommender turn
                for i, dialog_turn in enumerate(dialog_turns):
                    if is_approximate_substring(recommended, dialog_turn):
                        recommend_indexes.append(i)
                        found_recommender = True
                        break
                    
            if not found_recommender:
                recommend_indexes.append(-1)
                        
            turns.append(dialog_turns)

print(len(user_id))
print(len(previous_interactions))
print(len(recommended_app_name))
print(len(recommend_indexes))
df_recommender_validation = pd.DataFrame({"user_id": user_id, "previous_interactions":previous_interactions, "recommended_app_name":recommended_app_name, "turns": turns, "recommend_indexes":recommend_indexes})
print(f"\nnumber of rows: {len(df_recommender_validation)}")

100%|██████████| 1285/1285 [00:04<00:00, 295.82it/s]

1285
1285
1285
1285

number of rows: 1285





In [14]:
df_recommender_validation = df_recommender_validation[(df_recommender_validation["recommend_indexes"] != -1) & (df_recommender_validation["turns"].apply(lambda x: len(x) > 0))]

In [15]:
def fix_recommended_apps_names(row):
    if row["recommended_app_name"] not in all_apps:
        for app in all_apps:
            if fuzz.ratio(row["recommended_app_name"], app) > 80:
                return app
        return "uno!™"
    else:
        return row["recommended_app_name"]

df_recommender_validation['recommended_app_name'] = df_recommender_validation.apply(fix_recommended_apps_names, axis=1)

In [16]:
def candidate_creator(row):
    selected_values = np.random.choice(np.setdiff1d(filter_candidate_apps(row["recommended_app_name"]), [row["recommended_app_name"]]), 24, replace=False)
    random_position = np.random.randint(0, len(selected_values) + 1)
    
    return np.insert(selected_values, random_position, row["recommended_app_name"]) 

df_recommender_validation['candidate_apps'] = df_recommender_validation.apply(candidate_creator, axis=1)

In [17]:
model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path = "google/flan-t5-base")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", additional_special_tokens=["computer:", "human:", "candidate_apps:"])
model.resize_token_embeddings(len(tokenizer))
IGNORE_INDEX = -100

In [18]:
prompt_validation = []
recommend_validation = []
for _, row in df_recommender_validation.iterrows():
    prompt = ""
    for index, turn in enumerate(row["turns"]):
        if index < row["recommend_indexes"]:
            prompt += turn + "\n"
        elif index == row["recommend_indexes"]:
            prompt += "candidate_apps: "
            for app in row["candidate_apps"]:
                prompt += "'" + app + "', "
            prompt += "\n"
            prompt += "computer: I would recommend the "
            prompt_validation.append(prompt)
            recommend_validation.append(row["recommended_app_name"] + " app.")
            break
        else:
            print("error!!")
            
            
prompt_encodings = tokenizer(prompt_validation, padding='max_length', max_length=1024, truncation=True, return_tensors='pt')
recommend_encodings = tokenizer(recommend_validation, padding='max_length', max_length=128, truncation=True, return_tensors='pt')

labels = recommend_encodings['input_ids']
labels[labels == tokenizer.pad_token_id] = IGNORE_INDEX

dataset = {
    'input_ids': prompt_encodings['input_ids'],
    'attention_mask': prompt_encodings['attention_mask'],
    'labels': labels,
}
dataset_validation = Dataset.from_dict(dataset)

In [19]:
prompt_train = []
recommend_train = []
for _, row in df_recommender_train.iterrows():
    prompt = ""
    for index, turn in enumerate(row["turns"]):
        if index < row["recommend_indexes"]:
            prompt += turn + "\n"
        elif index == row["recommend_indexes"]:
            prompt += "candidate_apps: "
            for app in row["candidate_apps"]:
                prompt += "'" + app + "', "
            prompt += "\n"
            prompt += "computer: I would recommend the "
            prompt_train.append(prompt)
            recommend_train.append(row["recommended_app_name"] + " app.")
            break
        else:
            print("error!!")
            
            
prompt_encodings = tokenizer(prompt_train, padding='max_length', max_length=1024, truncation=True, return_tensors='pt')
recommend_encodings = tokenizer(recommend_train, padding='max_length', max_length=128, truncation=True, return_tensors='pt')

labels = recommend_encodings['input_ids']
labels[labels == tokenizer.pad_token_id] = IGNORE_INDEX

dataset = {
    'input_ids': prompt_encodings['input_ids'],
    'attention_mask': prompt_encodings['attention_mask'],
    'labels': labels,
}
dataset_train = Dataset.from_dict(dataset)

In [20]:
recommend_encodings["input_ids"][10]

tensor([24435, 12245,   277,  1120,     5,     1,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
         -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100])

In [None]:
prompt_train

In [None]:
recommend_train

In [21]:
def data_collator(batch):
    input_ids, attention_mask, labels,  = [], [], []
    for sample in batch:
        input_ids.append(sample['input_ids'])
        attention_mask.append(sample['attention_mask'])
        labels.append(sample['labels'])
    max_encoder_len = max(sum(x) for x in attention_mask)
    max_decoder_len = max(sum([0 if item == IGNORE_INDEX else 1 for item in x]) for x in labels)
    return {
        'input_ids': torch.tensor(input_ids)[:, :max_encoder_len],
        'attention_mask': torch.tensor(attention_mask)[:, :max_encoder_len],
        'labels': torch.tensor(labels)[:, :max_decoder_len]
    }

In [22]:
training_args = TrainingArguments(
    output_dir="/u/amo-d1/grad/sma340/project/llmrank/MobileConvRec-Main/metrics/outputs_T5_no_interations_with_candidate_apps",
    num_train_epochs=5,
    # logging_steps=500,
    # logging_dir=self.cfg.logging_dir,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    save_strategy="steps",
    evaluation_strategy="steps",
    save_steps=1000,#self.cfg.save_steps,
    eval_steps=1000, #self.cfg.eval_steps,
    save_total_limit=3,
    gradient_accumulation_steps=3, #gradient_accumulation_steps,
    per_device_train_batch_size=4, #train_batch_size,
    per_device_eval_batch_size=4, #self.cfg.eval_batch_size,
    warmup_steps=100,
    weight_decay=0.01,
    # dataloader_drop_last=True,
    disable_tqdm=False,
    push_to_hub=False
)

trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset_train,
        eval_dataset=dataset_validation,
        data_collator=data_collator,
    )

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [23]:
trainer.train()
trainer.save_model()

Step,Training Loss,Validation Loss
1000,0.0381,0.035773
2000,0.0199,0.020562
3000,0.0139,0.022176


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


Test the model

In [5]:
torch.cuda.empty_cache()

In [6]:
def is_approximate_substring(substring, string, threshold=70):
    for i in range(len(string) - len(substring) + 1):
        window = string[i:i+len(substring)]
        similarity_ratio = fuzz.ratio(substring, window)
        if similarity_ratio >= threshold:
            return True
    return False

In [7]:
conversation_test_path = "/u/amo-d1/grad/sma340/project/llmrank/MobileConvRec-Main/dialogs/testing"

user_id = []
previous_interactions = []
recommended_app_name = []
turns = []
recommend_indexes = []


# List all files in the folder
files = os.listdir(conversation_test_path)

# Loop through each file in the directory
for filename in tqdm(files):
    # Construct the full file path
    file_path = os.path.join(conversation_test_path, filename)

    # Check if the current item is a file
    if os.path.isfile(file_path):
        # Open and read the file
        with open(file_path, 'r') as file:
            # Read the content of the file
            file_content = file.read().lower()
            
            # finding the user id
            index_1 = file_content.find("user's previous interactions")
            user_id.append(file_content[9:index_1].rstrip('\n'))
            
            # finding the User's Previous Interactions
            index_2 = file_content.find("recommended app name:")
            previous_interactions_arr = file_content[index_1+29:index_2].rstrip('\n').split("app name:")
            previous_interactions_arr_filtered = []
            for previous_interaction in previous_interactions_arr[1:]:
                previous_interactions_arr_filtered.append(previous_interaction[:previous_interaction.find(" | ")])
            if len(previous_interactions_arr_filtered) > 0:
                previous_interactions.append(",".join(previous_interactions_arr_filtered))
            else:
                previous_interactions.append(None)
            
            # finding recommended app name
            index_3 = file_content[index_2:].find("package name")
            recommended = file_content[index_2+22:index_2+index_3-3].rstrip('\n')
            recommended_app_name.append(recommended)
            
            # finding each turns
            dialog_turns = []
            dialog_index = 0
            COMPUTER_index = file_content.find("computer:")
            file_content = file_content[COMPUTER_index:]
            found_recommender = False
            while True:
                HUMAN_index = file_content.find("human:")
                if HUMAN_index == -1:
                    break
                turn = file_content[:HUMAN_index].rstrip('\n') # computer dialog
                if (recommended in turn) and not found_recommender:
                    recommend_indexes.append(dialog_index)
                    found_recommender = True
                dialog_turns.append(turn)
                dialog_index +=1
                file_content = file_content[HUMAN_index:]
                
                COMPUTER_index = file_content.find("computer:")
                turn = file_content[:COMPUTER_index].rstrip('\n') # human dialog
                dialog_turns.append(turn)
                dialog_index +=1
                file_content = file_content[COMPUTER_index:]
                
            if not found_recommender: # approximately finding the recommender turn
                for i, dialog_turn in enumerate(dialog_turns):
                    if is_approximate_substring(recommended, dialog_turn):
                        recommend_indexes.append(i)
                        found_recommender = True
                        break
                    
            if not found_recommender:
                recommend_indexes.append(-1)
                        
            turns.append(dialog_turns)

print(len(user_id))
print(len(previous_interactions))
print(len(recommended_app_name))
print(len(recommend_indexes))
df_recommender_test = pd.DataFrame({"user_id": user_id, "previous_interactions":previous_interactions, "recommended_app_name":recommended_app_name, "turns": turns, "recommend_indexes":recommend_indexes})
print(f"\nnumber of rows: {len(df_recommender_test)}")

100%|██████████| 2557/2557 [00:06<00:00, 394.47it/s]

2557
2557
2557
2557

number of rows: 2557





In [8]:
df_recommender_test = df_recommender_test[(df_recommender_test["recommend_indexes"] != -1) & (df_recommender_test["turns"].apply(lambda x: len(x) > 0))]

In [9]:
apps_training_path = "/u/amo-d1/grad/sma340/project/llmrank/MobileConvRec-Main/data/master_app_data_V1_true.csv"

all_apps = []
with open(apps_training_path, 'r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        all_apps.append(row["app_name"].lower())

In [10]:
def fix_recommended_apps_names(row):
    if row["recommended_app_name"] not in all_apps:
        for app in all_apps:
            if fuzz.ratio(row["recommended_app_name"], app) > 80:
                return app
        return "uno!™"
    else:
        return row["recommended_app_name"]

df_recommender_test['recommended_app_name'] = df_recommender_test.apply(fix_recommended_apps_names, axis=1)

In [15]:
cols = ['app_name','app_type','app_category']
df_app = pd.read_csv(apps_training_path, usecols=cols)
df_apps = df_app.map(lambda x: x.lower() if isinstance(x, str) else x)
candidate_apps = []
def filter_candidate_apps(rec_app_name):
    
    df_rec_app=df_apps[df_apps['app_name'] == rec_app_name]
    recommended_app_type = df_rec_app['app_type'].iloc[0]
    recommended_app_category = df_rec_app['app_category'].iloc[0]
    category_filtered = df_apps[df_apps['app_category'] == recommended_app_category]
    df_apps_filter_1 = df_apps[df_apps['app_category'] != recommended_app_category]
    #print(recommended_app_type, recommended_app_category)
    #print(len(category_filtered))
    
    max_ratio = 0
    best_match = None
    for index, row in category_filtered.iterrows():
        if len(candidate_apps) >= 25:
            break
        if fuzz.ratio(row["app_type"], recommended_app_type) > 30:
            app = row['app_name']
            
            candidate_apps.append(app)
    
    for index, row in df_apps_filter_1.iterrows():
        if len(candidate_apps) >= 25:
            break
        ratio = fuzz.ratio(row["app_type"], recommended_app_type)
        if ratio > max_ratio:
            max_ratio = ratio
            app = row['app_name']
            best_match = app
            candidate_apps.append(best_match)
    
    apps_to_remove_2 = set(candidate_apps)
    df_apps_filter_2 = df_apps[~df_apps['app_name'].isin(apps_to_remove_2)]

    for index, row in df_apps_filter_1.iterrows():
        if len(candidate_apps) >= 25:
            break
        app = row['app_name']
        candidate_apps.append(app)

    
     
    return candidate_apps

  df_apps = df_app.applymap(lambda x: x.lower() if isinstance(x, str) else x)


In [16]:
def candidate_creator(row):
    np.random.seed(row.name)
    selected_values = np.random.choice(np.setdiff1d(filter_candidate_apps(row["recommended_app_name"]), [row["recommended_app_name"]]), 24, replace=False)
    random_position = np.random.randint(0, len(selected_values) + 1)
    
    return np.insert(selected_values, random_position, row["recommended_app_name"]) 

df_recommender_test['candidate_apps'] = df_recommender_test.apply(lambda row: candidate_creator(row), axis=1)

In [17]:
prompt_test = []
recommend_test = []
candidate_apps = []
true_candidate_index = []
for _, row in df_recommender_test.iterrows():
    # creating candidate apps
    candidates = []
    for index, candidate_app in enumerate(row["candidate_apps"].tolist()):
        candidates.append(candidate_app + " app.")
        if candidate_app == row["recommended_app_name"]:
            true_candidate_index.append(index)
    candidate_apps.append(candidates)
    prompt = ""
    for index, turn in enumerate(row["turns"]):
        if index < row["recommend_indexes"]:
            prompt += turn + "\n"
        elif index == row["recommend_indexes"]:
            prompt += "candidate_apps: "
            for app in row["candidate_apps"]:
                prompt += "'" + app + "', "
            prompt += "\n"
            prompt += "computer: I would recommend the "
            prompt_test.append(prompt)
            recommend_test.append(row["recommended_app_name"] + " app.")
            break
        else:
            print("error!!")

In [18]:
model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model_name_or_path = "/u/amo-d1/grad/sma340/project/llmrank/MobileConvRec-Main/metrics/outputs_T5_no_interations_with_candidate_apps")
model.eval()
model = model.to('cuda')
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base", additional_special_tokens=["computer:", "human:",  "candidate_apps:"])
model.resize_token_embeddings(len(tokenizer))
IGNORE_INDEX = -100

In [None]:
# bleu = evaluate.load("bleu")

In [19]:
def chunk(list_of_elements, batch_size): # using this chunk function, we can split our data to multiple batches
  for i in range(0, len(list_of_elements), batch_size):
    yield list_of_elements[i:i+batch_size]

def evaluate_recommender(prompt_test, recommend_test, model, tokenizer, batch_size=8, threshold=70):
  prompt_batches = list(chunk(prompt_test, batch_size))
  generation_batches = list(chunk(recommend_test, batch_size))

  correctly_predicted = []
  for prompt_batch, generation_batch in tqdm(zip(prompt_batches, generation_batches), total = len(generation_batches)):

    inputs = tokenizer(prompt_batch, max_length=1024, truncation=True, padding="max_length", return_tensors="pt") 

    generations_predicted = model.generate(input_ids=inputs["input_ids"].to('cuda'), attention_mask=inputs["attention_mask"].to('cuda'),
                            max_new_tokens=32,
                            num_beams=8,
                            eos_token_id=tokenizer.eos_token_id,
                            pad_token_id=tokenizer.pad_token_id,
                            bos_token_id=tokenizer.bos_token_id) # length_penalty=0.8, Set length_penalty to values < 1.0 in order to encourage the model to generate shorter sequences, to a value > 1.0 in order to encourage the model to produce longer sequences.

    decoded_generations = [tokenizer.decode(generation, skip_special_tokens=True, clean_up_tokenization_spaces=True).replace(" app.", "")  for generation in generations_predicted]
    generation_batch = [generation.replace(" app.", "") for generation in generation_batch]
    
    correctly_predicted.extend([1 if fuzz.ratio(predicted, ground_truth) > threshold else 0 for predicted, ground_truth in zip(decoded_generations, generation_batch)])
    print("\n")

  return correctly_predicted

In [20]:
correctly_predicted = evaluate_recommender(prompt_test, recommend_test, model, tokenizer, batch_size=4)
success_rate = sum(correctly_predicted) / len(correctly_predicted)
print("success_rate: ", success_rate)

  0%|          | 1/634 [00:00<09:06,  1.16it/s]





  0%|          | 2/634 [00:01<07:10,  1.47it/s]





  0%|          | 3/634 [00:02<06:42,  1.57it/s]





  1%|          | 4/634 [00:02<06:30,  1.61it/s]





  1%|          | 5/634 [00:03<06:46,  1.55it/s]





  1%|          | 6/634 [00:04<07:01,  1.49it/s]





  1%|          | 7/634 [00:04<06:37,  1.58it/s]





  1%|▏         | 8/634 [00:05<06:21,  1.64it/s]





  1%|▏         | 9/634 [00:05<05:58,  1.74it/s]





  2%|▏         | 10/634 [00:06<05:50,  1.78it/s]





  2%|▏         | 11/634 [00:06<05:47,  1.79it/s]





  2%|▏         | 12/634 [00:07<05:52,  1.76it/s]





  2%|▏         | 13/634 [00:07<05:54,  1.75it/s]





  2%|▏         | 14/634 [00:08<05:58,  1.73it/s]





  2%|▏         | 15/634 [00:09<05:58,  1.73it/s]





  3%|▎         | 16/634 [00:09<05:52,  1.76it/s]





  3%|▎         | 17/634 [00:10<05:52,  1.75it/s]





  3%|▎         | 18/634 [00:10<05:52,  1.75it/s]





  3%|▎         | 19/634 [00:11<05:48,  1.76it/s]





  3%|▎         | 20/634 [00:12<06:18,  1.62it/s]





  3%|▎         | 21/634 [00:12<06:34,  1.55it/s]





  3%|▎         | 22/634 [00:13<06:15,  1.63it/s]





  4%|▎         | 23/634 [00:13<06:20,  1.60it/s]





  4%|▍         | 24/634 [00:14<06:10,  1.65it/s]





  4%|▍         | 25/634 [00:15<05:59,  1.69it/s]





  4%|▍         | 26/634 [00:15<05:54,  1.71it/s]





  4%|▍         | 27/634 [00:16<06:12,  1.63it/s]





  4%|▍         | 28/634 [00:16<06:08,  1.65it/s]





  5%|▍         | 29/634 [00:17<06:24,  1.57it/s]





  5%|▍         | 30/634 [00:18<06:18,  1.60it/s]





  5%|▍         | 31/634 [00:18<06:02,  1.66it/s]





  5%|▌         | 32/634 [00:19<06:18,  1.59it/s]





  5%|▌         | 33/634 [00:19<06:04,  1.65it/s]





  5%|▌         | 34/634 [00:20<05:57,  1.68it/s]





  6%|▌         | 35/634 [00:21<05:50,  1.71it/s]





  6%|▌         | 36/634 [00:21<06:14,  1.60it/s]





  6%|▌         | 37/634 [00:22<06:06,  1.63it/s]





  6%|▌         | 38/634 [00:22<05:56,  1.67it/s]





  6%|▌         | 39/634 [00:23<05:39,  1.75it/s]





  6%|▋         | 40/634 [00:24<05:38,  1.75it/s]





  6%|▋         | 41/634 [00:24<06:16,  1.57it/s]





  7%|▋         | 42/634 [00:25<05:57,  1.66it/s]





  7%|▋         | 43/634 [00:25<05:45,  1.71it/s]





  7%|▋         | 44/634 [00:26<05:35,  1.76it/s]





  7%|▋         | 45/634 [00:27<05:56,  1.65it/s]





  7%|▋         | 46/634 [00:27<05:41,  1.72it/s]





  7%|▋         | 47/634 [00:28<05:36,  1.74it/s]





  8%|▊         | 48/634 [00:28<05:37,  1.73it/s]





  8%|▊         | 49/634 [00:29<05:38,  1.73it/s]





  8%|▊         | 50/634 [00:30<06:01,  1.61it/s]





  8%|▊         | 51/634 [00:30<05:45,  1.69it/s]





  8%|▊         | 52/634 [00:31<05:42,  1.70it/s]





  8%|▊         | 53/634 [00:31<05:41,  1.70it/s]





  9%|▊         | 54/634 [00:32<05:28,  1.77it/s]





  9%|▊         | 55/634 [00:32<05:33,  1.74it/s]





  9%|▉         | 56/634 [00:33<05:22,  1.79it/s]





  9%|▉         | 57/634 [00:33<05:20,  1.80it/s]





  9%|▉         | 58/634 [00:34<05:26,  1.77it/s]





  9%|▉         | 59/634 [00:35<05:27,  1.76it/s]





  9%|▉         | 60/634 [00:35<05:33,  1.72it/s]





 10%|▉         | 61/634 [00:36<05:35,  1.71it/s]





 10%|▉         | 62/634 [00:36<05:27,  1.75it/s]





 10%|▉         | 63/634 [00:37<05:21,  1.78it/s]





 10%|█         | 64/634 [00:38<05:40,  1.67it/s]





 10%|█         | 65/634 [00:38<05:39,  1.68it/s]





 10%|█         | 66/634 [00:39<05:53,  1.61it/s]





 11%|█         | 67/634 [00:39<05:45,  1.64it/s]





 11%|█         | 68/634 [00:40<05:35,  1.69it/s]





 11%|█         | 69/634 [00:41<05:34,  1.69it/s]





 11%|█         | 70/634 [00:41<05:32,  1.69it/s]





 11%|█         | 71/634 [00:42<05:26,  1.72it/s]





 11%|█▏        | 72/634 [00:42<05:49,  1.61it/s]





 12%|█▏        | 73/634 [00:43<05:50,  1.60it/s]





 12%|█▏        | 74/634 [00:44<05:37,  1.66it/s]





 12%|█▏        | 75/634 [00:44<05:37,  1.66it/s]





 12%|█▏        | 76/634 [00:45<05:34,  1.67it/s]





 12%|█▏        | 77/634 [00:46<05:49,  1.59it/s]





 12%|█▏        | 78/634 [00:46<05:25,  1.71it/s]





 12%|█▏        | 79/634 [00:47<05:26,  1.70it/s]





 13%|█▎        | 80/634 [00:47<05:23,  1.71it/s]





 13%|█▎        | 81/634 [00:48<05:21,  1.72it/s]





 13%|█▎        | 82/634 [00:48<05:17,  1.74it/s]





 13%|█▎        | 83/634 [00:49<05:16,  1.74it/s]





 13%|█▎        | 84/634 [00:49<05:15,  1.74it/s]





 13%|█▎        | 85/634 [00:50<05:37,  1.63it/s]





 14%|█▎        | 86/634 [00:51<05:33,  1.64it/s]





 14%|█▎        | 87/634 [00:51<05:31,  1.65it/s]





 14%|█▍        | 88/634 [00:52<05:23,  1.69it/s]





 14%|█▍        | 89/634 [00:53<05:20,  1.70it/s]





 14%|█▍        | 90/634 [00:53<05:17,  1.71it/s]





 14%|█▍        | 91/634 [00:54<05:06,  1.77it/s]





 15%|█▍        | 92/634 [00:54<05:28,  1.65it/s]





 15%|█▍        | 93/634 [00:55<05:41,  1.58it/s]





 15%|█▍        | 94/634 [00:56<05:36,  1.60it/s]





 15%|█▍        | 95/634 [00:56<05:27,  1.64it/s]





 15%|█▌        | 96/634 [00:57<05:20,  1.68it/s]





 15%|█▌        | 97/634 [00:57<05:35,  1.60it/s]





 15%|█▌        | 98/634 [00:58<05:48,  1.54it/s]





 16%|█▌        | 99/634 [00:59<05:35,  1.60it/s]





 16%|█▌        | 100/634 [00:59<05:29,  1.62it/s]





 16%|█▌        | 101/634 [01:00<05:35,  1.59it/s]





 16%|█▌        | 102/634 [01:01<05:29,  1.62it/s]





 16%|█▌        | 103/634 [01:01<05:22,  1.65it/s]





 16%|█▋        | 104/634 [01:02<05:12,  1.70it/s]





 17%|█▋        | 105/634 [01:02<05:07,  1.72it/s]





 17%|█▋        | 106/634 [01:03<05:04,  1.73it/s]





 17%|█▋        | 107/634 [01:03<04:59,  1.76it/s]





 17%|█▋        | 108/634 [01:04<05:00,  1.75it/s]





 17%|█▋        | 109/634 [01:05<04:59,  1.75it/s]





 17%|█▋        | 110/634 [01:05<04:58,  1.76it/s]





 18%|█▊        | 111/634 [01:06<05:11,  1.68it/s]





 18%|█▊        | 112/634 [01:07<05:42,  1.52it/s]





 18%|█▊        | 113/634 [01:07<05:25,  1.60it/s]





 18%|█▊        | 114/634 [01:08<05:18,  1.63it/s]





 18%|█▊        | 115/634 [01:08<05:22,  1.61it/s]





 18%|█▊        | 116/634 [01:09<05:19,  1.62it/s]





 18%|█▊        | 117/634 [01:10<05:15,  1.64it/s]





 19%|█▊        | 118/634 [01:10<05:13,  1.65it/s]





 19%|█▉        | 119/634 [01:11<05:29,  1.56it/s]





 19%|█▉        | 120/634 [01:11<05:22,  1.59it/s]





 19%|█▉        | 121/634 [01:12<05:50,  1.46it/s]





 19%|█▉        | 122/634 [01:13<05:20,  1.60it/s]





 19%|█▉        | 123/634 [01:13<05:07,  1.66it/s]





 20%|█▉        | 124/634 [01:14<05:00,  1.70it/s]





 20%|█▉        | 125/634 [01:14<04:44,  1.79it/s]





 20%|█▉        | 126/634 [01:15<04:47,  1.77it/s]





 20%|██        | 127/634 [01:16<05:06,  1.65it/s]





 20%|██        | 128/634 [01:16<05:34,  1.51it/s]





 20%|██        | 129/634 [01:17<05:40,  1.48it/s]





 21%|██        | 130/634 [01:18<05:43,  1.47it/s]





 21%|██        | 131/634 [01:18<05:23,  1.56it/s]





 21%|██        | 132/634 [01:19<05:07,  1.63it/s]





 21%|██        | 133/634 [01:20<05:17,  1.58it/s]





 21%|██        | 134/634 [01:20<05:16,  1.58it/s]





 21%|██▏       | 135/634 [01:21<05:06,  1.63it/s]





 21%|██▏       | 136/634 [01:21<04:57,  1.67it/s]





 22%|██▏       | 137/634 [01:22<05:00,  1.65it/s]





 22%|██▏       | 138/634 [01:23<05:01,  1.65it/s]





 22%|██▏       | 139/634 [01:23<04:53,  1.69it/s]





 22%|██▏       | 140/634 [01:24<05:02,  1.63it/s]





 22%|██▏       | 141/634 [01:24<04:54,  1.67it/s]





 22%|██▏       | 142/634 [01:25<04:49,  1.70it/s]





 23%|██▎       | 143/634 [01:26<04:47,  1.71it/s]





 23%|██▎       | 144/634 [01:26<04:44,  1.72it/s]





 23%|██▎       | 145/634 [01:27<04:42,  1.73it/s]





 23%|██▎       | 146/634 [01:27<04:45,  1.71it/s]





 23%|██▎       | 147/634 [01:28<04:58,  1.63it/s]





 23%|██▎       | 148/634 [01:29<04:52,  1.66it/s]





 24%|██▎       | 149/634 [01:29<04:37,  1.75it/s]





 24%|██▎       | 150/634 [01:30<04:37,  1.74it/s]





 24%|██▍       | 151/634 [01:30<04:35,  1.75it/s]





 24%|██▍       | 152/634 [01:31<04:39,  1.73it/s]





 24%|██▍       | 153/634 [01:31<04:28,  1.79it/s]





 24%|██▍       | 154/634 [01:32<04:39,  1.72it/s]





 24%|██▍       | 155/634 [01:32<04:26,  1.79it/s]





 25%|██▍       | 156/634 [01:33<04:30,  1.77it/s]





 25%|██▍       | 157/634 [01:34<04:45,  1.67it/s]





 25%|██▍       | 158/634 [01:34<04:42,  1.68it/s]





 25%|██▌       | 159/634 [01:35<04:36,  1.72it/s]





 25%|██▌       | 160/634 [01:35<04:29,  1.76it/s]





 25%|██▌       | 161/634 [01:36<04:27,  1.77it/s]





 26%|██▌       | 162/634 [01:36<04:31,  1.74it/s]





 26%|██▌       | 163/634 [01:37<04:35,  1.71it/s]





 26%|██▌       | 164/634 [01:38<04:29,  1.75it/s]





 26%|██▌       | 165/634 [01:38<04:25,  1.76it/s]





 26%|██▌       | 166/634 [01:39<04:23,  1.77it/s]





 26%|██▋       | 167/634 [01:39<04:21,  1.79it/s]





 26%|██▋       | 168/634 [01:40<04:33,  1.71it/s]





 27%|██▋       | 169/634 [01:41<04:48,  1.61it/s]





 27%|██▋       | 170/634 [01:41<04:39,  1.66it/s]





 27%|██▋       | 171/634 [01:42<04:34,  1.69it/s]





 27%|██▋       | 172/634 [01:42<04:44,  1.62it/s]





 27%|██▋       | 173/634 [01:43<04:34,  1.68it/s]





 27%|██▋       | 174/634 [01:44<04:44,  1.62it/s]





 28%|██▊       | 175/634 [01:44<04:47,  1.59it/s]





 28%|██▊       | 176/634 [01:45<04:42,  1.62it/s]





 28%|██▊       | 177/634 [01:46<04:42,  1.62it/s]





 28%|██▊       | 178/634 [01:46<04:37,  1.64it/s]





 28%|██▊       | 179/634 [01:47<04:36,  1.64it/s]





 28%|██▊       | 180/634 [01:47<04:44,  1.59it/s]





 29%|██▊       | 181/634 [01:48<04:55,  1.53it/s]





 29%|██▊       | 182/634 [01:49<04:41,  1.61it/s]





 29%|██▉       | 183/634 [01:49<04:43,  1.59it/s]





 29%|██▉       | 184/634 [01:50<04:47,  1.57it/s]





 29%|██▉       | 185/634 [01:51<04:35,  1.63it/s]





 29%|██▉       | 186/634 [01:51<04:18,  1.73it/s]





 29%|██▉       | 187/634 [01:52<04:33,  1.64it/s]





 30%|██▉       | 188/634 [01:52<04:27,  1.67it/s]





 30%|██▉       | 189/634 [01:53<04:22,  1.69it/s]





 30%|██▉       | 190/634 [01:53<04:15,  1.74it/s]





 30%|███       | 191/634 [01:54<04:16,  1.73it/s]





 30%|███       | 192/634 [01:54<04:03,  1.82it/s]





 30%|███       | 193/634 [01:55<03:54,  1.88it/s]





 31%|███       | 194/634 [01:56<04:01,  1.82it/s]





 31%|███       | 195/634 [01:56<03:55,  1.86it/s]





 31%|███       | 196/634 [01:57<03:55,  1.86it/s]





 31%|███       | 197/634 [01:57<03:47,  1.92it/s]





 31%|███       | 198/634 [01:58<03:56,  1.84it/s]





 31%|███▏      | 199/634 [01:58<04:15,  1.71it/s]





 32%|███▏      | 200/634 [01:59<04:09,  1.74it/s]





 32%|███▏      | 201/634 [01:59<04:09,  1.73it/s]





 32%|███▏      | 202/634 [02:00<04:08,  1.74it/s]





 32%|███▏      | 203/634 [02:01<04:00,  1.79it/s]





 32%|███▏      | 204/634 [02:01<04:00,  1.79it/s]





 32%|███▏      | 205/634 [02:02<04:00,  1.78it/s]





 32%|███▏      | 206/634 [02:02<03:58,  1.79it/s]





 33%|███▎      | 207/634 [02:03<03:57,  1.80it/s]





 33%|███▎      | 208/634 [02:03<03:58,  1.79it/s]





 33%|███▎      | 209/634 [02:04<04:01,  1.76it/s]





 33%|███▎      | 210/634 [02:05<03:59,  1.77it/s]





 33%|███▎      | 211/634 [02:05<03:58,  1.77it/s]





 33%|███▎      | 212/634 [02:06<04:13,  1.66it/s]





 34%|███▎      | 213/634 [02:06<04:08,  1.69it/s]





 34%|███▍      | 214/634 [02:07<04:06,  1.71it/s]





 34%|███▍      | 215/634 [02:08<04:13,  1.66it/s]





 34%|███▍      | 216/634 [02:08<04:02,  1.72it/s]





 34%|███▍      | 217/634 [02:09<04:00,  1.74it/s]





 34%|███▍      | 218/634 [02:09<03:55,  1.76it/s]





 35%|███▍      | 219/634 [02:10<03:48,  1.82it/s]





 35%|███▍      | 220/634 [02:10<03:47,  1.82it/s]





 35%|███▍      | 221/634 [02:11<03:43,  1.85it/s]





 35%|███▌      | 222/634 [02:11<03:44,  1.84it/s]





 35%|███▌      | 223/634 [02:12<03:47,  1.81it/s]





 35%|███▌      | 224/634 [02:12<03:43,  1.84it/s]





 35%|███▌      | 225/634 [02:13<03:49,  1.78it/s]





 36%|███▌      | 226/634 [02:14<03:54,  1.74it/s]





 36%|███▌      | 227/634 [02:14<03:53,  1.75it/s]





 36%|███▌      | 228/634 [02:15<04:07,  1.64it/s]





 36%|███▌      | 229/634 [02:15<04:04,  1.66it/s]





 36%|███▋      | 230/634 [02:16<04:15,  1.58it/s]





 36%|███▋      | 231/634 [02:17<04:02,  1.66it/s]





 37%|███▋      | 232/634 [02:17<04:12,  1.59it/s]





 37%|███▋      | 233/634 [02:18<04:02,  1.66it/s]





 37%|███▋      | 234/634 [02:18<03:55,  1.70it/s]





 37%|███▋      | 235/634 [02:19<03:43,  1.78it/s]





 37%|███▋      | 236/634 [02:19<03:31,  1.89it/s]





 37%|███▋      | 237/634 [02:20<03:26,  1.93it/s]





 38%|███▊      | 238/634 [02:21<03:44,  1.77it/s]





 38%|███▊      | 239/634 [02:21<03:58,  1.66it/s]





 38%|███▊      | 240/634 [02:22<03:46,  1.74it/s]





 38%|███▊      | 241/634 [02:22<03:43,  1.76it/s]





 38%|███▊      | 242/634 [02:23<03:43,  1.76it/s]





 38%|███▊      | 243/634 [02:23<03:40,  1.78it/s]





 38%|███▊      | 244/634 [02:24<03:43,  1.75it/s]





 39%|███▊      | 245/634 [02:25<03:38,  1.78it/s]





 39%|███▉      | 246/634 [02:25<03:35,  1.80it/s]





 39%|███▉      | 247/634 [02:26<03:35,  1.79it/s]





 39%|███▉      | 248/634 [02:26<03:34,  1.80it/s]





 39%|███▉      | 249/634 [02:27<03:29,  1.84it/s]





 39%|███▉      | 250/634 [02:27<03:31,  1.82it/s]





 40%|███▉      | 251/634 [02:28<03:25,  1.86it/s]





 40%|███▉      | 252/634 [02:29<03:41,  1.72it/s]





 40%|███▉      | 253/634 [02:29<03:36,  1.76it/s]





 40%|████      | 254/634 [02:30<03:35,  1.76it/s]





 40%|████      | 255/634 [02:30<03:35,  1.76it/s]





 40%|████      | 256/634 [02:31<03:34,  1.76it/s]





 41%|████      | 257/634 [02:31<03:32,  1.77it/s]





 41%|████      | 258/634 [02:32<03:32,  1.77it/s]





 41%|████      | 259/634 [02:32<03:29,  1.79it/s]





 41%|████      | 260/634 [02:33<03:29,  1.79it/s]





 41%|████      | 261/634 [02:34<03:28,  1.78it/s]





 41%|████▏     | 262/634 [02:34<03:20,  1.86it/s]





 41%|████▏     | 263/634 [02:35<03:34,  1.73it/s]





 42%|████▏     | 264/634 [02:35<03:32,  1.74it/s]





 42%|████▏     | 265/634 [02:36<03:32,  1.73it/s]





 42%|████▏     | 266/634 [02:37<03:36,  1.70it/s]





 42%|████▏     | 267/634 [02:37<03:32,  1.73it/s]





 42%|████▏     | 268/634 [02:38<03:29,  1.75it/s]





 42%|████▏     | 269/634 [02:38<03:41,  1.65it/s]





 43%|████▎     | 270/634 [02:39<03:35,  1.69it/s]





 43%|████▎     | 271/634 [02:39<03:31,  1.72it/s]





 43%|████▎     | 272/634 [02:40<03:28,  1.74it/s]





 43%|████▎     | 273/634 [02:41<03:26,  1.75it/s]





 43%|████▎     | 274/634 [02:41<03:27,  1.73it/s]





 43%|████▎     | 275/634 [02:42<03:24,  1.75it/s]





 44%|████▎     | 276/634 [02:42<03:23,  1.76it/s]





 44%|████▎     | 277/634 [02:43<03:22,  1.76it/s]





 44%|████▍     | 278/634 [02:43<03:20,  1.77it/s]





 44%|████▍     | 279/634 [02:44<03:14,  1.83it/s]





 44%|████▍     | 280/634 [02:45<03:29,  1.69it/s]





 44%|████▍     | 281/634 [02:45<03:26,  1.71it/s]





 44%|████▍     | 282/634 [02:46<03:38,  1.61it/s]





 45%|████▍     | 283/634 [02:46<03:30,  1.66it/s]





 45%|████▍     | 284/634 [02:47<03:25,  1.70it/s]





 45%|████▍     | 285/634 [02:48<03:22,  1.73it/s]





 45%|████▌     | 286/634 [02:48<03:32,  1.64it/s]





 45%|████▌     | 287/634 [02:49<03:20,  1.73it/s]





 45%|████▌     | 288/634 [02:49<03:22,  1.71it/s]





 46%|████▌     | 289/634 [02:50<03:21,  1.71it/s]





 46%|████▌     | 290/634 [02:50<03:18,  1.73it/s]





 46%|████▌     | 291/634 [02:51<03:16,  1.75it/s]





 46%|████▌     | 292/634 [02:52<03:14,  1.76it/s]





 46%|████▌     | 293/634 [02:52<03:07,  1.82it/s]





 46%|████▋     | 294/634 [02:53<03:07,  1.82it/s]





 47%|████▋     | 295/634 [02:53<03:06,  1.82it/s]





 47%|████▋     | 296/634 [02:54<03:07,  1.80it/s]





 47%|████▋     | 297/634 [02:54<03:07,  1.80it/s]





 47%|████▋     | 298/634 [02:55<03:19,  1.69it/s]





 47%|████▋     | 299/634 [02:56<03:26,  1.62it/s]





 47%|████▋     | 300/634 [02:56<03:31,  1.58it/s]





 47%|████▋     | 301/634 [02:57<03:23,  1.64it/s]





 48%|████▊     | 302/634 [02:58<03:30,  1.58it/s]





 48%|████▊     | 303/634 [02:58<03:22,  1.64it/s]





 48%|████▊     | 304/634 [02:59<03:16,  1.68it/s]





 48%|████▊     | 305/634 [02:59<03:11,  1.72it/s]





 48%|████▊     | 306/634 [03:00<03:08,  1.74it/s]





 48%|████▊     | 307/634 [03:00<03:09,  1.73it/s]





 49%|████▊     | 308/634 [03:01<03:06,  1.75it/s]





 49%|████▊     | 309/634 [03:01<03:03,  1.77it/s]





 49%|████▉     | 310/634 [03:02<03:02,  1.78it/s]





 49%|████▉     | 311/634 [03:03<03:16,  1.64it/s]





 49%|████▉     | 312/634 [03:03<03:11,  1.68it/s]





 49%|████▉     | 313/634 [03:04<03:06,  1.72it/s]





 50%|████▉     | 314/634 [03:04<03:03,  1.75it/s]





 50%|████▉     | 315/634 [03:05<02:56,  1.81it/s]





 50%|████▉     | 316/634 [03:05<02:54,  1.82it/s]





 50%|█████     | 317/634 [03:06<02:57,  1.78it/s]





 50%|█████     | 318/634 [03:07<02:56,  1.79it/s]





 50%|█████     | 319/634 [03:07<02:55,  1.80it/s]





 50%|█████     | 320/634 [03:08<03:02,  1.72it/s]





 51%|█████     | 321/634 [03:08<03:03,  1.71it/s]





 51%|█████     | 322/634 [03:09<03:04,  1.69it/s]





 51%|█████     | 323/634 [03:10<03:11,  1.62it/s]





 51%|█████     | 324/634 [03:10<03:15,  1.58it/s]





 51%|█████▏    | 325/634 [03:11<03:07,  1.64it/s]





 51%|█████▏    | 326/634 [03:11<03:02,  1.69it/s]





 52%|█████▏    | 327/634 [03:12<02:57,  1.73it/s]





 52%|█████▏    | 328/634 [03:13<02:55,  1.75it/s]





 52%|█████▏    | 329/634 [03:13<02:55,  1.73it/s]





 52%|█████▏    | 330/634 [03:14<02:53,  1.75it/s]





 52%|█████▏    | 331/634 [03:14<02:51,  1.76it/s]





 52%|█████▏    | 332/634 [03:15<02:49,  1.78it/s]





 53%|█████▎    | 333/634 [03:15<02:56,  1.70it/s]





 53%|█████▎    | 334/634 [03:16<02:52,  1.74it/s]





 53%|█████▎    | 335/634 [03:17<03:01,  1.65it/s]





 53%|█████▎    | 336/634 [03:17<02:56,  1.69it/s]





 53%|█████▎    | 337/634 [03:18<02:52,  1.73it/s]





 53%|█████▎    | 338/634 [03:18<02:51,  1.73it/s]





 53%|█████▎    | 339/634 [03:19<02:52,  1.71it/s]





 54%|█████▎    | 340/634 [03:20<02:54,  1.69it/s]





 54%|█████▍    | 341/634 [03:20<02:49,  1.72it/s]





 54%|█████▍    | 342/634 [03:21<02:46,  1.75it/s]





 54%|█████▍    | 343/634 [03:21<02:44,  1.77it/s]





 54%|█████▍    | 344/634 [03:22<02:40,  1.81it/s]





 54%|█████▍    | 345/634 [03:22<02:44,  1.76it/s]





 55%|█████▍    | 346/634 [03:23<02:37,  1.82it/s]





 55%|█████▍    | 347/634 [03:23<02:36,  1.83it/s]





 55%|█████▍    | 348/634 [03:24<02:44,  1.74it/s]





 55%|█████▌    | 349/634 [03:25<02:42,  1.76it/s]





 55%|█████▌    | 350/634 [03:25<02:46,  1.71it/s]





 55%|█████▌    | 351/634 [03:26<02:42,  1.74it/s]





 56%|█████▌    | 352/634 [03:26<02:39,  1.76it/s]





 56%|█████▌    | 353/634 [03:27<02:37,  1.78it/s]





 56%|█████▌    | 354/634 [03:27<02:36,  1.79it/s]





 56%|█████▌    | 355/634 [03:28<02:45,  1.69it/s]





 56%|█████▌    | 356/634 [03:29<02:41,  1.72it/s]





 56%|█████▋    | 357/634 [03:29<02:48,  1.64it/s]





 56%|█████▋    | 358/634 [03:30<02:39,  1.73it/s]





 57%|█████▋    | 359/634 [03:30<02:32,  1.81it/s]





 57%|█████▋    | 360/634 [03:31<02:31,  1.81it/s]





 57%|█████▋    | 361/634 [03:32<02:40,  1.70it/s]





 57%|█████▋    | 362/634 [03:32<02:47,  1.63it/s]





 57%|█████▋    | 363/634 [03:33<02:50,  1.59it/s]





 57%|█████▋    | 364/634 [03:33<02:43,  1.65it/s]





 58%|█████▊    | 365/634 [03:34<02:48,  1.59it/s]





 58%|█████▊    | 366/634 [03:35<02:38,  1.70it/s]





 58%|█████▊    | 367/634 [03:35<02:34,  1.73it/s]





 58%|█████▊    | 368/634 [03:36<02:31,  1.75it/s]





 58%|█████▊    | 369/634 [03:36<02:31,  1.74it/s]





 58%|█████▊    | 370/634 [03:37<02:29,  1.77it/s]





 59%|█████▊    | 371/634 [03:37<02:27,  1.78it/s]





 59%|█████▊    | 372/634 [03:38<02:28,  1.76it/s]





 59%|█████▉    | 373/634 [03:39<02:33,  1.70it/s]





 59%|█████▉    | 374/634 [03:39<02:31,  1.71it/s]





 59%|█████▉    | 375/634 [03:40<02:28,  1.74it/s]





 59%|█████▉    | 376/634 [03:40<02:20,  1.84it/s]





 59%|█████▉    | 377/634 [03:41<02:20,  1.83it/s]





 60%|█████▉    | 378/634 [03:41<02:20,  1.83it/s]





 60%|█████▉    | 379/634 [03:42<02:29,  1.71it/s]





 60%|█████▉    | 380/634 [03:43<02:36,  1.62it/s]





 60%|██████    | 381/634 [03:43<02:28,  1.71it/s]





 60%|██████    | 382/634 [03:44<02:34,  1.63it/s]





 60%|██████    | 383/634 [03:44<02:27,  1.70it/s]





 61%|██████    | 384/634 [03:45<02:24,  1.74it/s]





 61%|██████    | 385/634 [03:46<02:25,  1.72it/s]





 61%|██████    | 386/634 [03:46<02:22,  1.74it/s]





 61%|██████    | 387/634 [03:47<02:20,  1.76it/s]





 61%|██████    | 388/634 [03:47<02:18,  1.77it/s]





 61%|██████▏   | 389/634 [03:48<02:17,  1.78it/s]





 62%|██████▏   | 390/634 [03:48<02:16,  1.78it/s]





 62%|██████▏   | 391/634 [03:49<02:16,  1.78it/s]





 62%|██████▏   | 392/634 [03:50<02:23,  1.68it/s]





 62%|██████▏   | 393/634 [03:50<02:29,  1.61it/s]





 62%|██████▏   | 394/634 [03:51<02:23,  1.67it/s]





 62%|██████▏   | 395/634 [03:51<02:16,  1.76it/s]





 62%|██████▏   | 396/634 [03:52<02:23,  1.66it/s]





 63%|██████▎   | 397/634 [03:53<02:19,  1.70it/s]





 63%|██████▎   | 398/634 [03:53<02:16,  1.72it/s]





 63%|██████▎   | 399/634 [03:54<02:12,  1.77it/s]





 63%|██████▎   | 400/634 [03:54<02:14,  1.74it/s]





 63%|██████▎   | 401/634 [03:55<02:14,  1.73it/s]





 63%|██████▎   | 402/634 [03:55<02:12,  1.75it/s]





 64%|██████▎   | 403/634 [03:56<02:10,  1.77it/s]





 64%|██████▎   | 404/634 [03:56<02:09,  1.77it/s]





 64%|██████▍   | 405/634 [03:57<02:04,  1.85it/s]





 64%|██████▍   | 406/634 [03:58<02:04,  1.83it/s]





 64%|██████▍   | 407/634 [03:58<02:04,  1.82it/s]





 64%|██████▍   | 408/634 [03:59<02:04,  1.82it/s]





 65%|██████▍   | 409/634 [03:59<02:04,  1.81it/s]





 65%|██████▍   | 410/634 [04:00<02:10,  1.72it/s]





 65%|██████▍   | 411/634 [04:00<02:07,  1.75it/s]





 65%|██████▍   | 412/634 [04:01<02:05,  1.77it/s]





 65%|██████▌   | 413/634 [04:01<02:03,  1.79it/s]





 65%|██████▌   | 414/634 [04:02<02:02,  1.80it/s]





 65%|██████▌   | 415/634 [04:03<02:01,  1.80it/s]





 66%|██████▌   | 416/634 [04:03<02:02,  1.79it/s]





 66%|██████▌   | 417/634 [04:04<02:01,  1.79it/s]





 66%|██████▌   | 418/634 [04:04<02:00,  1.79it/s]





 66%|██████▌   | 419/634 [04:05<01:59,  1.79it/s]





 66%|██████▌   | 420/634 [04:05<01:59,  1.79it/s]





 66%|██████▋   | 421/634 [04:06<01:58,  1.80it/s]





 67%|██████▋   | 422/634 [04:07<02:05,  1.68it/s]





 67%|██████▋   | 423/634 [04:07<02:10,  1.61it/s]





 67%|██████▋   | 424/634 [04:08<02:01,  1.73it/s]





 67%|██████▋   | 425/634 [04:08<01:58,  1.76it/s]





 67%|██████▋   | 426/634 [04:09<01:57,  1.77it/s]





 67%|██████▋   | 427/634 [04:09<01:50,  1.87it/s]





 68%|██████▊   | 428/634 [04:10<01:51,  1.85it/s]





 68%|██████▊   | 429/634 [04:10<01:51,  1.84it/s]





 68%|██████▊   | 430/634 [04:11<01:50,  1.84it/s]





 68%|██████▊   | 431/634 [04:12<01:56,  1.74it/s]





 68%|██████▊   | 432/634 [04:12<01:54,  1.76it/s]





 68%|██████▊   | 433/634 [04:13<01:53,  1.77it/s]





 68%|██████▊   | 434/634 [04:13<01:51,  1.79it/s]





 69%|██████▊   | 435/634 [04:14<01:53,  1.75it/s]





 69%|██████▉   | 436/634 [04:14<01:50,  1.79it/s]





 69%|██████▉   | 437/634 [04:15<01:49,  1.80it/s]





 69%|██████▉   | 438/634 [04:16<01:48,  1.80it/s]





 69%|██████▉   | 439/634 [04:16<01:47,  1.81it/s]





 69%|██████▉   | 440/634 [04:17<01:42,  1.89it/s]





 70%|██████▉   | 441/634 [04:17<01:43,  1.86it/s]





 70%|██████▉   | 442/634 [04:18<01:44,  1.84it/s]





 70%|██████▉   | 443/634 [04:18<01:47,  1.78it/s]





 70%|███████   | 444/634 [04:19<01:46,  1.79it/s]





 70%|███████   | 445/634 [04:19<01:45,  1.80it/s]





 70%|███████   | 446/634 [04:20<01:44,  1.80it/s]





 71%|███████   | 447/634 [04:20<01:43,  1.80it/s]





 71%|███████   | 448/634 [04:21<01:43,  1.80it/s]





 71%|███████   | 449/634 [04:22<01:49,  1.69it/s]





 71%|███████   | 450/634 [04:22<01:42,  1.79it/s]





 71%|███████   | 451/634 [04:23<01:39,  1.84it/s]





 71%|███████▏  | 452/634 [04:23<01:39,  1.82it/s]





 71%|███████▏  | 453/634 [04:24<01:36,  1.87it/s]





 72%|███████▏  | 454/634 [04:24<01:37,  1.85it/s]





 72%|███████▏  | 455/634 [04:25<01:37,  1.83it/s]





 72%|███████▏  | 456/634 [04:26<01:44,  1.71it/s]





 72%|███████▏  | 457/634 [04:26<01:41,  1.74it/s]





 72%|███████▏  | 458/634 [04:27<01:37,  1.81it/s]





 72%|███████▏  | 459/634 [04:27<01:43,  1.70it/s]





 73%|███████▎  | 460/634 [04:28<01:45,  1.65it/s]





 73%|███████▎  | 461/634 [04:28<01:41,  1.70it/s]





 73%|███████▎  | 462/634 [04:29<01:37,  1.76it/s]





 73%|███████▎  | 463/634 [04:30<01:36,  1.78it/s]





 73%|███████▎  | 464/634 [04:30<01:35,  1.78it/s]





 73%|███████▎  | 465/634 [04:31<01:36,  1.74it/s]





 74%|███████▎  | 466/634 [04:31<01:35,  1.76it/s]





 74%|███████▎  | 467/634 [04:32<01:34,  1.77it/s]





 74%|███████▍  | 468/634 [04:32<01:31,  1.81it/s]





 74%|███████▍  | 469/634 [04:33<01:31,  1.81it/s]





 74%|███████▍  | 470/634 [04:33<01:28,  1.86it/s]





 74%|███████▍  | 471/634 [04:34<01:34,  1.73it/s]





 74%|███████▍  | 472/634 [04:35<01:32,  1.75it/s]





 75%|███████▍  | 473/634 [04:35<01:29,  1.79it/s]





 75%|███████▍  | 474/634 [04:36<01:25,  1.87it/s]





 75%|███████▍  | 475/634 [04:36<01:25,  1.86it/s]





 75%|███████▌  | 476/634 [04:37<01:31,  1.72it/s]





 75%|███████▌  | 477/634 [04:37<01:30,  1.74it/s]





 75%|███████▌  | 478/634 [04:38<01:26,  1.81it/s]





 76%|███████▌  | 479/634 [04:39<01:29,  1.73it/s]





 76%|███████▌  | 480/634 [04:39<01:33,  1.65it/s]





 76%|███████▌  | 481/634 [04:40<01:28,  1.74it/s]





 76%|███████▌  | 482/634 [04:40<01:23,  1.81it/s]





 76%|███████▌  | 483/634 [04:41<01:23,  1.81it/s]





 76%|███████▋  | 484/634 [04:41<01:22,  1.81it/s]





 76%|███████▋  | 485/634 [04:42<01:24,  1.75it/s]





 77%|███████▋  | 486/634 [04:43<01:28,  1.67it/s]





 77%|███████▋  | 487/634 [04:43<01:25,  1.71it/s]





 77%|███████▋  | 488/634 [04:44<01:23,  1.74it/s]





 77%|███████▋  | 489/634 [04:44<01:23,  1.74it/s]





 77%|███████▋  | 490/634 [04:45<01:19,  1.82it/s]





 77%|███████▋  | 491/634 [04:45<01:18,  1.82it/s]





 78%|███████▊  | 492/634 [04:46<01:17,  1.83it/s]





 78%|███████▊  | 493/634 [04:47<01:31,  1.54it/s]





 78%|███████▊  | 494/634 [04:47<01:26,  1.62it/s]





 78%|███████▊  | 495/634 [04:48<01:24,  1.65it/s]





 78%|███████▊  | 496/634 [04:48<01:20,  1.71it/s]





 78%|███████▊  | 497/634 [04:49<01:18,  1.74it/s]





 79%|███████▊  | 498/634 [04:50<01:16,  1.77it/s]





 79%|███████▊  | 499/634 [04:50<01:15,  1.79it/s]





 79%|███████▉  | 500/634 [04:51<01:12,  1.85it/s]





 79%|███████▉  | 501/634 [04:51<01:12,  1.84it/s]





 79%|███████▉  | 502/634 [04:52<01:11,  1.84it/s]





 79%|███████▉  | 503/634 [04:52<01:11,  1.83it/s]





 79%|███████▉  | 504/634 [04:53<01:15,  1.72it/s]





 80%|███████▉  | 505/634 [04:53<01:14,  1.72it/s]





 80%|███████▉  | 506/634 [04:54<01:12,  1.76it/s]





 80%|███████▉  | 507/634 [04:55<01:16,  1.67it/s]





 80%|████████  | 508/634 [04:55<01:10,  1.78it/s]





 80%|████████  | 509/634 [04:56<01:10,  1.79it/s]





 80%|████████  | 510/634 [04:56<01:09,  1.80it/s]





 81%|████████  | 511/634 [04:57<01:12,  1.70it/s]





 81%|████████  | 512/634 [04:57<01:10,  1.73it/s]





 81%|████████  | 513/634 [04:58<01:13,  1.65it/s]





 81%|████████  | 514/634 [04:59<01:10,  1.70it/s]





 81%|████████  | 515/634 [04:59<01:08,  1.73it/s]





 81%|████████▏ | 516/634 [05:00<01:07,  1.76it/s]





 82%|████████▏ | 517/634 [05:00<01:09,  1.67it/s]





 82%|████████▏ | 518/634 [05:01<01:11,  1.61it/s]





 82%|████████▏ | 519/634 [05:02<01:09,  1.67it/s]





 82%|████████▏ | 520/634 [05:02<01:08,  1.67it/s]





 82%|████████▏ | 521/634 [05:03<01:06,  1.71it/s]





 82%|████████▏ | 522/634 [05:03<01:08,  1.64it/s]





 82%|████████▏ | 523/634 [05:04<01:04,  1.71it/s]





 83%|████████▎ | 524/634 [05:04<01:00,  1.81it/s]





 83%|████████▎ | 525/634 [05:05<01:00,  1.81it/s]





 83%|████████▎ | 526/634 [05:06<00:58,  1.86it/s]





 83%|████████▎ | 527/634 [05:06<00:55,  1.92it/s]





 83%|████████▎ | 528/634 [05:07<00:57,  1.86it/s]





 83%|████████▎ | 529/634 [05:07<00:59,  1.76it/s]





 84%|████████▎ | 530/634 [05:08<00:58,  1.76it/s]





 84%|████████▍ | 531/634 [05:08<00:56,  1.83it/s]





 84%|████████▍ | 532/634 [05:09<00:58,  1.74it/s]





 84%|████████▍ | 533/634 [05:09<00:55,  1.81it/s]





 84%|████████▍ | 534/634 [05:10<00:56,  1.77it/s]





 84%|████████▍ | 535/634 [05:11<00:55,  1.78it/s]





 85%|████████▍ | 536/634 [05:11<00:56,  1.74it/s]





 85%|████████▍ | 537/634 [05:12<00:56,  1.71it/s]





 85%|████████▍ | 538/634 [05:12<00:58,  1.64it/s]





 85%|████████▌ | 539/634 [05:13<00:56,  1.68it/s]





 85%|████████▌ | 540/634 [05:14<00:53,  1.77it/s]





 85%|████████▌ | 541/634 [05:14<00:52,  1.78it/s]





 85%|████████▌ | 542/634 [05:15<00:51,  1.80it/s]





 86%|████████▌ | 543/634 [05:15<00:50,  1.80it/s]





 86%|████████▌ | 544/634 [05:16<00:53,  1.69it/s]





 86%|████████▌ | 545/634 [05:16<00:51,  1.73it/s]





 86%|████████▌ | 546/634 [05:17<00:50,  1.76it/s]





 86%|████████▋ | 547/634 [05:18<00:49,  1.75it/s]





 86%|████████▋ | 548/634 [05:18<00:49,  1.74it/s]





 87%|████████▋ | 549/634 [05:19<00:47,  1.78it/s]





 87%|████████▋ | 550/634 [05:19<00:47,  1.75it/s]





 87%|████████▋ | 551/634 [05:20<00:46,  1.79it/s]





 87%|████████▋ | 552/634 [05:20<00:45,  1.79it/s]





 87%|████████▋ | 553/634 [05:21<00:48,  1.67it/s]





 87%|████████▋ | 554/634 [05:22<00:47,  1.70it/s]





 88%|████████▊ | 555/634 [05:22<00:45,  1.72it/s]





 88%|████████▊ | 556/634 [05:23<00:44,  1.75it/s]





 88%|████████▊ | 557/634 [05:23<00:44,  1.73it/s]





 88%|████████▊ | 558/634 [05:24<00:43,  1.74it/s]





 88%|████████▊ | 559/634 [05:25<00:45,  1.65it/s]





 88%|████████▊ | 560/634 [05:25<00:46,  1.60it/s]





 88%|████████▊ | 561/634 [05:26<00:43,  1.67it/s]





 89%|████████▊ | 562/634 [05:26<00:42,  1.69it/s]





 89%|████████▉ | 563/634 [05:27<00:40,  1.76it/s]





 89%|████████▉ | 564/634 [05:27<00:39,  1.78it/s]





 89%|████████▉ | 565/634 [05:28<00:38,  1.79it/s]





 89%|████████▉ | 566/634 [05:28<00:36,  1.86it/s]





 89%|████████▉ | 567/634 [05:29<00:35,  1.89it/s]





 90%|████████▉ | 568/634 [05:29<00:35,  1.87it/s]





 90%|████████▉ | 569/634 [05:30<00:35,  1.85it/s]





 90%|████████▉ | 570/634 [05:31<00:34,  1.83it/s]





 90%|█████████ | 571/634 [05:31<00:34,  1.82it/s]





 90%|█████████ | 572/634 [05:32<00:34,  1.79it/s]





 90%|█████████ | 573/634 [05:32<00:33,  1.80it/s]





 91%|█████████ | 574/634 [05:33<00:33,  1.80it/s]





 91%|█████████ | 575/634 [05:33<00:31,  1.86it/s]





 91%|█████████ | 576/634 [05:34<00:31,  1.84it/s]





 91%|█████████ | 577/634 [05:34<00:30,  1.84it/s]





 91%|█████████ | 578/634 [05:35<00:30,  1.84it/s]





 91%|█████████▏| 579/634 [05:35<00:29,  1.84it/s]





 91%|█████████▏| 580/634 [05:36<00:29,  1.83it/s]





 92%|█████████▏| 581/634 [05:37<00:28,  1.89it/s]





 92%|█████████▏| 582/634 [05:37<00:27,  1.87it/s]





 92%|█████████▏| 583/634 [05:38<00:29,  1.72it/s]





 92%|█████████▏| 584/634 [05:38<00:28,  1.75it/s]





 92%|█████████▏| 585/634 [05:39<00:27,  1.80it/s]





 92%|█████████▏| 586/634 [05:39<00:26,  1.80it/s]





 93%|█████████▎| 587/634 [05:40<00:26,  1.81it/s]





 93%|█████████▎| 588/634 [05:40<00:25,  1.81it/s]





 93%|█████████▎| 589/634 [05:41<00:27,  1.61it/s]





 93%|█████████▎| 590/634 [05:42<00:26,  1.67it/s]





 93%|█████████▎| 591/634 [05:42<00:24,  1.76it/s]





 93%|█████████▎| 592/634 [05:43<00:23,  1.77it/s]





 94%|█████████▎| 593/634 [05:43<00:22,  1.80it/s]





 94%|█████████▎| 594/634 [05:44<00:22,  1.80it/s]





 94%|█████████▍| 595/634 [05:45<00:22,  1.76it/s]





 94%|█████████▍| 596/634 [05:45<00:22,  1.71it/s]





 94%|█████████▍| 597/634 [05:46<00:21,  1.74it/s]





 94%|█████████▍| 598/634 [05:46<00:21,  1.71it/s]





 94%|█████████▍| 599/634 [05:47<00:20,  1.74it/s]





 95%|█████████▍| 600/634 [05:47<00:19,  1.76it/s]





 95%|█████████▍| 601/634 [05:48<00:18,  1.74it/s]





 95%|█████████▍| 602/634 [05:49<00:18,  1.76it/s]





 95%|█████████▌| 603/634 [05:49<00:17,  1.78it/s]





 95%|█████████▌| 604/634 [05:50<00:16,  1.84it/s]





 95%|█████████▌| 605/634 [05:50<00:15,  1.83it/s]





 96%|█████████▌| 606/634 [05:51<00:15,  1.82it/s]





 96%|█████████▌| 607/634 [05:51<00:15,  1.70it/s]





 96%|█████████▌| 608/634 [05:52<00:15,  1.73it/s]





 96%|█████████▌| 609/634 [05:53<00:14,  1.76it/s]





 96%|█████████▌| 610/634 [05:53<00:13,  1.82it/s]





 96%|█████████▋| 611/634 [05:54<00:12,  1.79it/s]





 97%|█████████▋| 612/634 [05:54<00:12,  1.81it/s]





 97%|█████████▋| 613/634 [05:55<00:12,  1.70it/s]





 97%|█████████▋| 614/634 [05:55<00:12,  1.65it/s]





 97%|█████████▋| 615/634 [05:56<00:11,  1.70it/s]





 97%|█████████▋| 616/634 [05:57<00:10,  1.74it/s]





 97%|█████████▋| 617/634 [05:57<00:10,  1.67it/s]





 97%|█████████▋| 618/634 [05:58<00:09,  1.71it/s]





 98%|█████████▊| 619/634 [05:58<00:08,  1.75it/s]





 98%|█████████▊| 620/634 [05:59<00:08,  1.66it/s]





 98%|█████████▊| 621/634 [06:00<00:07,  1.69it/s]





 98%|█████████▊| 622/634 [06:00<00:06,  1.72it/s]





 98%|█████████▊| 623/634 [06:01<00:06,  1.65it/s]





 98%|█████████▊| 624/634 [06:01<00:06,  1.61it/s]





 99%|█████████▊| 625/634 [06:02<00:05,  1.58it/s]





 99%|█████████▊| 626/634 [06:03<00:04,  1.66it/s]





 99%|█████████▉| 627/634 [06:03<00:04,  1.62it/s]





 99%|█████████▉| 628/634 [06:04<00:03,  1.67it/s]





 99%|█████████▉| 629/634 [06:04<00:02,  1.71it/s]





 99%|█████████▉| 630/634 [06:05<00:02,  1.66it/s]





100%|█████████▉| 631/634 [06:06<00:01,  1.70it/s]





100%|█████████▉| 632/634 [06:06<00:01,  1.74it/s]





100%|█████████▉| 633/634 [06:07<00:00,  1.84it/s]





100%|██████████| 634/634 [06:07<00:00,  1.72it/s]



success_rate:  0.27287066246056785





In [21]:
def chunk(list_of_elements, batch_size): # using this chunk function, we can split our data to multiple batches
  for i in range(0, len(list_of_elements), batch_size):
    yield list_of_elements[i:i+batch_size]
    
def convert_to_sublists(numbers, sublist_size):
    return [numbers[i:i+sublist_size] for i in range(0, len(numbers), sublist_size)]

def recommender_rank(prompts, candidate_apps, model, tokenizer, batch_size=8):
  model.eval()
  encoder_max_length = 1024
  decoder_max_length = 32
  prompts_tokenized = tokenizer(prompts, max_length=encoder_max_length, truncation=True, padding="max_length", return_tensors="pt")
  
  input_ids_decoder = []
  attention_mask_decoder = []
  input_ids_encoder = []
  attention_mask_encoder  = []
  for index, candidate_app_elements in enumerate(candidate_apps):
    candidate_app_elements = [tokenizer.pad_token+element for element in candidate_app_elements] # adding pad token to the beginning of each candidate app
    candidate_apps_tokenized = tokenizer(candidate_app_elements, max_length=decoder_max_length, truncation=True, padding="max_length", return_tensors="pt")
    for candidate_app_index in range(len(candidate_app_elements)):
      input_ids_decoder.append(candidate_apps_tokenized["input_ids"][candidate_app_index])
      attention_mask_decoder.append(candidate_apps_tokenized["attention_mask"][candidate_app_index])
      input_ids_encoder.append(prompts_tokenized["input_ids"][index])
      attention_mask_encoder.append(prompts_tokenized["attention_mask"][index])
  
  input_ids_encoder_batches = list(chunk(input_ids_encoder, batch_size))
  attention_mask_encoder_batches = list(chunk(attention_mask_encoder, batch_size))
  input_ids_decoder_batches = list(chunk(input_ids_decoder, batch_size))
  attention_mask_decoder_batches = list(chunk(attention_mask_decoder, batch_size))
  

  scores = []
  for input_ids_encoder_batch, attention_mask_encoder_batch, input_ids_decoder_batch, attention_mask_decoder_batch in tqdm(zip(input_ids_encoder_batches, attention_mask_encoder_batches, input_ids_decoder_batches, attention_mask_decoder_batches), total = len(input_ids_encoder_batches)):
    decoder_input_ids = torch.stack(input_ids_decoder_batch).to("cuda")
    decoder_attention_mask = torch.stack(attention_mask_decoder_batch).to("cuda")
    input_ids = torch.stack(input_ids_encoder_batch).to("cuda")
    attention_mask = torch.stack(attention_mask_encoder_batch).to("cuda")
    with torch.no_grad():
      model_output = model(decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, 
                           input_ids=input_ids, attention_mask=attention_mask)
    
    logprobs = F.log_softmax(model_output["logits"], dim=-1)[:, :-1, :] # remove the eos token
    output_tokens = decoder_input_ids[:, 1:] # remove the bos token
        
    tokens_logprobs = torch.gather(logprobs, 2, output_tokens[:, :, None]).squeeze(-1).to(torch.float32)
        
    mask = torch.ones(tokens_logprobs.shape, dtype=torch.bool, device="cuda")
    for i, _output in enumerate(output_tokens):
      for j, _token in enumerate(_output):
        if _token == tokenizer.pad_token_id:
          mask[i, j] = False
              
    score = (tokens_logprobs * mask).sum(-1) / mask.sum(-1)
    scores.extend(score.to('cpu').tolist())
    
  # batch_input_representations = torch.cat(batch_input_representations)
  
  scores = convert_to_sublists(scores, len(candidate_apps[0]))
  
  return scores

In [22]:
scores = recommender_rank(prompt_test, candidate_apps, model, tokenizer, batch_size=8)

 12%|█▏        | 984/7925 [03:46<26:33,  4.36it/s]

In [None]:
top_k_accuracy_score(true_candidate_index, scores, k=10)

In [None]:
true_relevance = [[1 if item == index else 0 for item in range(len(candidate_apps[0]))] for index in true_candidate_index]

In [None]:
ndcg_score(true_relevance, scores, k=10)

In [None]:
import pickle

# Open a file in binary write mode
with open('scores_FlanT5.pkl', 'wb') as file:
    # Use pickle to dump the array into the file
    pickle.dump(scores, file)

In [None]:
# Open the file in binary read mode
import pickle
with open('scores_FlanT5.pkl', 'rb') as file:
    # Load the array from the file
    scores = pickle.load(file)