In [None]:
# Order CUDA devices by PCI bus ID.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# Expose only device 0 to this kernel.
%env CUDA_VISIBLE_DEVICES=0

This model combines two pre-trained models:

model1:

    previous_interactions_model (pre-trained model available and code to train it provided)
    
model2:

    conversation_model (pre-trained model in baselines "gpt2 recommender")

# Importing Libraries

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
import torch
import os
import pandas as pd
from tqdm import tqdm
from torch.utils.data import Dataset
from dataclasses import dataclass
from typing import Optional, List
from fuzzywuzzy import fuzz
import numpy as np
import csv
import torch.nn as nn
from torch.nn import functional as F
from typing import Optional, Tuple, Union
from torch.nn import CrossEntropyLoss
from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions

# Importing Training and Validation Data

In [None]:
# Both files are read as JSON Lines (one JSON record per line, `lines=True`).
# NOTE(review): the path strings look like placeholders - replace them before running.
train_raw = pd.read_json("training data json file path", lines=True)
valid_raw = pd.read_json("validation data json file path", lines=True)

In [None]:
def is_approximate_substring(substring, string, threshold=70):
    """Return True if any window of ``string`` fuzzily matches ``substring``.

    A window with the same length as ``substring`` is slid over ``string`` one
    character at a time and scored with ``fuzz.ratio``; the scan stops at the
    first window whose score is at least ``threshold``. When ``substring`` is
    longer than ``string`` there is no window to test and the result is False.
    """
    width = len(substring)
    last_start = len(string) - width
    return any(
        fuzz.ratio(substring, string[start:start + width]) >= threshold
        for start in range(last_start + 1)
    )

In [None]:
# Flatten every raw training dialogue into one row holding: the user id, the
# comma-joined names of previously used apps (None when there are none), the
# recommended app name, the speaker-prefixed turns, and the index of the turn
# in which the recommendation is made (-1 when it cannot be located).
user_id, previous_interactions, recommended_app_name = [], [], []
turns, recommend_indexes = [], []

for _, row in train_raw.iterrows():
    target_app = row['recommended_app']['app_name']
    user_id.append(row['user_id'])
    recommended_app_name.append(target_app)

    history = [app['app_name'] for app in row['user_previous_interactions']]
    previous_interactions.append(",".join(history) if history else None)

    dialog_turns = []
    recommend_at = -1  # -1 == recommendation turn not located (yet)
    for conv in row['turns']:
        if "COMPUTER" in conv:
            utterance = 'computer: ' + conv['COMPUTER']
            # Only the first computer turn with an exact mention counts.
            if recommend_at == -1 and target_app in utterance:
                recommend_at = len(dialog_turns)
            dialog_turns.append(utterance)
        if "HUMAN" in conv:
            dialog_turns.append('human: ' + conv['HUMAN'])

    if recommend_at == -1:
        # No exact mention: fall back to the first turn (either speaker) that
        # approximately contains the app name.
        recommend_at = next(
            (
                position
                for position, utterance in enumerate(dialog_turns)
                if is_approximate_substring(target_app, utterance)
            ),
            -1,
        )

    recommend_indexes.append(recommend_at)
    turns.append(dialog_turns)

print(len(user_id))
print(len(previous_interactions))
print(len(recommended_app_name))
print(len(recommend_indexes))
df_recommender_train = pd.DataFrame({
    "user_id": user_id,
    "previous_interactions": previous_interactions,
    "recommended_app_name": recommended_app_name,
    "turns": turns,
    "recommend_indexes": recommend_indexes,
})
print(f"\nnumber of rows: {len(df_recommender_train)}")

In [None]:
# Same flattening as for the training split, applied to the validation
# dialogues: one row per dialogue with user id, joined history (or None),
# recommended app, speaker-prefixed turns and the recommendation turn index
# (-1 when it cannot be located).
user_id, previous_interactions, recommended_app_name = [], [], []
turns, recommend_indexes = [], []

for _, row in valid_raw.iterrows():
    target_app = row['recommended_app']['app_name']
    user_id.append(row['user_id'])
    recommended_app_name.append(target_app)

    history = [app['app_name'] for app in row['user_previous_interactions']]
    previous_interactions.append(",".join(history) if history else None)

    dialog_turns = []
    recommend_at = -1  # -1 == recommendation turn not located (yet)
    for conv in row['turns']:
        if "COMPUTER" in conv:
            utterance = 'computer: ' + conv['COMPUTER']
            # Only the first computer turn with an exact mention counts.
            if recommend_at == -1 and target_app in utterance:
                recommend_at = len(dialog_turns)
            dialog_turns.append(utterance)
        if "HUMAN" in conv:
            dialog_turns.append('human: ' + conv['HUMAN'])

    if recommend_at == -1:
        # No exact mention: fall back to the first approximately matching turn.
        recommend_at = next(
            (
                position
                for position, utterance in enumerate(dialog_turns)
                if is_approximate_substring(target_app, utterance)
            ),
            -1,
        )

    recommend_indexes.append(recommend_at)
    turns.append(dialog_turns)

print(len(user_id))
print(len(previous_interactions))
print(len(recommended_app_name))
print(len(recommend_indexes))
df_recommender_validation = pd.DataFrame({
    "user_id": user_id,
    "previous_interactions": previous_interactions,
    "recommended_app_name": recommended_app_name,
    "turns": turns,
    "recommend_indexes": recommend_indexes,
})
print(f"\nnumber of rows: {len(df_recommender_validation)}")

In [None]:
def _clean_recommender_frame(frame):
    """Drop unusable dialogues and lowercase every text column.

    Rows are kept only when a recommendation turn was located
    (``recommend_indexes != -1``) and the dialogue has at least one turn.

    Returns a new, independent DataFrame; ``frame`` itself is not modified.
    """
    has_target = frame["recommend_indexes"] != -1
    has_turns = frame["turns"].apply(lambda x: len(x) > 0)
    # .copy() makes the result independent of `frame`, so the column
    # assignments below do not trigger SettingWithCopyWarning.
    cleaned = frame[has_target & has_turns].copy()

    cleaned['user_id'] = cleaned['user_id'].str.lower()
    # `.str.lower()` would turn a missing history (None) into NaN, and NaN
    # passes the `is not None` checks used when the prompts are built later.
    # Lowercase by hand so that a missing history stays None.
    cleaned['previous_interactions'] = [
        value.lower() if isinstance(value, str) else None
        for value in cleaned['previous_interactions']
    ]
    cleaned['recommended_app_name'] = cleaned['recommended_app_name'].str.lower()
    cleaned['turns'] = cleaned['turns'].apply(lambda x: [s.lower() for s in x])
    return cleaned


df_recommender_train = _clean_recommender_frame(df_recommender_train)
df_recommender_validation = _clean_recommender_frame(df_recommender_validation)

# Model Definition

`model1` is the previous-interactions model.

`model2` is the conversation model.

In [None]:
# Checkpoint locations passed to CombinedModel below.
# NOTE(review): both strings look like placeholders - replace them before running.
model1_path = "model1 path"
model2_path = "model2 path"

In [None]:
class GPT2Encoder(nn.Module):
    """Transformer trunk of a pre-trained GPT-2 checkpoint, without its LM head."""

    def __init__(self, model_name='gpt2'):
        super().__init__()
        pretrained = GPT2LMHeadModel.from_pretrained(model_name)
        # Only the `.transformer` sub-module is kept; the checkpoint's LM head
        # is not stored on this wrapper.
        self.transformer = pretrained.transformer

    def forward(self, input_ids, attention_mask=None):
        """Run the trunk on ``input_ids`` and return its output object unchanged."""
        outputs = self.transformer(input_ids, attention_mask=attention_mask)
        return outputs

In [None]:
class Conv1D(nn.Module):
    """
    GPT-style "1D convolution" (Radford et al.), i.e. an affine map over the
    last dimension whose weight is stored transposed relative to ``nn.Linear``.

    Args:
        nf (`int`): The number of output features.
        nx (`int`): The number of input features.
    """
    def __init__(self, nf, nx):
        super().__init__()
        self.nf = nf
        # Weight has shape (in_features, out_features), drawn from N(0, 0.02^2).
        initial_weight = torch.empty(nx, nf)
        nn.init.normal_(initial_weight, std=0.02)
        self.weight = nn.Parameter(initial_weight)
        self.bias = nn.Parameter(torch.zeros(nf))

    def forward(self, x):
        """Apply ``x @ weight + bias`` over the last axis, keeping leading axes."""
        out_shape = (*x.shape[:-1], self.nf)
        # Collapse all leading axes so a single addmm handles every position.
        flat_input = x.view(-1, x.size(-1))
        projected = torch.addmm(self.bias, flat_input, self.weight)
        return projected.view(out_shape)

In [None]:
class CrossAttention(nn.Module):
    """Multi-head cross-attention: queries from one sequence, keys/values from another.

    Args:
        hidden_size: Width of the hidden states; must be divisible by
            ``num_attention_heads``.
        num_attention_heads: Number of attention heads.
        max_position_embeddings: Side length of the (currently unused)
            lower-triangular ``bias`` buffer.

    Raises:
        ValueError: If ``hidden_size`` is not divisible by ``num_attention_heads``.
    """

    def __init__(self, hidden_size, num_attention_heads, max_position_embeddings):
        super().__init__()
        self.embed_dim = hidden_size
        self.num_heads = num_attention_heads
        self.head_dim = self.embed_dim // self.num_heads
        self.split_size = self.embed_dim

        if self.head_dim * self.num_heads != self.embed_dim:
            raise ValueError(f"`embed_dim` must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads}).")

        # c_attn produces keys and values in one projection (hence 2 * embed_dim);
        # q_attn produces the queries; c_proj mixes the merged heads.
        self.c_attn = Conv1D(2 * self.embed_dim, self.embed_dim)
        self.q_attn = Conv1D(self.embed_dim, self.embed_dim)
        self.c_proj = Conv1D(self.embed_dim, self.embed_dim)

        self.attn_dropout = nn.Dropout(0.1)
        self.resid_dropout = nn.Dropout(0.1)

        # Non-persistent buffers: not part of the state_dict and not read by
        # this class's own methods; kept so the module's attributes stay the same.
        self.register_buffer("bias", torch.tril(torch.ones((max_position_embeddings, max_position_embeddings), dtype=torch.bool)).view(1, 1, max_position_embeddings, max_position_embeddings), persistent=False)
        self.register_buffer("masked_bias", torch.tensor(-1e4), persistent=False)

    def _attn(self, query, key, value, attention_mask_kv=None, head_mask=None):
        """Scaled dot-product attention.

        Args:
            query: (batch, n_head, seqlen_q, head_dim).
            key, value: (batch, n_head, seqlen_kv, head_dim).
            attention_mask_kv: Either a 2-D padding mask of shape
                (batch, seqlen_kv) in which 0 marks positions to ignore, or an
                additive mask broadcastable to the score tensor (0 = keep,
                large negative = ignore).
            head_mask: Optional multiplier applied to the attention weights.

        Returns:
            ``(attn_output, attn_weights)`` with shapes
            (batch, n_head, seqlen_q, head_dim) and
            (batch, n_head, seqlen_q, seqlen_kv).
        """
        attn_weights = torch.matmul(query, key.transpose(-1, -2))  # (batch, n_head, seqlen_q, seqlen_kv)
        attn_weights = attn_weights / (value.size(-1) ** 0.5)

        if attention_mask_kv is not None:
            if attention_mask_kv.dim() == 2:
                # Raw 0/1 padding mask as produced by tokenizers/collators.
                # Adding it directly would neither broadcast for batch > 1 nor
                # mask anything, so padded keys are filled with the most
                # negative finite value (finite, so a fully padded row gives a
                # uniform distribution instead of NaN).
                padded_keys = (attention_mask_kv == 0)[:, None, None, :]
                attn_weights = attn_weights.masked_fill(padded_keys, torch.finfo(attn_weights.dtype).min)
            else:
                # Already an additive mask, e.g. shape (batch, 1, 1, seqlen_kv).
                attn_weights = attn_weights + attention_mask_kv

        attn_weights = nn.functional.softmax(attn_weights, dim=-1)
        attn_weights = attn_weights.type(value.dtype)
        attn_weights = self.attn_dropout(attn_weights)

        if head_mask is not None:
            attn_weights = attn_weights * head_mask

        attn_output = torch.matmul(attn_weights, value)  # (batch, n_head, seqlen_q, head_dim)
        return attn_output, attn_weights

    def _split_heads(self, tensor, num_heads, attn_head_size):
        """(batch, seq, embed) -> (batch, n_head, seq, head_dim)."""
        new_shape = tensor.size()[:-1] + (num_heads, attn_head_size)
        tensor = tensor.view(new_shape)
        return tensor.permute(0, 2, 1, 3)

    def _merge_heads(self, tensor, num_heads, attn_head_size):
        """(batch, n_head, seq, head_dim) -> (batch, seq, embed)."""
        tensor = tensor.permute(0, 2, 1, 3).contiguous()
        new_shape = tensor.size()[:-2] + (num_heads * attn_head_size,)
        return tensor.view(new_shape)

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        encoder_hidden_states: torch.Tensor,
        attention_mask_kv: Optional[torch.FloatTensor] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Attend from ``hidden_states`` (queries) to ``encoder_hidden_states`` (keys/values).

        ``attention_mask_kv`` describes the key/value sequence; see ``_attn``
        for the accepted formats.

        Returns:
            ``(attn_output, attn_weights)``; ``attn_output`` has the shape of
            ``hidden_states``.
        """
        query = self.q_attn(hidden_states)
        key, value = self.c_attn(encoder_hidden_states).split(self.split_size, dim=2)

        query = self._split_heads(query, self.num_heads, self.head_dim)  # (batch, n_head, seqlen_q, head_dim)
        key = self._split_heads(key, self.num_heads, self.head_dim)  # (batch, n_head, seqlen_kv, head_dim)
        value = self._split_heads(value, self.num_heads, self.head_dim)  # (batch, n_head, seqlen_kv, head_dim)

        attn_output, attn_weights = self._attn(query, key, value, attention_mask_kv)

        attn_output = self._merge_heads(attn_output, self.num_heads, self.head_dim)  # (batch, seqlen_q, n_embd)
        attn_output = self.c_proj(attn_output)
        attn_output = self.resid_dropout(attn_output)

        return attn_output, attn_weights


In [None]:
class GPT2MLP(nn.Module):
    """Position-wise feed-forward block: expand, GELU, project back, dropout.

    Args:
        embed_dim: Width of the incoming and outgoing hidden states.
        multiplyer: Expansion factor; the inner layer is
            ``multiplyer * embed_dim`` wide.
    """

    def __init__(self, embed_dim,multiplyer):
        super().__init__()
        inner_width = multiplyer * embed_dim
        self.c_fc = Conv1D(inner_width, embed_dim)
        self.c_proj = Conv1D(embed_dim, inner_width)
        self.act = nn.GELU()
        self.dropout = nn.Dropout(0.1)

    def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
        """Apply ``c_fc`` -> activation -> ``c_proj`` -> dropout."""
        expanded = self.act(self.c_fc(hidden_states))
        return self.dropout(self.c_proj(expanded))

In [None]:
class TransformerBlock(nn.Module):
    """Cross-attention block: ``x`` attends to ``context``, followed by an MLP.

    Args:
        n_embd: Hidden width.
        n_head: Number of attention heads.
        max_length: Forwarded to ``CrossAttention`` as ``max_position_embeddings``.
    """

    def __init__(self, n_embd, n_head,max_length):
        super().__init__()
        self.attention = CrossAttention(n_embd, n_head,max_length)
        self.ln1 = nn.LayerNorm(n_embd)
        self.feed_forward = GPT2MLP(n_embd,4)
        self.ln2 = nn.LayerNorm(n_embd)
        self.ln3 = nn.LayerNorm(n_embd)

    @staticmethod
    def _to_additive_mask(attention_mask, dtype):
        """Turn a 2-D 0/1 padding mask into an additive (batch, 1, 1, seq) mask.

        Kept positions become 0 and padded positions the most negative finite
        value of ``dtype``. ``None`` and masks that are not 2-D are returned
        unchanged.
        """
        if attention_mask is None or attention_mask.dim() != 2:
            return attention_mask
        additive = torch.zeros(attention_mask.shape, dtype=dtype, device=attention_mask.device)
        additive = additive.masked_fill(attention_mask == 0, torch.finfo(dtype).min)
        return additive[:, None, None, :]

    def forward(self, x,context,attention_mask):
        """Run one block.

        Args:
            x: Query-side hidden states.
            context: Key/value-side hidden states.
            attention_mask: Mask over the ``context`` positions; a 2-D 0/1
                padding mask is converted to additive form before being handed
                to the attention layer.

        Returns:
            Tensor with the same shape as ``x``.
        """
        x = self.ln2(x)
        # Normalise the *context* for the keys/values. The previous code
        # normalised `x` here, which discarded the context and made every
        # block plain non-causal self-attention over `x`.
        context = self.ln3(context)
        attention_mask = self._to_additive_mask(attention_mask, x.dtype)
        attended, _ = self.attention(x, context, attention_mask)
        # Note: the residual is taken from the already-normalised x.
        x = x + attended
        return x + self.feed_forward(self.ln1(x))

In [None]:
# `resize_token_embeddings` needs the tokenizer, but the notebook's tokenizer
# cell sits further down (in the training section). Build it here as well, with
# exactly the same settings, so this cell runs on a fresh kernel
# (Restart Kernel -> Run All) instead of raising NameError.
model_checkpoint = "gpt2"
bos = '<|startoftext|>'
eos = '<|endoftext|>'
pad = '<|pad|>'
tokenizer = GPT2Tokenizer.from_pretrained(model_checkpoint, bos_token=bos, eos_token=eos, pad_token=pad, additional_special_tokens=["computer:", "human:"])

# Load the conversation model, resize its embeddings to the tokenizer's
# vocabulary size, and keep a reference to its LM head for CombinedModel.
gpt2_model = GPT2LMHeadModel.from_pretrained(model2_path)
gpt2_model.resize_token_embeddings(len(tokenizer))
pretrained_linear_layer = gpt2_model.lm_head

In [None]:
class CombinedModel(nn.Module):
    """Two GPT-2 encoders joined by a stack of cross-attention blocks and an LM head.

    ``encoder1`` encodes ``input_ids1`` and ``encoder2`` encodes ``input_ids2``.
    The output of ``encoder2`` is passed through ``n_layer`` ``TransformerBlock``s
    that receive the output of ``encoder1`` as context, then through a final
    LayerNorm and the LM head.

    Args:
        model1: Checkpoint name/path loaded into ``encoder1``.
        model2: Checkpoint name/path loaded into ``encoder2``.
        n_head: Attention heads per block.
        n_layer: Number of blocks.
        n_embd: Hidden width.
        max_length: Forwarded to every ``TransformerBlock``.
        lm_head: Output projection module. When ``None`` (the default) the
            module-level ``pretrained_linear_layer`` is used, as before.
    """

    def __init__(self, model1 = '/kaggle/working/models/model1',model2 = '/kaggle/input/gp2-recommender/GPT2_recommender', n_head=12,n_layer=12,n_embd=768,max_length=512, lm_head=None):
        super().__init__()
        self.encoder1 = GPT2Encoder(model1)
        self.encoder2 = GPT2Encoder(model2)
        self.h = nn.ModuleList([TransformerBlock(n_embd, n_head, max_length) for i in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        # Fall back to the notebook-level head so existing call sites keep working.
        self.lm_head = pretrained_linear_layer if lm_head is None else lm_head
        self.loss_fn = CrossEntropyLoss(ignore_index=-100)

    def forward(self, input_ids1, input_ids2, attention_mask1=None, attention_mask2=None,labels=None):
        """Compute next-token logits for ``input_ids2`` (and the loss when ``labels`` is given).

        Args:
            input_ids1, attention_mask1: Input to ``encoder1``; its hidden
                states are the context of every block and ``attention_mask1``
                is the mask handed to the blocks.
            input_ids2, attention_mask2: Input to ``encoder2``; its hidden
                states are the sequence the logits are computed for.
            labels: Optional targets aligned with ``input_ids2``; positions set
                to -100 are ignored by the loss.

        Returns:
            ``CausalLMOutputWithCrossAttentions`` with ``logits`` and, when
            ``labels`` is given, ``loss``.
        """
        # Encode inputs using both encoders
        encoded_output1 = self.encoder1(input_ids1, attention_mask1).last_hidden_state
        encoded_output2 = self.encoder2(input_ids2, attention_mask2).last_hidden_state
        x = encoded_output2

        # Apply cross-attention
        for layer in self.h:
            x = layer(x,encoded_output1,attention_mask1)

        final_output = self.lm_head(self.ln_f(x))

        loss = None
        if labels is not None:
            # Position t predicts token t + 1: drop the last logit and the first label.
            shift_logits = final_output[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss = self.loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        if loss is not None:
            return CausalLMOutputWithCrossAttentions(loss=loss, logits=final_output)
        return CausalLMOutputWithCrossAttentions(logits=final_output)

    @staticmethod
    def get_last_non_padding_token_position(input_ids, pad_token_id):
        """Return the column index of the last non-padding token.

        ``input_ids`` is expected to be 2-D. With several rows, the index
        belongs to the last non-padding token of the last row that has one.
        Declared static so it can be called on the class or on an instance
        (previously an instance call passed ``self`` as ``input_ids``).
        """
        non_pad_positions = (input_ids != pad_token_id).nonzero(as_tuple=True)[1]
        last_non_pad_position = non_pad_positions[-1]
        return last_non_pad_position

    def generate(self, input_ids1, input_ids2, attention_mask1=None, attention_mask2=None, max_length=20, temperature=1.0, tokenizer=None):
        """Sample up to ``max_length`` tokens continuing ``input_ids2`` (batch size 1).

        Each new token is written at the position right after the last
        non-padding token of ``input_ids2`` (replacing a padding token if one
        is there) and ``attention_mask2``, when given, is extended to match.
        Sampling stops early once the tokenizer's EOS token is drawn. The model
        is switched to eval mode as a side effect.

        Args:
            input_ids1, attention_mask1: Fixed context (1-D inputs get a batch axis).
            input_ids2, attention_mask2: Prompt to continue (1-D inputs get a batch axis).
            max_length: Maximum number of tokens to sample.
            temperature: Logits are divided by this before the softmax.
            tokenizer: Provides ``eos_token_id`` and ``pad_token_id``.

        Returns:
            List of sampled token ids (including the EOS id when it was drawn).

        Raises:
            ValueError: If ``tokenizer`` is missing or the batch size is not 1.
        """
        if tokenizer is None:
            raise ValueError("Tokenizer must be provided")

        eos_token_id = tokenizer.eos_token_id
        pad_token_id = tokenizer.pad_token_id

        # Ensure the model is in evaluation mode
        self.eval()
        if input_ids1.dim() == 1:
            input_ids1 = input_ids1.unsqueeze(0)
        if input_ids2.dim() == 1:
            input_ids2 = input_ids2.unsqueeze(0)
        if attention_mask1 is not None and attention_mask1.dim() == 1:
            attention_mask1 = attention_mask1.unsqueeze(0)
        if attention_mask2 is not None and attention_mask2.dim() == 1:
            attention_mask2 = attention_mask2.unsqueeze(0)

        if input_ids2.size(0) != 1:
            raise ValueError("generate() currently supports a batch size of 1 only")

        # Locate the end of the prompt once and advance it by one per sampled
        # token; re-scanning for padding each step would stall if the sampled
        # token happened to be the pad token.
        pos = self.get_last_non_padding_token_position(input_ids2, pad_token_id).item()

        generated_sequence = []

        with torch.no_grad():
            for _ in range(max_length):
                output = self.forward(input_ids1, input_ids2, attention_mask1, attention_mask2)
                logits = output.logits[:, pos, :] / temperature
                probabilities = torch.nn.functional.softmax(logits, dim=-1)
                next_token = torch.multinomial(probabilities, num_samples=1)

                token_id = next_token.item()
                generated_sequence.append(token_id)

                if token_id == eos_token_id:
                    break

                # Write the new token at pos + 1 (overwriting padding if present).
                input_ids2 = torch.cat([input_ids2[:, :pos + 1], next_token, input_ids2[:, pos + 2:]], dim=1)
                if attention_mask2 is not None:
                    # Same dtype/device as the mask so concatenation does not
                    # silently promote an integer mask to float.
                    visible = torch.ones((1, 1), dtype=attention_mask2.dtype, device=attention_mask2.device)
                    attention_mask2 = torch.cat([attention_mask2[:, :pos + 1], visible, attention_mask2[:, pos + 2:]], dim=1)
                pos += 1

        return generated_sequence


Here, `model1` is the previous-interactions model and `model2` is the conversation model.

In [None]:
# Sequence length used for padding/truncation by the collator further down and
# for the buffers of the cross-attention blocks.
model_max_length = 512

model = CombinedModel(
    model1=model1_path,
    model2=model2_path,
    n_head=12,
    n_layer=12,
    n_embd=768,
    max_length=model_max_length,
)
print(model)

# Training Model

In [None]:
# Base checkpoint for the tokenizer plus the special tokens used in prompts.
model_checkpoint = "gpt2"
bos, eos, pad = '<|startoftext|>', '<|endoftext|>', '<|pad|>'

# Prefer the GPU when one is visible.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Speaker prefixes are registered as additional special tokens.
tokenizer = GPT2Tokenizer.from_pretrained(
    model_checkpoint,
    bos_token=bos,
    eos_token=eos,
    pad_token=pad,
    additional_special_tokens=["computer:", "human:"],
)

print(len(tokenizer))

In [None]:
@dataclass
class RecommenderItem:
    """One example: a dialogue prompt plus optional target text and user history."""
    # Text the model is conditioned on.
    prompt: str
    # Text the model should produce after the prompt (may be absent).
    generation: Optional[str] = None
    # Text describing the user's previous interactions (may be absent).
    interaction: Optional[str] = None


class recommenderDataset(Dataset):
    """Map-style dataset that serves ``RecommenderItem`` objects from a list."""

    def __init__(self, data: List[RecommenderItem]):
        self.data = data

    def __getitem__(self, idx) -> RecommenderItem:
        item = self.data[idx]
        return item

    def __len__(self):
        n_items = len(self.data)
        return n_items

In [None]:
# Build one training example per dialogue: the prompt is every turn before the
# recommendation turn followed by a fixed lead-in, the target is
# "<app name> app." + eos, and the interaction text is bos + history.
items_train = []
for _, row in df_recommender_train.iterrows():
    history = row["previous_interactions"]
    # A missing history can be None or NaN (pandas `.str.lower()` converts None
    # to NaN). NaN passes an `is not None` test and then fails on `str + float`,
    # so test for an actual string instead.
    interactions = bos + (history if isinstance(history, str) else "None")
    prompt = bos
    for index, turn in enumerate(row["turns"]):
        if index < row["recommend_indexes"]:
            prompt += turn + "\n"
        elif index == row["recommend_indexes"]:
            prompt += "computer: I would recommend the "
            items_train.append(RecommenderItem(prompt, row["recommended_app_name"] + " app." + eos,interactions))
            break
        else:
            print("error!!")

In [None]:
# Build one validation example per dialogue, exactly as for the training split:
# prompt = turns before the recommendation + fixed lead-in,
# target = "<app name> app." + eos, interaction text = bos + history.
items_validation = []
for _, row in df_recommender_validation.iterrows():
    history = row["previous_interactions"]
    # A missing history can be None or NaN (pandas `.str.lower()` converts None
    # to NaN). NaN passes an `is not None` test and then fails on `str + float`,
    # so test for an actual string instead.
    interactions = bos + (history if isinstance(history, str) else "None")
    prompt = bos
    for index, turn in enumerate(row["turns"]):
        if index < row["recommend_indexes"]:
            prompt += turn + "\n"
        elif index == row["recommend_indexes"]:
            prompt += "computer: I would recommend the "
            items_validation.append(RecommenderItem(prompt, row["recommended_app_name"] + " app." + eos,interactions))
            break
        else:
            print("error!!")

In [None]:
def _keep_start_and_tail(tokens, budget):
    """Shorten ``tokens`` to ``budget`` entries: first token + most recent tail."""
    overflow = len(tokens) - budget
    if overflow <= 0:
        return tokens
    return torch.cat([tokens[:1], tokens[overflow + 1:]], dim=0)


def training_collator(batch: List["RecommenderItem"], text_tokenizer=None, max_length=None):
    """Collate ``RecommenderItem``s into fixed-length tensors for ``CombinedModel``.

    For every item the interaction text becomes ``input_ids1`` /
    ``attention_mask1`` and prompt + generation becomes ``input_ids2`` /
    ``attention_mask2``. ``labels`` mirrors ``input_ids2`` but is -100 on the
    prompt and on padding, so only generation tokens contribute to the loss.
    Sequences longer than the limit lose tokens right after their first token
    (the start token and the most recent text are kept); shorter ones are
    right-padded with the pad token.

    Args:
        batch: Items to collate.
        text_tokenizer: Tokenizer to use; defaults to the notebook-level
            ``tokenizer``.
        max_length: Target sequence length; defaults to the notebook-level
            ``model_max_length``.

    Returns:
        Dict with keys ``input_ids2``, ``attention_mask2``, ``input_ids1``,
        ``attention_mask1`` and ``labels``; every value has shape
        (len(batch), max_length).

    Raises:
        ValueError: If a generation alone does not fit into ``max_length``.
    """
    tok = tokenizer if text_tokenizer is None else text_tokenizer
    limit = model_max_length if max_length is None else max_length
    pad_id = tok.pad_token_id

    conversation_ids, conversation_masks = [], []
    interaction_ids, interaction_masks = [], []
    labels = []
    for item in batch:
        interaction_tokens = tok.encode(item.interaction, return_tensors="pt")[0]
        prompt_tokens = tok.encode(item.prompt, return_tensors="pt")[0]
        generation_tokens = tok.encode(item.generation, return_tensors="pt")[0]
        generation_len = len(generation_tokens)
        if generation_len >= limit:
            raise ValueError(
                f"generation has {generation_len} tokens, which leaves no room for a prompt within max_length={limit}"
            )

        # Truncate each stream against its OWN budget. The previous code reset
        # the prompt's bookkeeping inside the interaction branch, which left a
        # negative pad length and a mask longer than the trimmed interaction.
        interaction_tokens = _keep_start_and_tail(interaction_tokens, limit)
        prompt_tokens = _keep_start_and_tail(prompt_tokens, limit - generation_len)

        interaction_len = len(interaction_tokens)
        prompt_len = len(prompt_tokens)
        interaction_pad = limit - interaction_len
        conversation_pad = limit - prompt_len - generation_len

        interaction_ids.append(
            torch.cat([interaction_tokens, torch.full([interaction_pad], pad_id)])
        )
        interaction_masks.append(
            torch.cat([torch.full([interaction_len], 1), torch.full([interaction_pad], 0)])
        )
        conversation_ids.append(
            torch.cat([prompt_tokens, generation_tokens, torch.full([conversation_pad], pad_id)])
        )
        conversation_masks.append(
            torch.cat([torch.full([prompt_len + generation_len], 1), torch.full([conversation_pad], 0)])
        )
        labels.append(
            torch.cat([
                torch.full([prompt_len], -100),
                generation_tokens,
                torch.full([conversation_pad], -100),
            ])
        )

    # The model's "1" inputs are the interactions and its "2" inputs the conversation.
    out = {
        "input_ids2": torch.stack(conversation_ids),
        "attention_mask2": torch.stack(conversation_masks),
        "input_ids1": torch.stack(interaction_ids),
        "attention_mask1": torch.stack(interaction_masks),
        "labels": torch.stack(labels),
    }

    return out

In [None]:
# Wrap the prepared examples so the Trainer can index them.
train_dataset = recommenderDataset(items_train)
eval_dataset = recommenderDataset(items_validation)

# Checkpointing and evaluation run on the same step schedule, and the
# checkpoint with the best eval loss is reloaded when training ends.
# NOTE(review): save_steps/eval_steps are floats below 1 - presumably read by
# transformers as a fraction of total training steps; confirm for the installed version.
training_args = TrainingArguments(
    output_dir="model save path",
    num_train_epochs=5,
    # logging_steps=500,
    # logging_dir=self.cfg.logging_dir,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    save_strategy="steps",
    evaluation_strategy="steps",
    save_steps=0.3,
    eval_steps=0.3,
    save_total_limit=4,
    gradient_accumulation_steps=3,
    per_device_train_batch_size=3,
    per_device_eval_batch_size=3,
    warmup_steps=100,
    weight_decay=0.01,
    # dataloader_drop_last=True,
    disable_tqdm=False,
    report_to='none',
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=training_collator,
)

In [None]:
# Run training with the `trainer` configured in the previous cell.
trainer.train()

We train with the Hugging Face `Trainer`, but `CombinedModel` is a plain `nn.Module` rather than a Hugging Face model, so `trainer.save_model()` might not be able to save it.

In that case, save the weights directly:

In [None]:
# Save only the model weights (its state_dict).
# NOTE(review): "save path" looks like a placeholder - replace it before running.
torch.save(model.state_dict(), "save path")

# Importing Test Data

In [None]:
# The test split is read as JSON Lines (one JSON record per line, `lines=True`).
# NOTE(review): the path string looks like a placeholder - replace it before running.
test_raw = pd.read_json("Test data json file path", lines=True)

In [None]:
def is_approximate_substring(substring, string, threshold=70):
    """Tell whether ``string`` contains a fuzzy match for ``substring``.

    Every window of ``len(substring)`` characters in ``string`` is scored with
    ``fuzz.ratio``; True is returned as soon as one score reaches
    ``threshold``, False if none does (or if ``string`` is too short to hold a
    window).
    """
    window_len = len(substring)
    start = 0
    while start + window_len <= len(string):
        candidate = string[start:start + window_len]
        if fuzz.ratio(substring, candidate) >= threshold:
            return True
        start += 1
    return False

In [None]:
# Flatten every raw test dialogue into one row holding: the user id, the
# comma-joined names of previously used apps (None when there are none), the
# recommended app name, the speaker-prefixed turns, and the index of the turn
# in which the recommendation is made (-1 when it cannot be located).
user_id, previous_interactions, recommended_app_name = [], [], []
turns, recommend_indexes = [], []

for _, row in test_raw.iterrows():
    target_app = row['recommended_app']['app_name']
    user_id.append(row['user_id'])
    recommended_app_name.append(target_app)

    history = [app['app_name'] for app in row['user_previous_interactions']]
    previous_interactions.append(",".join(history) if history else None)

    dialog_turns = []
    recommend_at = -1  # -1 == recommendation turn not located (yet)
    for conv in row['turns']:
        if "COMPUTER" in conv:
            utterance = 'computer: ' + conv['COMPUTER']
            # Only the first computer turn with an exact mention counts.
            if recommend_at == -1 and target_app in utterance:
                recommend_at = len(dialog_turns)
            dialog_turns.append(utterance)
        if "HUMAN" in conv:
            dialog_turns.append('human: ' + conv['HUMAN'])

    if recommend_at == -1:
        # No exact mention: fall back to the first approximately matching turn.
        recommend_at = next(
            (
                position
                for position, utterance in enumerate(dialog_turns)
                if is_approximate_substring(target_app, utterance)
            ),
            -1,
        )

    recommend_indexes.append(recommend_at)
    turns.append(dialog_turns)

print(len(user_id))
print(len(previous_interactions))
print(len(recommended_app_name))
print(len(recommend_indexes))
df_recommender_test = pd.DataFrame({
    "user_id": user_id,
    "previous_interactions": previous_interactions,
    "recommended_app_name": recommended_app_name,
    "turns": turns,
    "recommend_indexes": recommend_indexes,
})
print(f"\nnumber of rows: {len(df_recommender_test)}")

In [None]:
# Keep only dialogues with a located recommendation turn and at least one turn.
test_rows_to_keep = (df_recommender_test["recommend_indexes"] != -1) & (df_recommender_test["turns"].apply(lambda x: len(x) > 0))
# .copy() makes the filtered frame independent, so the column assignments
# below do not trigger SettingWithCopyWarning.
df_recommender_test = df_recommender_test[test_rows_to_keep].copy()
df_recommender_test['user_id'] = df_recommender_test['user_id'].str.lower()
# `.str.lower()` would turn a missing history (None) into NaN, which passes the
# `is not None` check used when the test prompts are built; lowercase by hand
# so that a missing history stays None.
df_recommender_test['previous_interactions'] = [
    value.lower() if isinstance(value, str) else None
    for value in df_recommender_test['previous_interactions']
]
df_recommender_test['recommended_app_name'] = df_recommender_test['recommended_app_name'].str.lower()
df_recommender_test['turns'] = df_recommender_test['turns'].apply(lambda x: [s.lower() for s in x])

# Testing Model

In [None]:
# Tokenizer configuration for evaluation (same values as in the training section).
model_checkpoint = "gpt2"
bos = '<|startoftext|>'
eos = '<|endoftext|>'
pad = '<|pad|>'

# Prefer the GPU when one is visible.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

special_token_kwargs = {
    "bos_token": bos,
    "eos_token": eos,
    "pad_token": pad,
    "additional_special_tokens": ["computer:", "human:"],
}
tokenizer = GPT2Tokenizer.from_pretrained(model_checkpoint, **special_token_kwargs)

print(len(tokenizer))

In [None]:
# map_location lets weights saved on a GPU be loaded on a CPU-only machine
# (and vice versa): tensors are placed on `device` while loading.
# NOTE(review): "save path" looks like a placeholder - replace it before running.
state_dict = torch.load("save path", map_location=device)
model.load_state_dict(state_dict)
model = model.to(device)

In [None]:
# Build one test example per dialogue: the prompt is every turn before the
# recommendation turn followed by a fixed lead-in; the expected text is
# "<app name> app." (no eos here); the interaction text is bos + history.
items_test = []
for _, row in df_recommender_test.iterrows():
    history = row["previous_interactions"]
    # A missing history can be None or NaN (pandas `.str.lower()` converts None
    # to NaN). NaN passes an `is not None` test and then fails on `str + float`,
    # so test for an actual string instead.
    interactions = bos + (history if isinstance(history, str) else "None")
    prompt = bos
    for index, turn in enumerate(row["turns"]):
        if index < row["recommend_indexes"]:
            prompt += turn + "\n"
        elif index == row["recommend_indexes"]:
            prompt += "computer: I would recommend the "
            items_test.append(RecommenderItem(prompt, row["recommended_app_name"] + " app.",interactions))
            break
        else:
            print("error!!")

In [None]:
def chunk(list_of_elements, batch_size):
    """Yield consecutive slices of ``list_of_elements`` of at most ``batch_size`` items.

    The final slice is shorter when the length is not a multiple of
    ``batch_size``; an empty input yields nothing.
    """
    batch_starts = range(0, len(list_of_elements), batch_size)
    yield from (list_of_elements[start:start + batch_size] for start in batch_starts)

def evaluate_recommender(dataset, model, tokenizer, device=device, threshold=70, max_length=480, generation_length=32):
    """Generate a recommendation for every item and score it against the target.

    Items are processed one at a time. The interaction text and the prompt are
    tokenized (truncated/padded to ``max_length``), ``model.generate`` samples
    up to ``generation_length`` tokens, and the decoded text is compared with
    the expected text using ``fuzz.ratio`` after " app." has been removed from
    both. ``len(dataset)`` is printed before the loop starts.

    Args:
        dataset: Iterable of items exposing ``prompt``, ``generation`` and
            ``interaction``.
        model: Model whose ``generate`` accepts ``input_ids1``/``input_ids2``,
            the matching attention masks, ``max_length`` and ``tokenizer``.
        tokenizer: Tokenizer used for encoding and decoding.
        device: Device the encoded inputs are moved to.
        threshold: A prediction counts as correct when its ratio is strictly
            greater than this value.
        max_length: Tokenized length of the interaction text and the prompt.
        generation_length: Maximum number of tokens to generate per item.

    Returns:
        List with one entry per item: 1 for a correct prediction, 0 otherwise.
    """
    examples = [example for example in dataset]
    print(len(dataset))
    correctly_predicted = []

    for example in tqdm(examples, total=len(examples)):
        # NOTE(review): truncation here relies on the tokenizer's configured
        # truncation side, whereas the training collator keeps the first token
        # and the most recent text - confirm long prompts are cut as intended.
        inputs1 = tokenizer(example.interaction, max_length=max_length, truncation=True, padding="max_length", return_tensors="pt")
        inputs2 = tokenizer(example.prompt, max_length=max_length, truncation=True, padding="max_length", return_tensors="pt")

        # generate() returns only the newly sampled token ids, not the prompt.
        generated_ids = model.generate(
            input_ids1=inputs1["input_ids"].to(device),
            input_ids2=inputs2["input_ids"].to(device),
            attention_mask1=inputs1["attention_mask"].to(device),
            attention_mask2=inputs2["attention_mask"].to(device),
            max_length=generation_length,
            tokenizer=tokenizer,
        )

        predicted_text = tokenizer.decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True).replace(" app.", "")
        expected_text = example.generation.replace(" app.", "")

        correctly_predicted.append(1 if fuzz.ratio(predicted_text, expected_text) > threshold else 0)

    return correctly_predicted

The `generate` method currently supports only a batch size of 1, so the test items are evaluated one at a time. The code will be updated once a batched `generate` is available.

In [None]:
# Score every test item (1 = fuzzy match above the threshold, 0 = miss) and
# report the fraction of hits.
test_dataset = recommenderDataset(items_test)
correctly_predicted = evaluate_recommender(test_dataset, model, tokenizer, device=device, threshold=95)
success_rate = sum(correctly_predicted) / len(correctly_predicted)
print("success_rate1: ", success_rate)