## Importing Dataset

In [458]:
%pip install kagglehub[pandas-datasets]


zsh:1: no matches found: kagglehub[pandas-datasets]


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [459]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from collections import Counter
import kagglehub
from kagglehub import KaggleDatasetAdapter

books_df = kagglehub.load_dataset(
  KaggleDatasetAdapter.PANDAS,
  "arashnic/book-recommendation-dataset",
  "Books.csv",
)

ratings_df = kagglehub.load_dataset(
  KaggleDatasetAdapter.PANDAS,
  "arashnic/book-recommendation-dataset",
  "Ratings.csv",
)

users_df = kagglehub.load_dataset(
  KaggleDatasetAdapter.PANDAS,
  "arashnic/book-recommendation-dataset",
  "Users.csv",
)

  books_df = kagglehub.load_dataset(
  result = read_function(
  ratings_df = kagglehub.load_dataset(
  users_df = kagglehub.load_dataset(


### Only keeping books with more than three ratings

In [460]:
# print(books_df.shape)
# books_df = books_df[books_df.groupby('Book-Title')['Book-Title'].transform('count') > 5]
print(books_df.shape)

(271360, 8)


### Joining Books, Ratings, and Users tables together

In [461]:
df_ratings_books = pd.merge(ratings_df, books_df, on="ISBN", how='inner')
df = pd.merge(df_ratings_books, users_df, on='User-ID')
df['User-ID'] = df['User-ID'].astype(str)
df['Year-Of-Publication'] = pd.to_numeric(df['Year-Of-Publication'], errors='coerce')
df = df.dropna(subset=['Year-Of-Publication'])
df = df.dropna(subset=['Age'])
df = df[df['Book-Rating'] > 0]
df = df[df['Age'] <= 100]
df = df[df['Year-Of-Publication'] > 0]
df['User-Age'] = pd.cut(
    df['Age'],
    bins=[0, 18, 25, 35, 50, 100],
    labels=['<18', '18-25', '26-35', '36-50', '50+'],
    right=False
)
df['Book-Year-Of-Publication'] = pd.cut(
    df['Year-Of-Publication'],
    bins=[0, 1950, 1980, 2000, 2010, 2020, 2050],
    labels=['pre-1950', '1950-1979', '1980-1999', '2000-2009', '2010-2019', '2020+'],
    right=False
)
df['Book-Rating'].describe()

count    264742.000000
mean          7.738848
std           1.813809
min           1.000000
25%           7.000000
50%           8.000000
75%           9.000000
max          10.000000
Name: Book-Rating, dtype: float64

### Combining my own data into the training set

In [462]:
personal_df = pd.read_csv("./fine-tuning-book-set.txt")
end_index = len(df)
df = pd.concat([df, personal_df], ignore_index=True, sort=False)

In [463]:
personal_df.head(2)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Age,Book-Year-Of-Publication,User-Age
0,1234567890,451521951,5,The Count of Monte Cristo,Alexandre Dumas,1844,Signet Book,23,pre-1950,18-25
1,1234567890,684813637,5,1776,David McCullough,2005,Simon & Schuster,23,2000-2009,18-25


In [464]:
df.head(2)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,Location,Age,User-Age,Book-Year-Of-Publication
0,276729,052165615X,3,Help!: Level 1,Philip Prowse,1999.0,Cambridge University Press,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...,"rijeka, n/a, croatia",16.0,<18,1980-1999
1,276729,0521795028,6,The Amsterdam Connection : Level 4 (Cambridge ...,Sue Leather,2001.0,Cambridge University Press,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...,"rijeka, n/a, croatia",16.0,<18,2000-2009


In [465]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [466]:
import pandas as pd
import numpy as np
from torch.utils.data import Dataset
from sklearn.preprocessing import  StandardScaler

# User Tower -- User-ID, Age
# Item Tower -- ISBN, Book-Title, Book-Author, Publisher, Year-Of-Publication

class BookRecommenderDataset(Dataset):
    """
    A PyTorch Dataset class for book recommendation tasks.

    Parameters
    ----------
    dataframe : pd.DataFrame
        The input data containing user, item, and possibly interaction features.

    Attributes
    ----------
    data : pd.DataFrame
        The processed version of the input dataframe.
    encoders : dict
        A dictionary mapping column names to fitted label encoders.
    reverse_encoders : dict
        A dictionary mapping column names to reverse label encoders (index to label).
    scalers : dict
        A dictionary mapping column names to fitted scalers for numerical features.
    """

    def __init__(self, data):
        self.encoders = {} # {'Column name': {'value': idx, ...}, ...}
        self.reverse_encoders = {} # {'Column name': {idx: 'value', ...}, ...}
        self.data = data
        self.preprocess(self.data)

    def preprocess(self, data):
        self.encode_information()

    def encode_information(self):
        """
        Maps {key: index} pairs and StandardScaler for real valued numbers
        """
        label_encoders = ['User-ID', 'ISBN', 'Book-Author', 'Book-Title', 'Publisher', "User-Age", "Book-Year-Of-Publication"]

        for col in label_encoders:
            unique_vals = self.data[col].astype(str).unique()
            self.encoders[col] = {val: idx + 1 for idx, val in enumerate(unique_vals)}
            self.reverse_encoders[col] = {idx + 1: val for idx, val in enumerate(unique_vals)}
            self.data[col] = self.data[col].astype(str).map(self.encoders[col]).fillna(0).astype(int)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        return {
            "User-ID": torch.tensor(row["User-ID"], dtype=torch.long),
            "User-Age": torch.tensor(row["User-Age"], dtype=torch.long),
            "Book-ISBN": torch.tensor(row["ISBN"], dtype=torch.long),
            "Book-Title": torch.tensor(row["Book-Title"], dtype=torch.long),
            "Book-Author": torch.tensor(row["Book-Author"], dtype=torch.long),
            "Book-Publisher": torch.tensor(row["Publisher"], dtype=torch.long),
            "Book-Year-Of-Publication": torch.tensor(row["Book-Year-Of-Publication"], dtype=torch.long),
            "Book-Title-Text": row['Book-Title']
        }


dataset = BookRecommenderDataset(df)


In [467]:
dataset[0]

{'User-ID': tensor(1),
 'User-Age': tensor(1),
 'Book-ISBN': tensor(1),
 'Book-Title': tensor(1),
 'Book-Author': tensor(1),
 'Book-Publisher': tensor(1),
 'Book-Year-Of-Publication': tensor(1),
 'Book-Title-Text': np.int64(1)}

In [468]:
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
train_data, test_data = random_split(dataset, [train_size, test_size])

# Dataloaders
train_loader = DataLoader(train_data, batch_size=512, shuffle=True)
test_loader = DataLoader(test_data, batch_size=512, shuffle=False)

In [469]:
next(iter(train_loader))

{'User-ID': tensor([ 1949, 18398,  1872, 25388, 34721, 22661, 20926, 30338, 30104, 25629,
         33762, 19403,  8470, 34671, 16164, 11578, 34617, 19246, 35279, 26644,
         15631, 10588,  9043, 20884,  3655, 27047,  2143, 21033, 14361,  4529,
          5757,  8275, 22010, 18864, 32320,  1561, 29155, 32726,  4893, 16416,
          8119,  2823, 14361, 18178,    33,  2708, 14361, 21048, 16066, 10214,
         37103, 15224, 13774, 38990, 25491, 11346, 14713, 22156, 38666, 18048,
         37607, 24252, 13351, 34428, 10396,  3865, 25277, 13337,  7317,  1744,
         15269,  5508, 32324, 39240, 22326,  8752,  8112,  9663, 20973,  7637,
         16505, 29769, 20847, 20340, 16769, 28434, 29338, 12931, 21283,  7151,
         29420,  6931, 12662, 26493, 17431,   462, 25629, 38800, 20935, 24267,
          6945,  8948, 39336, 38052, 10210, 34315, 13555, 16658,  7821,  9603,
         39024, 38369, 26878, 10920, 32594, 20105,  8965, 35494,  9477, 31386,
         35568, 30200,  6456,   319, 3062

## Two Tower Model for Recommendations

In [470]:
class UserTower(nn.Module):

    # User Tower -- User-ID, Age

    def __init__(self, num_users, num_ages, embedding_dim):
        super().__init__()

        user_embedding_dim = 128    
        age_embedding_dim = 16   
        linear_in = user_embedding_dim + age_embedding_dim

        self.user_embedding = nn.Embedding(num_users, user_embedding_dim, padding_idx=0)
        self.user_age_embedding = nn.Embedding(num_ages, age_embedding_dim, padding_idx=0)

        self.user_mlp = nn.Sequential(
            nn.Linear(linear_in, 512), # 2 embeddings (user-id, user-age)
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, embedding_dim),
        )

    def forward(self, user_id, age):
        user_emb = self.user_embedding(user_id)
        age_emb = self.user_age_embedding(age)
        x = torch.cat([user_emb, age_emb], dim=1)
        return self.user_mlp(x)

    def get_embedding(self, data):
        return self.forward(data['User-ID'], data['User-Age'])


In [None]:
class ItemTower(nn.Module):
    def __init__(self, num_isbn, num_titles, num_authors, num_publishers, num_year_of_publications, embedding_dim):
        super().__init__()

        # Item Tower -- ISBN, Book-Title, Book-Author, Publisher, Year-Of-Publication
        # book_isbn_embedding_size = 64
        book_title_embedding_size = 32
        book_author_embedding_size = 32
        book_publisher_embedding_size = 16
        book_year_of_publication_embedding_size = 8

        # Categorical embeddings
        # self.book_isbn_embedding = nn.Embedding(num_isbn, book_isbn_embedding_size, padding_idx=0)
        self.book_title_embedding = nn.Embedding(num_titles, book_title_embedding_size, padding_idx=0)
        self.book_author_embedding = nn.Embedding(num_authors, book_author_embedding_size, padding_idx=0)
        self.book_publisher_embedding = nn.Embedding(num_publishers, book_publisher_embedding_size, padding_idx=0)
        self.book_year_of_publication_embedding = nn.Embedding(num_year_of_publications, book_year_of_publication_embedding_size, padding_idx=0)

        # book_isbn_embedding_size + 
        linear_in = book_title_embedding_size + book_author_embedding_size + book_publisher_embedding_size + book_year_of_publication_embedding_size

        self.item_mlp = nn.Sequential(
            nn.Linear(linear_in, 512), 
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, embedding_dim),
        )

    def forward(self, isbn, book_title, book_author, book_publisher, book_year_of_publication):
        # book_isbn_emb = self.book_isbn_embedding(isbn)
        book_title_emb = self.book_title_embedding(book_title)
        book_author_emb = self.book_author_embedding(book_author)
        book_publisher_emb = self.book_publisher_embedding(book_publisher)
        book_year = self.book_year_of_publication_embedding(book_year_of_publication)

        x = torch.cat([
            # book_isbn_emb,
            book_title_emb,
            book_author_emb,
            book_publisher_emb,
            book_year
        ], dim=1)

        return self.item_mlp(x)

    def get_embedding(self, data):
        return self.forward(
            data['Book-ISBN'],
            data['Book-Title'],
            data['Book-Author'],
            data['Book-Publisher'],
            data['Book-Year-Of-Publication'],
        )


In [472]:
class TwoTowers(nn.Module):
    def __init__(self, user_tower: UserTower, item_tower: ItemTower):
        super().__init__()
        self.user_tower = user_tower
        self.item_tower = item_tower

    def forward(self, data):
        user_emb = self.user_tower.get_embedding(data)
        item_emb = self.item_tower.get_embedding(data)

        user_emb = F.normalize(user_emb, p=2, dim=1)
        item_emb = F.normalize(item_emb, p=2, dim=1)
        
        return user_emb, item_emb

In [473]:
NUM_USERS = len(dataset.encoders['User-ID']) + 1
NUM_AGES = len(dataset.encoders['User-Age']) + 1
NUM_ISBN = len(dataset.encoders['ISBN']) + 1
NUM_TITLES = len(dataset.encoders['Book-Title']) + 1
NUM_AUTHORS = len(dataset.encoders['Book-Author']) + 1
NUM_PUBLISHERS = len(dataset.encoders['Publisher']) + 1
NUM_YEAR_OF_PUBLICATIONS = len(dataset.encoders['Book-Year-Of-Publication']) + 1

EMBEDDING_SIZE = 128

user_tower = UserTower(
    num_users=NUM_USERS,  
    num_ages=NUM_AGES,
    embedding_dim=EMBEDDING_SIZE
)

item_tower = ItemTower(
    num_isbn=NUM_ISBN,
    num_titles=NUM_TITLES,
    num_authors=NUM_AUTHORS,
    num_publishers=NUM_PUBLISHERS,
    num_year_of_publications=NUM_YEAR_OF_PUBLICATIONS,
    embedding_dim=EMBEDDING_SIZE
)

two_towers = TwoTowers(
    user_tower,
    item_tower
).to(device)

print("NUM_USERS:", NUM_USERS)
print("NUM_AGES:", NUM_AGES)
print("NUM_ISBN:", NUM_ISBN)
print("NUM_TITLES:", NUM_TITLES)
print("NUM_AUTHORS:", NUM_AUTHORS)
print("NUM_PUBLISHERS:", NUM_PUBLISHERS)
print("NUM_YEAR_OF_PUBLICATIONS:", NUM_YEAR_OF_PUBLICATIONS)


NUM_USERS: 39813
NUM_AGES: 7
NUM_ISBN: 117562
NUM_TITLES: 107251
NUM_AUTHORS: 50048
NUM_PUBLISHERS: 9474
NUM_YEAR_OF_PUBLICATIONS: 8


In [474]:
def calculate_recall_at_k(two_towers, epoch, EPOCHS, k):
    """
    Gathers the current item embeddings, 
    calculates similarity between each user and the items. 

    Calculates and returns recall@k metric of the recommendations 
    made to the user.
    """

    # --- Getting all item embeddings --- 
    entire_dataset = DataLoader(dataset, batch_size=1, shuffle=False)
    all_item_embeddings = []
    for batch in entire_dataset:
        _, item_embedding = two_towers(batch)
        all_item_embeddings.append(item_embedding)
    all_item_embeddings = torch.cat(all_item_embeddings, dim=0)

    total_recall = 0.0
    num_users = 0

    # --- Calculating recall@k --- 
    for idx, batch in enumerate(test_loader):
        user_embedding, _ = two_towers(batch)
        similarity_scores = user_embedding @ all_item_embeddings.T  # [batch_size, num_items]

        top_scores, top_indices = torch.topk(similarity_scores, k=k, dim=1)

        for user_id, items, scores in zip(batch['User-ID'], top_indices, top_scores):
            user_rows = dataset.data[dataset.data['User-ID'] == user_id.item()]

            # Recommended books (Book-Title IDs)
            recommended_book_ids_set = set([dataset.data.iloc[idx.item()]['Book-Title'] for idx in items])
            actual_book_ids_set = set(user_rows['Book-Title'].tolist())

            hits = len(recommended_book_ids_set & actual_book_ids_set)  # intersection
            recall_at_k = hits / len(actual_book_ids_set)

            total_recall += recall_at_k
            num_users += 1

    average_recall_at_k = total_recall / num_users
    print(f"Epoch {epoch}/{EPOCHS}, Average Recall@{k}: {average_recall_at_k:.4f}\n")
    return average_recall_at_k


## Training

#### Main Training Loop

In [475]:
%rm -rf ./logs/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [476]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import datetime

EPOCHS = 50
LEARNING_RATE = 5e-4
EARLY_STOPPING_PATIENCE = 15
TEMPERATURE = 0.1
WEIGHT_DECAY = 1e-5
MODEL_SAVE_PATH = "/models"

optimizer = torch.optim.Adam(two_towers.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
writer = SummaryWriter('./logs/')

best_test_loss = float('inf')
early_stopping_counter = 0
global_step = 0

In [478]:

for epoch in range(1, EPOCHS + 1):

    # -- Main Loop --
    running_train_loss = 0.0
    two_towers.train()

    for batch_idx, batch in enumerate(train_loader):
        optimizer.zero_grad()

        user_embedding, item_embedding = two_towers(batch)

        logits = (user_embedding @ item_embedding.T) / TEMPERATURE
        labels = torch.arange(user_embedding.size(0)).to(device) 
        loss = F.cross_entropy(logits, labels)

        loss.backward()
        optimizer.step()

        running_train_loss += loss.item()

    # -- Test Loop -- 
    two_towers.eval()
    running_test_loss = 0.0
    with torch.no_grad():
        for batch in test_loader:
            user_embedding, item_embedding = two_towers(batch)

            logits = (user_embedding @ item_embedding.T) / TEMPERATURE
            labels = torch.arange(user_embedding.size(0)).to(device)
            loss = F.cross_entropy(logits, labels)

            running_test_loss += loss.item()

    avg_train_loss = running_train_loss / len(train_loader)
    avg_test_loss = running_test_loss / len(test_loader)
    print(f"Epoch {epoch}/{EPOCHS}, Average Training Loss: {avg_train_loss:.4f}, ")
    print(f"Epoch {epoch}/{EPOCHS}, Average Test Loss: {avg_test_loss:.4f}, ")
    # calculate_recall_at_k(two_towers, epoch, EPOCHS, 25)
    
    if epoch % 10 == 0:
        torch.save(two_towers.state_dict(), f"./{MODEL_SAVE_PATH}/two_towers_epoch{epoch}_test{avg_test_loss:.2}_train{avg_train_loss:.2f}.pt")

writer.close()
print("Training complete.")


Epoch 1/50, Average Training Loss: 6.1493, 
Epoch 1/50, Average Test Loss: 6.1049, 
Epoch 2/50, Average Training Loss: 6.0447, 
Epoch 2/50, Average Test Loss: 6.0206, 
Epoch 3/50, Average Training Loss: 5.9251, 
Epoch 3/50, Average Test Loss: 5.9530, 
Epoch 4/50, Average Training Loss: 5.7980, 
Epoch 4/50, Average Test Loss: 5.9025, 
Epoch 5/50, Average Training Loss: 5.6671, 
Epoch 5/50, Average Test Loss: 5.8649, 
Epoch 6/50, Average Training Loss: 5.5294, 
Epoch 6/50, Average Test Loss: 5.8438, 
Epoch 7/50, Average Training Loss: 5.3924, 
Epoch 7/50, Average Test Loss: 5.8371, 
Epoch 8/50, Average Training Loss: 5.2515, 
Epoch 8/50, Average Test Loss: 5.8359, 
Epoch 9/50, Average Training Loss: 5.1093, 
Epoch 9/50, Average Test Loss: 5.8371, 
Epoch 10/50, Average Training Loss: 4.9729, 
Epoch 10/50, Average Test Loss: 5.8390, 
Epoch 11/50, Average Training Loss: 4.8360, 
Epoch 11/50, Average Test Loss: 5.8532, 
Epoch 12/50, Average Training Loss: 4.7042, 
Epoch 12/50, Average Test L

### Seeing what the model recommends to me after training

- It should have seen me somewhere in the training data and should have learned enough information from the other data to generalize over what I might like.
- I will pass my username and age into the User Tower. And then conduct a dot product between my vector and the matrix of learned item embeddings to get relevance scores.
- I will then conduct some semi-manual ranking based on removing what I have already read and other info.
- Then I will make the final 50 recommendations for me.

In [None]:

# Pretrained model:
two_towers.load_state_dict(torch.load("models/two_towers_epoch50_test6.3_train2.38.pt", map_location=device))

RuntimeError: Error(s) in loading state_dict for TwoTowers:
	Unexpected key(s) in state_dict: "item_tower.book_isbn_embedding.weight". 
	size mismatch for user_tower.user_embedding.weight: copying a param with shape torch.Size([11996, 128]) from checkpoint, the shape in current model is torch.Size([39813, 128]).
	size mismatch for item_tower.book_title_embedding.weight: copying a param with shape torch.Size([1958, 32]) from checkpoint, the shape in current model is torch.Size([107251, 32]).
	size mismatch for item_tower.book_author_embedding.weight: copying a param with shape torch.Size([2112, 32]) from checkpoint, the shape in current model is torch.Size([50048, 32]).
	size mismatch for item_tower.book_publisher_embedding.weight: copying a param with shape torch.Size([863, 16]) from checkpoint, the shape in current model is torch.Size([9474, 16]).
	size mismatch for item_tower.book_year_of_publication_embedding.weight: copying a param with shape torch.Size([7, 8]) from checkpoint, the shape in current model is torch.Size([8, 8]).
	size mismatch for item_tower.item_mlp.0.weight: copying a param with shape torch.Size([512, 152]) from checkpoint, the shape in current model is torch.Size([512, 88]).

In [None]:
# Getting all item embeddings
entire_dataset = DataLoader(dataset, batch_size=1, shuffle=False)
all_item_embeddings = []
for batch in entire_dataset:
    _, item_embedding = two_towers(batch)
    all_item_embeddings.append(item_embedding)
all_item_embeddings = torch.cat(all_item_embeddings, dim=0)

# Getting a single embedding for my learned user
paul_user_id = dataset.encoders['User-ID']['1234567890']
paul_age = dataset.encoders['User-Age']['18-25']
paul_batch = {
    'User-ID': torch.tensor([paul_user_id], dtype=torch.long, device=device),
    'User-Age': torch.tensor([paul_age], dtype=torch.long, device=device)
}
paul_user_embedding = two_towers.user_tower.get_embedding(paul_batch) # [1 batch, 128 dimensions]
paul_user_embedding = F.normalize(paul_user_embedding, p=2, dim=1)

In [None]:
print(all_item_embeddings.shape)
print(paul_user_embedding.shape)

torch.Size([264758, 128])
torch.Size([1, 128])


In [None]:
similarity_scores = (paul_user_embedding @ all_item_embeddings.T).squeeze()
top_k = 1000
top_scores, top_indices = torch.topk(similarity_scores, top_k)

unique_recommendations = []
seen_titles = set()
read_isbns = personal_df['ISBN'].astype(str).to_list()

for score, idx in zip(top_scores.detach().cpu().numpy(), top_indices.detach().cpu().numpy()):
    row = dataset.data.iloc[idx]  # pandas row

    title_idx = int(row['Book-Title'])
    author_idx = int(row['Book-Author'])
    isbn_idx = int(row['ISBN'])

    title = dataset.reverse_encoders['Book-Title'][title_idx]
    author = dataset.reverse_encoders['Book-Author'][author_idx]
    isbn = dataset.reverse_encoders['ISBN'][isbn_idx]

    # skip duplicates or already read books
    if title in seen_titles or isbn in read_isbns:
        continue

    seen_titles.add(title)
    unique_recommendations.append({
        'title': title,
        'author': author,
        'score': score
    })


In [None]:
for rec in unique_recommendations[:100]:
    print(f"Title: {rec['title']}, Author: {rec['author']}, Score: {rec['score']:.4f}")

Title: Journey to the End of the Night, Author: Louis-Ferdinand Celine, Score: 0.7245
Title: Notes from the Underground (Dover Thrift Editions), Author: Fyodor Dostoyevsky, Score: 0.6672
Title: Like a Hole in the Head, Author: Jen Banbury, Score: 0.6457
Title: And Then There Were None, Author: Agatha Christie, Score: 0.6418
Title: Dune, Author: Frank Herbert, Score: 0.6367
Title: The Boggart, Author: Susan Cooper, Score: 0.6345
Title: Masterpieces of murder, Author: Agatha Christie, Score: 0.6320
Title: O medo (Documenta poÃ©tica), Author: Al Berto, Score: 0.6319
Title: When Bunny Grows Up, Author: Patsy Scarry, Score: 0.6307
Title: Dracula: A Symphony in Moonlight &amp; Nightmares, Author: Jon J. Muth, Score: 0.6276
Title: Time Travel: Fact, Fiction, &amp; Possibility, Author: Jenny Randles, Score: 0.6242
Title: Gods Themselves, Author: Isaac Asimov, Score: 0.6218
Title: Tao Teh King: Nature and Intelligence, Author: Lao Tzu, Score: 0.6205
Title: Here Comes the Cat!, Author: Frank Asc

In [None]:
calculate_recall_at_k(two_towers, 0, 0, 25)

Epoch 0/0, Average Recall@25: 0.2348



0.23479582032851187