A simple solution that builds the book graph based on authors being referenced in book descriptions.

In [36]:
import sys
import os

# The project root is taken to be the parent of the current working
# directory; it is put at the front of the import path (once) so that
# project packages can be imported from here.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

In [37]:
from src.models.modules import BookDescriptionEmbeddingSimilarity

# Location of the embeddings file, resolved against PROJECT_ROOT.
model_path = os.path.join(PROJECT_ROOT, "data/embeddings/books_embeddings_dataset.npy")
# Similarity model built from that file; later code in this file queries it
# through `model.recommend_by_title(...)`.
model = BookDescriptionEmbeddingSimilarity(model_path)

In [38]:
import json
from networkx.readwrite import json_graph

def load_graph(relative_path):
    """Load a graph stored as node-link JSON.

    Args:
        relative_path: Path of the JSON file, relative to ``PROJECT_ROOT``.

    Returns:
        The graph rebuilt by ``json_graph.node_link_graph`` from the file's
        contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    graph_path = os.path.join(PROJECT_ROOT, relative_path)
    # JSON is UTF-8 by specification; be explicit so the result does not
    # depend on the platform's default locale encoding.
    with open(graph_path, "r", encoding="utf-8") as f:
        graph_data = json.load(f)

    return json_graph.node_link_graph(graph_data)

In [39]:
import networkx as nx
import pandas as pd

# Read the raw book table and start from an empty undirected graph.
dataset_path = os.path.join(PROJECT_ROOT, "data/raw_data/LEHABOOKS.csv")
dataset = pd.read_csv(dataset_path)
G = nx.Graph()

# One dict per CSV row, keyed by column name.
books = dataset.to_dict(orient="records")

# Each book is identified by "<Title> (<Authors>)". A single pass fills both
# the id -> row lookup and the graph nodes; when two rows share an id, the
# later row replaces the earlier one in both structures.
book_lookup = {}
for book in books:
    book_id = f"{book['Title']} ({book['Authors']})"
    book_lookup[book_id] = book
    G.add_node(book_id, data=book)

def find_references_author(description, books, current_book):
    """Return ids of books whose author string occurs in ``description``.

    Args:
        description: Description text of the current book. Anything that is
            not a string (for example the NaN pandas produces for an empty
            CSV cell) is treated as "no description".
        books: Iterable of row dicts with ``"Title"`` and ``"Authors"`` keys.
        current_book: Id of the book the description belongs to, in the
            ``"<Title> (<Authors>)"`` format; it is never returned.

    Returns:
        List of ``"<Title> (<Authors>)"`` ids, in the order of ``books``,
        for every other book whose ``"Authors"`` value is a substring of
        ``description`` (case-sensitive).
    """
    # `author in description` raises TypeError on a float NaN description.
    if not isinstance(description, str):
        return []

    references = []
    for book in books:
        author = book["Authors"]
        # Skip missing authors (NaN/None) and any other non-text value.
        # A blank author is skipped too: "" is a substring of every string
        # and would otherwise link the book to every description.
        if not isinstance(author, str) or not author.strip():
            continue

        book_id = f"{book['Title']} ({author})"
        if book_id != current_book and author in description:
            references.append(book_id)

    return references

In [40]:
def is_multi_word(title):
    """Return True when ``title`` has at least two whitespace-separated words."""
    return len(title.split()) > 1


def find_references_title(description, books, current_book):
    """Return ids of books whose title occurs in ``description``.

    Only titles that are longer than three characters and made of at least
    two words are considered, so short or single-word titles are never
    matched.

    Args:
        description: Description text of the current book. Anything that is
            not a string (for example the NaN pandas produces for an empty
            CSV cell) is treated as "no description".
        books: Iterable of row dicts with ``"Title"`` and ``"Authors"`` keys.
        current_book: Id of the book the description belongs to, in the
            ``"<Title> (<Authors>)"`` format; it is never returned.

    Returns:
        List of ``"<Title> (<Authors>)"`` ids, in the order of ``books``,
        for every other eligible book whose title is a substring of
        ``description`` (case-sensitive).
    """
    # `title in description` raises TypeError on a float NaN description.
    if not isinstance(description, str):
        return []

    references = []
    for book in books:
        author = book["Authors"]
        title = book["Title"]
        # Books without an author are left out (pd.isna is also True for None).
        if pd.isna(author):
            continue
        # A missing title is NaN (a float); len() and split() would fail on it.
        if not isinstance(title, str):
            continue

        book_id = f"{title} ({author})"
        if book_id == current_book:
            continue

        if len(title) > 3 and is_multi_word(title) and title in description:
            references.append(book_id)

    return references

Save the graph in JSON format.

In [None]:
def save_graph(graph, relative_path):
    """Write ``graph`` to disk as indented node-link JSON.

    Args:
        graph: Graph to serialise with ``json_graph.node_link_data``.
        relative_path: Destination file, relative to ``PROJECT_ROOT``.
            Missing parent directories are created.

    Raises:
        OSError: If the directory or file cannot be created or written.
        TypeError: If the graph holds values ``json`` cannot serialise.
    """
    save_path = os.path.join(PROJECT_ROOT, relative_path)
    graph_data = json_graph.node_link_data(graph)

    # Saving into a fresh checkout should not fail just because the output
    # folder has not been created yet.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit encoding so the file does not depend on the platform default.
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(graph_data, f, indent=4)

In [42]:
def find_neighbors_title(Graph, title):
    """Return the titles of all neighbours of the books named ``title``.

    Every node whose id starts with ``"<title> ("`` is treated as an edition
    of the book. The neighbours of all such nodes are merged (each neighbour
    node counted once) and reduced to their title by cutting the id at its
    last ``" ("``.

    Args:
        Graph: Graph whose node ids follow the ``"<Title> (<Authors>)"``
            format.
        title: Title to look up.

    Returns:
        List of neighbour titles in no particular order. The same title can
        appear more than once when distinct neighbour nodes share it.
    """
    prefix = f"{title} ("
    neighbor_ids = {
        neighbor
        for node in Graph.nodes
        if node.startswith(prefix)
        for neighbor in Graph.neighbors(node)
    }
    return [neighbor_id.rsplit(" (", 1)[0] for neighbor_id in neighbor_ids]

In [43]:
def get_records_graph(
    title,
    n=10,
    bonus=1.2,
    *,
    graph_path="data/graphs/book_graph.json",
    pool_size=68945,
):
    """Recommend books for ``title``, boosting those linked to it in the graph.

    The embedding model ranks ``pool_size`` candidates; every candidate that
    is also a graph neighbour of ``title`` has its score multiplied by
    ``bonus``. When a candidate title occurs more than once, its highest
    adjusted score is kept.

    Args:
        title: Title of the book to get recommendations for.
        n: Number of recommendations to return.
        bonus: Multiplier applied to the score of graph neighbours.
        graph_path: Graph JSON file, relative to ``PROJECT_ROOT``.
        pool_size: Number of candidates requested from the model before
            re-ranking. The default 68945 is presumably the size of the
            catalogue -- TODO confirm.

    Returns:
        List of at most ``n`` ``(title, score)`` pairs, highest score first.
    """
    Graph = load_graph(graph_path)
    predicted = model.recommend_by_title(title, n=pool_size)
    # Set membership keeps the per-candidate check cheap.
    graph_neighbors = set(find_neighbors_title(Graph, title))

    book_scores = {}
    for book, score in predicted:
        adjusted_score = score * bonus if book in graph_neighbors else score
        # Keep the best score seen for each title.
        if book not in book_scores or adjusted_score > book_scores[book]:
            book_scores[book] = adjusted_score

    sorted_books = sorted(book_scores.items(), key=lambda item: item[1], reverse=True)
    return sorted_books[:n]


In [48]:
# Example: the ten highest-scoring recommendations for "1984".
print(get_records_graph("1984", n = 10))

[('We', np.float32(1.1027404)), ('Animal Farm', np.float32(1.0987049)), ("Snowball's Chance", np.float32(1.094665)), ("Orwell's Nineteen Eighty-four", np.float32(1.0945809)), ('The Middle Stories', np.float32(1.08216)), ('Into the Forest', np.float32(1.0791802)), ('Mil novecientos ochenta y cuatro', np.float32(1.078745)), ('That Hideous Strength', np.float32(1.0694191)), ('Rebelión en la Granja', np.float32(1.0563898)), ('Mother Night', np.float32(1.0374453))]


In [45]:
def print_all_nodes(Graph):
    """Print every node that has at least one neighbour, with its neighbours.

    Each qualifying node produces one line of the form
    ``"<node> references: [<neighbours>]"``; nodes without neighbours are
    skipped.

    Args:
        Graph: Graph to inspect.
    """
    for node in Graph.nodes():
        # Materialise the neighbours once and reuse the list for both the
        # emptiness check and the output.
        neighbors = list(Graph.neighbors(node))
        if neighbors:
            print(f"{node} references: {neighbors}")