In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [3]:
from dotenv import load_dotenv

load_dotenv("C:/Users/Ali/PycharmProjects/book-recommender/.venv/.env")

True

In [4]:
import pandas as pd
books = pd.read_csv('books_cleaned.csv')

In [5]:
books["tagged_descriptions"].to_csv("tagged_descriptions.txt",
                                    sep = "\n",
                                    index = False,
                                    header = False)

In [6]:
raw_documents = TextLoader("tagged_descriptions.txt",encoding="utf-8").load()

text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)


Created a chunk of size 1169, which is longer than the specified 0
Created a chunk of size 1215, which is longer than the specified 0
Created a chunk of size 374, which is longer than the specified 0
Created a chunk of size 310, which is longer than the specified 0
Created a chunk of size 484, which is longer than the specified 0
Created a chunk of size 483, which is longer than the specified 0
Created a chunk of size 961, which is longer than the specified 0
Created a chunk of size 189, which is longer than the specified 0
Created a chunk of size 844, which is longer than the specified 0
Created a chunk of size 297, which is longer than the specified 0
Created a chunk of size 198, which is longer than the specified 0
Created a chunk of size 882, which is longer than the specified 0
Created a chunk of size 1089, which is longer than the specified 0
Created a chunk of size 1190, which is longer than the specified 0
Created a chunk of size 305, which is longer than the specified 0
Create

In [7]:
db_books = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings())

In [8]:
query = "A book about nature"
docs = db_books.similarity_search(query,k = 10)
docs

[Document(id='e894c12a-fc0a-4399-b86f-748af26fd3ea', metadata={'source': 'tagged_descriptions.txt'}, page_content='9780140448009: Features short fiction by the French naturalist author that is introduced by an essay describing his life, works, and artistic abilities, as well as explanatory notes, a chronology, and suggestions for further reading.'),
 Document(id='269028c1-8e1f-4f14-9994-df060bd61af0', metadata={'source': 'tagged_descriptions.txt'}, page_content='9780571090242: Death of a Naturalist marks the auspicious outset of an acclaimed master. As a first book of poems, it is remarkable for its accurate perceptions and its rich linguistic gifts.'),
 Document(id='8348e648-acd7-49fd-b8bc-802b9d0e7fe1', metadata={'source': 'tagged_descriptions.txt'}, page_content='9780618711666: Presents a novel of life in modern India, chronicling the interwoven journey of an American marine biologist and a Delhi businessman who travel to the remote Sundarban islands.'),
 Document(id='ae987fcb-092d-

In [9]:
import re

def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k=50)

    books_list = []

    for i in range(len(recs)):
        # Extract the first part, strip non-numeric characters, and convert to int
        isbn_str = recs[i].page_content.strip('"').split()[0]
        isbn_numeric = re.sub(r'\D', '', isbn_str)  # Removes any non-numeric characters
        books_list.append(int(isbn_numeric))

    return books[books["isbn13"].isin(books_list)]


In [10]:
retrieve_semantic_recommendations("A book about war")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitles,tagged_descriptions
66,9780007162994,0007162995,If I Die in a Combat Zone,Tim O'Brien,"Vietnam War, 1961-1975",http://books.google.com/books/content?id=0qUtS...,Perhaps the best book to emerge from the Vietn...,2003.0,3.95,208.0,11.0,If I Die in a Combat Zone,9780007162994: Perhaps the best book to emerge...
461,9780099273868,0099273861,The Great World,David Malouf,Australia,http://books.google.com/books/content?id=f8IiK...,"""Every city, town and village has its memorial...",1999.0,3.87,336.0,514.0,The Great World,"9780099273868: ""Every city, town and village h..."
524,9780099483472,0099483475,All Quiet on the Western Front,Erich Maria Remarque,"World War, 1914-1918",,All Quiet on the Western Front is probably the...,2005.0,3.95,216.0,1018.0,All Quiet on the Western Front,9780099483472: All Quiet on the Western Front ...
534,9780099595816,0099595818,Civilwarland in Bad Decline,George Saunders,Short stories,http://books.google.com/books/content?id=ZY7h1...,A brilliant collection of stories from the win...,1997.0,4.22,179.0,12271.0,Civilwarland in Bad Decline,9780099595816: A brilliant collection of stori...
572,9780140149241,0140149244,We Were the Rats,Lawson Glassop,"Tobruk, Battles of, 1941-1942",,Reissue of the famous novel based on the autho...,1991.0,3.23,275.0,13.0,We Were the Rats,9780140149241: Reissue of the famous novel bas...
575,9780140153194,0140153195,Justine,Lawrence Durrell,Fiction,http://books.google.com/books/content?id=zuK-P...,On the eve of World War II in the Egyptian cit...,1991.0,3.9,253.0,5183.0,Justine,9780140153194: On the eve of World War II in t...
580,9780140158762,0140158766,Chickenhawk - Back in the World,Robert Mason,Biography & Autobiography,http://books.google.com/books/content?id=R7cGA...,From the author of the bestselling autobiograp...,1994.0,4.05,400.0,166.0,Chickenhawk - Back in the World: Life After Vi...,9780140158762: From the author of the bestsell...
623,9780140283358,0140283358,Waiting for the Barbarians,J. M. Coetzee,Fiction,http://books.google.com/books/content?id=1me6T...,A magistrate in a country village protests the...,1980.0,3.93,152.0,17232.0,Waiting for the Barbarians,9780140283358: A magistrate in a country villa...
795,9780142002803,0142002801,"The Fall of Berlin, 1945",Antony Beevor,History,http://books.google.com/books/content?id=u6avD...,Chronicles the horror of Berlin's fall to the ...,2003.0,4.28,490.0,9635.0,"The Fall of Berlin, 1945",9780142002803: Chronicles the horror of Berlin...
796,9780142002889,0142002887,When the Elephants Dance,Tess Uriza Holthe,Fiction,http://books.google.com/books/content?id=XKtOP...,In the final weeks of the Japanese occupation ...,2003.0,4.03,368.0,2525.0,When the Elephants Dance: A Novel,9780142002889: In the final weeks of the Japan...
