In [46]:
import requests
import pandas as pd 
from bs4 import BeautifulSoup
from joblib import Parallel, delayed
from rake_nltk import Rake


In [74]:
def get_product_links(url):
    global base_url
    

    request = requests.get(url)
    soup = BeautifulSoup(request.text, 'html.parser')

    product_articles = soup.find_all('article', class_='product_pod')
    product_links = [base_url + link['href'] for item in product_articles for link in item.find_all('a', href=True) ]

    return product_links


In [3]:
def flatten_list(list_of_lists):
    flat_list = []
    for list in list_of_lists:
        flat_list += list
    return flat_list

In [5]:
def get_product_details(url):
    request = requests.get(url)
    soup = BeautifulSoup(request.text, 'html.parser')

    name=soup.find('h1').text.strip()
    availability=soup.find('p',class_='instock availability').text.strip()
    price=soup.find('p').text.strip('Â')
    rating = soup.find('p', class_='star-rating')['class'][1] 
    
    return {'name': name, 'availability': availability, 'price': price, 'rating': rating, 'link': url}


In [7]:

if __name__ == '__main__':

    global base_url
    base_url = 'https://books.toscrape.com/catalogue/'

    product_links = Parallel(n_jobs=18)(delayed(get_product_links)(base_url+f'page-{page}.html') for page in range(1, 51))
    product_links =  set(flatten_list(product_links))

    books = Parallel(n_jobs=32, verbose=100)(delayed(get_product_details)(url) for url in product_links)
    books = pd.DataFrame(books)


[Parallel(n_jobs=32)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=32)]: Done   1 tasks      | elapsed:   10.2s
[Parallel(n_jobs=32)]: Done   2 tasks      | elapsed:   10.8s
[Parallel(n_jobs=32)]: Done   3 tasks      | elapsed:   11.0s
[Parallel(n_jobs=32)]: Done   4 tasks      | elapsed:   11.0s
[Parallel(n_jobs=32)]: Done   5 tasks      | elapsed:   11.0s
[Parallel(n_jobs=32)]: Done   6 tasks      | elapsed:   11.0s
[Parallel(n_jobs=32)]: Done   7 tasks      | elapsed:   11.0s
[Parallel(n_jobs=32)]: Done   8 tasks      | elapsed:   11.0s
[Parallel(n_jobs=32)]: Done   9 tasks      | elapsed:   11.2s
[Parallel(n_jobs=32)]: Done  10 tasks      | elapsed:   11.6s
[Parallel(n_jobs=32)]: Done  11 tasks      | elapsed:   12.5s
[Parallel(n_jobs=32)]: Done  12 tasks      | elapsed:   12.6s
[Parallel(n_jobs=32)]: Done  13 tasks      | elapsed:   12.7s
[Parallel(n_jobs=32)]: Done  14 tasks      | elapsed:   12.8s
[Parallel(n_jobs=32)]: Done  15 tasks      | elapsed:  

In [73]:
books

Unnamed: 0,name,availability,price,rating,link
0,What's It Like in Space?: Stories from Astrona...,In stock (14 available),£19.60,Two,https://books.toscrape.com/catalogue/whats-it-...
1,Agnostic: A Spirited Manifesto,In stock (15 available),£12.51,Five,https://books.toscrape.com/catalogue/agnostic-...
2,Large Print Heart of the Pride,In stock (15 available),£19.15,Two,https://books.toscrape.com/catalogue/large-pri...
3,Team of Rivals: The Political Genius of Abraha...,In stock (7 available),£20.12,Five,https://books.toscrape.com/catalogue/team-of-r...
4,'Salem's Lot,In stock (4 available),£49.56,Four,https://books.toscrape.com/catalogue/salems-lo...
...,...,...,...,...,...
995,"Miracles from Heaven: A Little Girl, Her Journ...",In stock (3 available),£57.83,One,https://books.toscrape.com/catalogue/miracles-...
996,Something Borrowed (Darcy & Rachel #1),In stock (3 available),£48.96,Five,https://books.toscrape.com/catalogue/something...
997,The Metamorphosis,In stock (5 available),£28.58,One,https://books.toscrape.com/catalogue/the-metam...
998,"The Elegant Universe: Superstrings, Hidden Dim...",In stock (3 available),£13.03,Four,https://books.toscrape.com/catalogue/the-elega...


In [68]:
def search_books(book_name, books_df):
    matched_books = []
    r = Rake()
    r.extract_keywords_from_text(book_name)
    book_keywords = set(r.get_ranked_phrases())

    book_name_lower = book_name.lower()

    for index, row in books_df.iterrows():
        name_lower = row['name'].lower()

        if book_name_lower in name_lower:
            matched_books.append(row.to_dict())
        else:
            r.extract_keywords_from_text(row['name'])
            details_keywords = set(r.get_ranked_phrases())
            if book_keywords.intersection(details_keywords):
                matched_books.append(row.to_dict())

    return matched_books


In [75]:
search_query = input("Enter the book name to search: ")
search_results = search_books(search_query, books)
if search_results:
    print("Search Results:")
    for result in search_results:
        print(result)
else:
    print("No matching books found.")

No matching books found.
