
## **1. Data Collection**
### **1.1. Get the list of Michelin restaurants**

In [1]:
import requests
from bs4 import BeautifulSoup
import os
import pandas as pd
from IPython.display import display

In [2]:
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
} # user agent is used to simulate that the http request comes from a real web browser, this prevent the server from blocking requests

def guide_michelin(): # 2037
        links = []
        for i in range(1,101): #100
            link = "https://guide.michelin.com/en/it/restaurants/page/{}".format(i)
            try:
                response = requests.get(link, headers=headers)
            except Exception as e:
                print(f"{e} \n {link}")
                continue
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')
                section = soup.find('div', class_="row restaurant__list-row js-restaurant__list_items")
                if section:  
                    for a_tag in section.find_all('a', href=True):
                        href = 'https://guide.michelin.com' + a_tag['href']
                        if href not in links and "/restaurant/" in href: 
                            links.append(href)
            else:
                print(f"Failed to retrieve page {i}")    
        return links

url_set = guide_michelin()
print(len(url_set))

1981


In [3]:
with open('links.txt', 'w') as f:
    for url in url_set:
        f.write(url + '\n')

### **1.2. Crawl Michelin restaurant pages**

In [4]:
if not os.path.exists('pages'):
    os.makedirs('pages')

with open('links.txt', 'r') as f:
    urls = f.read().splitlines()

# Create directories and save HTML documents
for index, url in enumerate(urls):
    page_number = index // 20 + 1
    directory = os.path.join('pages', f'page_{page_number}')
    if not os.path.exists(directory):
        os.makedirs(directory)
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            file_path = os.path.join(directory, f'document_{index}.html')
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(response.text)
        else:
            print(f"Failed to retrieve {url}")
    except Exception as e:
        print(f"Error fetching {url}: {e}")

print("HTML documents saved successfully.")

HTML documents saved successfully.


In [5]:
dir_paths = [os.path.join('pages', dir) for dir in os.listdir('pages')]
len(dir_paths)

100

### **1.3. Parse downloaded pages**

In [6]:
# Function to extract restaurant details from HTML content
def extract_restaurant_details(content):
    
    # Extract the restaurant name
    name = content.find('h1', class_='data-sheet__title').get_text(strip=True) if content.find('h1', class_='data-sheet__title') else ""
    
    # Extract the first row of basic information
    firstRow = content.find_all("div", class_="data-sheet__block--text")[0].get_text(strip=True)
    #firstRow = content.find("div", class_="data-sheet__block--text").get_text(strip=True)
    firstRow_list = [info.strip() for info in firstRow.split(",")]

    address = " ".join(firstRow_list[:-3]) if len(firstRow_list) > 3 else ""
    city = firstRow_list[-3] if len(firstRow_list) > 2 else ""
    postalCode = firstRow_list[-2] if len(firstRow_list) > 1 else ""
    country = firstRow_list[-1] if firstRow_list else ""

    # Extract the second row of basic information
    secondRow = content.find_all("div", class_="data-sheet__block--text")[1].get_text(strip=True)
    #secondRow = content.find("div", class_="data-sheet__block--text").get_text(strip=True)
    secondRow_list = [info.strip() for info in secondRow.split("·")]

    priceRange = secondRow_list[0] if secondRow_list else ""
    cuisineType = secondRow_list[1] if len(secondRow_list) > 1 else ""

    # Extract the description
    description = content.find("div", class_="data-sheet__description").get_text(strip=True) if content.find("div", class_="data-sheet__description") else ""

    # Extract facilities and services
    facilitiesServices_div = content.find_all("div", class_="col col-12 col-lg-6")
    # facilitiesServices_div = content.find("div", class_="col col-12 col-lg-6")
    facilitiesServices = [li.get_text(strip=True) for li in facilitiesServices_div[0].find_all("li")] if facilitiesServices_div else []
    # facilitiesServices = [li.get_text(strip=True) for li in facilitiesServices_div.find("li")] if facilitiesServices_div else []

    # Extract credit card information
    creditCards_div = content.find("div", class_="restaurant-details__services--info")
    creditCards = [os.path.basename(img["data-src"]).split("-")[0] for img in creditCards_div.find_all("img")] if creditCards_div else []

    # Extract phone number
    phoneNumber = content.find("span", attrs={"x-ms-format-detection": "none"}).get_text(strip=True) if content.find("span", attrs={"x-ms-format-detection": "none"}) else ""

    # Extract website
    website_div = content.find("div", class_="collapse__block-item link-item")
    website = website_div.find("a", class_="link js-dtm-link")["href"] if website_div and website_div.find("a", class_="link js-dtm-link") else ""

    # Return the extracted data as a dictionary
    return {
        "restaurantName": name,
        "address": address,
        "city": city,
        "postalCode": postalCode,
        "country": country,
        "priceRange": priceRange,
        "cuisineType": cuisineType,
        "description": description,
        "facilitiesServices": facilitiesServices,
        "creditCards": creditCards,
        "phoneNumber": phoneNumber,
        "website": website
    }

# Collecting data from all HTML files
#folder_paths = [d for d in os.listdir('pages') if os.path.isdir(d) and d.startswith("page_")]
dir_paths = [os.path.join('pages', dir) for dir in os.listdir('pages')]

data = []
for dir in dir_paths:
    for html_file in os.listdir(dir):
        if html_file.endswith(".html"):
            with open(os.path.join(dir, html_file), "r", encoding="utf-8") as file:
                soup = BeautifulSoup(file, "html.parser")
                restaurant_details = extract_restaurant_details(soup)
                data.append(restaurant_details)

# Create a DataFrame from the data list
df = pd.DataFrame(data)

df.columns = ["restaurantName", "address", "city", "postalCode", "country", "priceRange", "cuisineType", "description", "facilitiesServices", "creditCards", "phoneNumber", "website"]


In [7]:
# Display the DataFrame
display(df)

Unnamed: 0,restaurantName,address,city,postalCode,country,priceRange,cuisineType,description,facilitiesServices,creditCards,phoneNumber,website
0,O Me O Il Mare,Via Roma 45/47,Gragnano,80054,Italy,€€€€,"Italian Contemporary, Modern Cuisine","Known around the world as the town of pasta, G...","[Air conditioning, Interesting wine list, Whee...","[amex, dinersclub, mastercard, visa]",+39 081 620 0550,http://omeoilmare.com
1,Donevandro,via Garibaldi 2,Popoli,65026,Italy,€€,"Contemporary, Seasonal Cuisine","Up until a few years ago, the owner-chef at th...",[Air conditioning],"[mastercard, visa]",+39 388 887 6858,http://www.donevandroristorante.it
2,Dama,Via Mulino località San Gaudenzio,Cervesina,27050,Italy,€€€,Modern Cuisine,Situated just a few kilometres from Voghera in...,"[Garden or park, Wheelchair access]","[amex, mastercard, visa]",+39 0383 3331,https://www.hcsg.it/ristorante/
3,Il Ristorante Alain Ducasse Napoli,Via Cristoforo Colombo 45,Naples,80133,Italy,€€€€,"Creative, Mediterranean Cuisine","Alain Ducasse, one of the great names in conte...","[Air conditioning, Great view, Interesting win...","[amex, dinersclub, discover, maestrocard, mast...",+39 081 604 1580,https://theromeocollection.com/en/romeo-napoli...
4,Palazzo Utini,via Antonio Gramsci 6,Noceto,43015,Italy,€€€€,"Creative, Contemporary",This historic building in the town centre has ...,"[Air conditioning, Interesting wine list, Whee...","[amex, dinersclub, mastercard, visa]",+39 0521 152 1001,https://palazzoutini.com
...,...,...,...,...,...,...,...,...,...,...,...,...
1978,Rendenèr Alpine Food,Via Sorano 35,Pinzolo,38086,Italy,€€,Contemporary,"Situated within the Lory hotel, this restauran...",[Car park],"[amex, jcb, maestrocard, mastercard, visa]",+39 0465 502008,https://rendeneralpinefood.it/
1979,Umami,Via Ugo Secondo Partigiano 1,Badalucco,18010,Italy,€€,Modern Cuisine,A young chef with experience in renowned resta...,"[Terrace, Wheelchair access]","[amex, mastercard, visa]",+39 331 338 6005,https://www.umamirestaurant.it/
1980,Visione Restaurant and Living,Strada Nicolini Basso 34 loc. Tre Stelle,Barbaresco,12050,Italy,€€€,"Contemporary, Piedmontese","At this restaurant, new, young and enthusiasti...","[Air conditioning, Car park]","[amex, maestrocard, mastercard, visa]",+39 328 134 0218,https://www.ristorantevisione.it
1981,Ristorante de LEN,Via Cesare Battisti 66,Cortina d'Ampezzo,32043,Italy,€€,Regional Cuisine,Just a stone’s throw from the central and very...,[Wheelchair access],"[amex, dinersclub, mastercard, visa]",+39 0436 4246,https://hoteldelen.it


In [14]:
pip install nltk

Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting click (from nltk)
  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting joblib (from nltk)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2024.11.6-cp313-cp313-win_amd64.whl.metadata (41 kB)
Collecting tqdm (from nltk)
  Downloading tqdm-4.67.0-py3-none-any.whl.metadata (57 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.5 MB ? eta -:--:--
   ---------------------------------- ----- 1.3/1.5 MB 2.9 MB/s eta 0:00:01
   ---------------------------------------- 1.5/1.5 MB 2.8 MB/s eta 0:00:00
Downloading regex-2024.11.6-cp313-cp313-win_amd64.whl (273 kB)
Downloading click-8.1.7-py3-none-any.whl (97 kB)
Downloading joblib-1.4.2-py3-none-any.whl (301 kB)
Downloading tqdm-4.67.0-py3-none-any.whl


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# 2  Search Engine

### 2.0 Preprocessing the Text

In [15]:

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import string


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


In [16]:
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

def preprocess_text(text):
    # Lowercase the text
    text = text.lower()
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Tokenize and remove stopwords, then apply stemming
    tokens = [stemmer.stem(word) for word in text.split() if word not in stop_words]
    return ' '.join(tokens)

# Apply to the description field
df['processed_description'] = df['description'].apply(preprocess_text)


### 2.1 Conjunctive Query

### 2.1.1 Create the Index!

In [17]:
from collections import defaultdict
import pandas as pd

vocabulary = {}
inverted_index = defaultdict(list)
term_id_counter = 0

for doc_id, description in enumerate(df['processed_description']):
    for word in description.split():
        # Map each unique word to a term_id
        if word not in vocabulary:
            vocabulary[word] = term_id_counter
            term_id_counter += 1
        term_id = vocabulary[word]
        inverted_index[term_id].append(doc_id)

# Save the vocabulary to a CSV file
pd.DataFrame(list(vocabulary.items()), columns=['term', 'term_id']).to_csv('vocabulary.csv', index=False)


In [18]:
import json

with open('inverted_index.json', 'w') as f:
    json.dump(inverted_index, f)


### 2.1.2 Execute the Query

In [None]:
def preprocess_query(query):
    query = query.lower()
    query = query.translate(str.maketrans('', '', string.punctuation))
    tokens = [stemmer.stem(word) for word in query.split() if word not in stop_words]
    return tokens

def conjunctive_query(query):
    query_terms = preprocess_query(query)
    term_ids = [vocabulary.get(term) for term in query_terms if term in vocabulary]

    if not term_ids:
        return pd.DataFrame(columns=["restaurantName", "address", "description", "website"])

    # Start with the document list for the first term, then intersect with others
    matching_docs = set(inverted_index[term_ids[0]])
    for term_id in term_ids[1:]:
        matching_docs &= set(inverted_index[term_id])

    results = df.loc[list(matching_docs), ["restaurantName", "address", "description", "website"]]
    return results



In [24]:
pip install scikit-learn


Collecting scikit-learnNote: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip



  Downloading scikit_learn-1.5.2-cp313-cp313-win_amd64.whl.metadata (13 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.14.1-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.2-cp313-cp313-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   -- ------------------------------------- 0.8/11.0 MB 3.2 MB/s eta 0:00:04
   ----- ---------------------------------- 1.6/11.0 MB 3.4 MB/s eta 0:00:03
   --------- ------------------------------ 2.6/11.0 MB 4.0 MB/s eta 0:00:03
   ----------- ---------------------------- 3.1/11.0 MB 3.7 MB/s eta 0:00:03
   --------------- ------------------------ 4.2/11.0 MB 3.9 MB/s eta 0:00:02
   ------------------- -------------------- 5.2/11.0 MB 4.0 MB/s eta 0:00:02
   -----------------

In [25]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['processed_description'])


In [26]:
tfidf_index = defaultdict(list)
feature_names = tfidf_vectorizer.get_feature_names_out()

# Loop over each term (feature) in the TF-IDF matrix
for term_id, term in enumerate(feature_names):
    # Get non-zero document indices and the corresponding scores for this term
    doc_indices = tfidf_matrix[:, term_id].nonzero()[0]
    scores = tfidf_matrix[:, term_id].data
    
    # Append each document ID and score to the tfidf_index dictionary 
    for doc_id, score in zip(doc_indices, scores):
        tfidf_index[term].append((doc_id, score))



### 2.2 Ranked Search Engine with TF-IDF and Cosine Similarity

In [27]:
from sklearn.metrics.pairwise import cosine_similarity

def ranked_query(query, top_k=5):
    query_vec = tfidf_vectorizer.transform([preprocess_text(query)])
    cosine_similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()
    top_doc_indices = cosine_similarities.argsort()[-top_k:][::-1]

    results = df.loc[top_doc_indices, ['restaurantName', 'address', 'description', 'website']]
    results['similarity_score'] = cosine_similarities[top_doc_indices]
    return results



### Testing

In [28]:
# Test the conjunctive query
query = "modern seasonal cusine"
conjunctive_results = conjunctive_query(query)
print(conjunctive_results)
display(conjunctive_results)
# Test the ranked query
ranked_results = ranked_query(query, top_k=5)
print(ranked_results)
display(ranked_results)


                            restaurantName  \
1792                              Radimare   
519                               La Valle   
520                              Esplanade   
9                                    20Tre   
136                             Il Galeone   
...                                    ...   
887                            San Giorgio   
1272  La Musa Restaurant & Rooftop Terrace   
1146                                 Razzo   
1661                       Osteria Taviani   
1918                               Il Tino   

                                      address  \
1792       via Beato Piergiorgio Frassati 5/a   
519   via Umberto I 25 località Valle Sauglio   
520                               via Lario 3   
9                    via David Chiossone 20 r   
136                       piazzale Amendola 2   
...                                       ...   
887                 viale Brigate Bisagno 69r   
1272                         Località Cini 29   
1146  

Unnamed: 0,restaurantName,address,description,website
1792,Radimare,via Beato Piergiorgio Frassati 5/a,There’s no tasting menu at this restaurant but...,http://www.radimare.com
519,La Valle,via Umberto I 25 località Valle Sauglio,A well - run restaurant in a quiet area just o...,https://www.ristorantelavalle.it/
520,Esplanade,via Lario 3,"One of Italy’s long-established restaurants, t...",https://www.ristorante-esplanade.com/
9,20Tre,via David Chiossone 20 r,"Run by three partners, this contemporary-style...",https://www.ristorante20tregenova.it/
136,Il Galeone,piazzale Amendola 2,Housed on the ground floor of the Elisabeth Du...,https://www.ilgaleone.net/
...,...,...,...,...
887,San Giorgio,viale Brigate Bisagno 69r,Situated in the city albeit not right in the c...,https://www.ristorantesangiorgiogenova.it/
1272,La Musa Restaurant & Rooftop Terrace,Località Cini 29,Just six intimate tables in a modern restauran...,https://ristorante-la-musa.com
1146,Razzo,via Andrea Doria 17/f,"A quiet restaurant with a relaxed, young and m...",https://vadoarazzo.it/
1661,Osteria Taviani,piazza Vittorio Emanuele II 28,"This pleasant, warmly decorated restaurant is ...",


     restaurantName                                  address  \
1654       La Botte                 via Giuseppe Garibaldi 8   
172            Saur                     via Filippo Turati 8   
1146          Razzo                    via Andrea Doria 17/f   
997    Piccolo Lord              corso San Maurizio 69 bis/g   
519        La Valle  via Umberto I 25 località Valle Sauglio   

                                            description  \
1654  A modern and welcoming contemporary bistro sit...   
172   In a tiny rural village, this contemporary, al...   
1146  A quiet restaurant with a relaxed, young and m...   
997   Professional service in a welcoming, modern re...   
519   A well - run restaurant in a quiet area just o...   

                                    website  similarity_score  
1654   http://www.trattorialabottestresa.it          0.281255  
172               https://ristorantesaur.it          0.279414  
1146                 https://vadoarazzo.it/          0.252421  
997 

Unnamed: 0,restaurantName,address,description,website,similarity_score
1654,La Botte,via Giuseppe Garibaldi 8,A modern and welcoming contemporary bistro sit...,http://www.trattorialabottestresa.it,0.281255
172,Saur,via Filippo Turati 8,"In a tiny rural village, this contemporary, al...",https://ristorantesaur.it,0.279414
1146,Razzo,via Andrea Doria 17/f,"A quiet restaurant with a relaxed, young and m...",https://vadoarazzo.it/,0.252421
997,Piccolo Lord,corso San Maurizio 69 bis/g,"Professional service in a welcoming, modern re...",https://www.ristorantepiccololord.it/,0.231283
519,La Valle,via Umberto I 25 località Valle Sauglio,A well - run restaurant in a quiet area just o...,https://www.ristorantelavalle.it/,0.219998


# 3. Define a New Score!


In [29]:
import heapq
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd


In [None]:
''' def calculate_custom_score(doc, query_tfidf, doc_tfidf, cuisine_preferences, service_preferences, price_preferences):
    score = 0

    # Description score using TF-IDF similarity
    description_score = cosine_similarity(query_tfidf, doc_tfidf)[0, 0]
    score += description_score * 0.5  # Higher weight for description

    # Score for cuisine type match
    if any(cuisine.lower() in doc['cuisineType'].lower() for cuisine in cuisine_preferences):
        score += 0.2

    # Score for service match
    matching_services = sum(1 for service in service_preferences if service.lower() in [s.lower() for s in doc['facilitiesServices']])
    score += matching_services * 0.1  # Adds 0.1 for each matching service

    # Score for price range
    if doc['priceRange'] in price_preferences:
        score += 0.2  # Additional weight for preferred price range

    return score



In [None]:
'''def search_rank_restaurants(query, tfidf_matrix, vectorizer, df, top_k=5, cuisine_preferences=None, service_preferences=None, price_preferences=None):
    # Transform the query into a TF-IDF vector
    query_tfidf = vectorizer.transform([query])
    heap = []

    for doc_id, doc in df.iterrows():
        # Get the TF-IDF vector of the current document
        doc_tfidf = tfidf_matrix[doc_id]

        # Calculate the custom score for the current document
        score = calculate_custom_score(
            doc,
            query_tfidf,
            doc_tfidf,
            cuisine_preferences or [],
            service_preferences or [],
            price_preferences or []
        )

        # Keep only the top-k results in the heap
        if len(heap) < top_k:
            heapq.heappush(heap, (score, doc_id))
        else:
            heapq.heappushpop(heap, (score, doc_id))

    # Sort results in descending order of score
    ranked_results = sorted(heap, key=lambda x: x[0], reverse=True)

    # Collect the final data for each restaurant
    results = []
    for score, doc_id in ranked_results:
        row = df.iloc[doc_id]
        results.append({
            "restaurantName": row["restaurantName"],
            "address": row["address"],
            "description": row["description"],
            "website": row["website"],
            "custom_score": round(score, 3)
        })

    results_df = pd.DataFrame(results)
    return results_df


In [36]:
from sklearn.metrics.pairwise import cosine_similarity

def calculate_cosine_similarity(query):
    # Converti la query in vettore TF-IDF
    query_tfidf = tfidf_vectorizer.transform([query])
    
    # Calcola la similarità coseno tra la query e tutte le descrizioni
    cosine_similarities = cosine_similarity(query_tfidf, tfidf_matrix).flatten()
    
    return cosine_similarities


In [40]:
import heapq

def custom_scoring(query, k=10):
    # Ottieni le similarità di coseno tra la query e le descrizioni
    cosine_similarities = query_similarity(query)
    
    # Heap per mantenere i top-k risultati
    top_k_restaurants = []
    
    # Itera su tutti i documenti e calcola il punteggio personalizzato
    for doc_id, cosine_score in enumerate(cosine_similarities):
        # Descrizione (similarità di coseno tra query e descrizione)
        description_score = cosine_score
        
        # Incremento per corrispondenza nel `cuisineType`
        cuisine_score = 1 if 'cuisineType' in df.columns and query in df.loc[doc_id, 'cuisineType'] else 0
        
        # Incremento per corrispondenza nei `facilitiesServices`
        facilities = df.loc[doc_id, 'facilitiesServices'] if 'facilitiesServices' in df.columns else ""
        facilities_score = sum(1 for facility in ["Terrace", "Air conditioning"] if facility in facilities)
        
        # Punteggio per `priceRange` con livelli
        if doc['priceRange'] in price_preferences:
            score += 0.2
        
        # Calcolo del punteggio finale
        final_score = (0.5 * description_score) + (0.2 * cuisine_score) + (0.2 * facilities_score) + (0.1 * price_score)
        
        # Aggiungi al heap solo se il punteggio è maggiore del minimo attuale
        if len(top_k_restaurants) < k:
            heapq.heappush(top_k_restaurants, (final_score, doc_id))
        else:
            heapq.heappushpop(top_k_restaurants, (final_score, doc_id))
    
    # Ordina i risultati in ordine decrescente di punteggio
    top_k_restaurants = sorted(top_k_restaurants, key=lambda x: x[0], reverse=True)
    
    # Prepara l'output
    results = []
    for score, doc_id in top_k_restaurants:
        results.append({
            "restaurantName": df.loc[doc_id, "restaurantName"],
            "address": df.loc[doc_id, "address"],
            "description": df.loc[doc_id, "description"],
            "website": df.loc[doc_id, "website"],
            "custom_score": score
        })
    
    return pd.DataFrame(results)



In [41]:
# Example usage
query = "seafood"
cuisine_preferences = ["Italian"]
service_preferences = ["Terrace", "Air conditioning"]
price_preferences = ["$", "$$"]
top_k = 5

# Call the function and display the results
results_df = search_rank_restaurants(query, tfidf_matrix, tfidf_vectorizer, df, top_k, cuisine_preferences, service_preferences, price_preferences)
display(results_df)


Unnamed: 0,restaurantName,address,description,website,custom_score
0,Il Merlo,via Bernardini 660,"Situated right on the beach, Il Merlo boasts a...",https://www.ilmerlocamaiore.it/,0.535
1,Attico sul Mare,piazza Kursaal 6,Situated on the second and top floor (don’t wo...,http://www.atticosulmare.it,0.483
2,Extra,viale Turigliano 13,Situated slightly inland behind Marina di Carr...,https://www.extracarrara.it/,0.463
3,Impronta,via Angarano 7,"Located right next to the Ponte Vecchio, the s...",https://improntaristorante.it,0.456
4,Almondo Trattoria,piazza Gran Madre di Dio 2/l,This simple yet charming restaurant located ju...,http://www.almondotrattoria.it,0.453
