In [1]:
import subprocess
import sys

# List of required packages
required_packages = ['requests', 'beautifulsoup4', 'pandas']

# Function to install packages if they are not already installed
def install_package(package):
    try:
        __import__(package)
    except ImportError:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Install any missing packages
for package in required_packages:
    install_package(package)

In [2]:
import requests
from bs4 import BeautifulSoup
import time
import os
import glob
import pandas as pd


In [3]:
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; SM-G928X Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36'}
start_url = "https://guide.michelin.com/en/it/restaurants"
base_url = "https://guide.michelin.com"
next_page = start_url
link_list = []
while next_page:
    # Request page content
    response = requests.get(next_page,verify=False, headers=headers)
    soup = BeautifulSoup(response.content, features="lxml")
    # Find all restaurant links on the current page
    for link in soup.select("a.link"):
        href = link.get("href")
        if href and "/restaurant/" in href:
            link_list.append(base_url + href)
    # Look for the 'Next' button to proceed to the next page
    next_button = soup.find_all("a", class_="btn btn-outline-secondary btn-sm btn-carousel__link", href=True)
    if next_button:
        for content in next_button:
            if content.find("span", class_="icon fal fa-angle-right"):
                next_page = base_url+content["href"]
                break
            else:
                next_page = None

    else:
        next_page = None


# Display the collected links
print(f"Found {len(link_list)} restaurants:")
# Save to a text file
with open("urls.txt", "w") as file:
    for url in link_list:
        file.write(f"{url}\n")




Found 1983 restaurants:


In [4]:
# it is gonna take more 10 minutes
for index, link in enumerate(link_list):
    cnt = requests.get(link, verify=False, headers=headers)
    if cnt.status_code==200:
        html = BeautifulSoup(cnt.content, features="lxml")
        # Define the name of the subfolder and the filename
        subfolder = f"HTML/Page {str((index+20)//20)}"
        filename = f"{(link[link.rfind('/') + 1:]).replace('-', ' ')}.html"
        file_path = os.path.join(subfolder, filename)

        # Check if the subfolder exists, create it if it doesn't
        if not os.path.exists(subfolder):
            os.makedirs(subfolder)
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(html.prettify())
    else:
        print("Request denied!")
        break



In [5]:

# Initialize an empty list to store the rows for DataFrame
data = []
# Define the base directory
base_directory = "HTML"
# Use glob to find all directories matching "Page*"
page_folders = glob.glob(os.path.join(base_directory, "Page *"))

# Loop through each Page* directory
for page_folder in page_folders:
    # Get all HTML files in the current Page* directory
    html_files = glob.glob(os.path.join(page_folder, "*.html"))

    # Read each HTML file
    for html_file in html_files:
        with open(html_file, "r", encoding='utf-8') as file:  # Ensure correct encoding
            content = BeautifulSoup(file.read(), "html.parser")
            # Extract the required information
            restaurantName = content.find("h1",class_="data-sheet__title").get_text().strip() if content.find("h1",class_="data-sheet__title") else ""

            basic_info_first_row_list=content.findAll("div",class_="data-sheet__block--text")[0].text
            basic_info_first_row_striped_list = [info.strip() for info in basic_info_first_row_list.split(",")]
            address = " ".join(basic_info_first_row_striped_list[:-3]) if basic_info_first_row_striped_list[:-3] else ""
            city = basic_info_first_row_striped_list[-3] if basic_info_first_row_striped_list[-3] else ""
            postal_code = basic_info_first_row_striped_list[-2]  if basic_info_first_row_striped_list[-2] else ""
            country = basic_info_first_row_striped_list[-1]  if basic_info_first_row_striped_list[-1] else ""


            basic_info_second_row_list=content.findAll("div",class_="data-sheet__block--text")[1].text
            basic_info_second_row_striped_list = [info.strip() for info in basic_info_second_row_list.split("·")]

            priceRange = basic_info_second_row_striped_list[0] if basic_info_second_row_striped_list[0] else ""
            cuisineType = basic_info_second_row_striped_list[1]  if basic_info_second_row_striped_list[1] else ""

            description = content.find("div",class_="data-sheet__description").get_text().strip() if content.find("div",class_="data-sheet__description") else ""

            facilitiesServices_div = content.findAll("div", class_="col col-12 col-lg-6")
            facilitiesServices = [li.get_text(strip=True) for li in facilitiesServices_div[0].find_all("li")] if facilitiesServices_div[0] else ""

            div_creditCard = content.find("div", class_="restaurant-details__services--info")

            creditCards = [os.path.basename(img['data-src']).split('-')[0] for img in div_creditCard.find_all("img")] if div_creditCard else ""


            phoneNumber = content.find("span", attrs={"x-ms-format-detection": "none"}).get_text().strip() if content.find("span", attrs={"x-ms-format-detection": "none"}) else ""


            div_website = content.find("div", class_="collapse__block-item link-item")

            # Find the <a> tag within this container and get the href attribute
            a_website = div_website.find("a", class_="link js-dtm-link") if div_website else ""
            website = a_website.get("href") if a_website!="" else ""


            # Append the extracted info as a new row to the list
            data.append([restaurantName,address,city,postal_code,country,priceRange,cuisineType,description,facilitiesServices,creditCards,phoneNumber,website])


# Create a DataFrame from the data list
df = pd.DataFrame(data, columns=["restaurantName","Address","City","Postal Code","Country","Price Range","Cuisine Type","Description","facilitiesServices","creditCards","phoneNumber","website"])

display(df)



# Iterate through each row in the DataFrame
for i, row in df.iterrows():
    # Define the file name using the index
    file_name = f"restaurant_{i}.tsv"

    # Prepare row data as a single line with tab-separated values
    content =  f"{row['restaurantName']}\t{row['Address']}\t{row['City']}\t{row['Postal Code']}\t{row['Country']}\t{row['Price Range']}\t{row['Cuisine Type']}\t{row['Description']}\t{row['facilitiesServices']}\t{row['creditCards']}\t{row['phoneNumber']}\t{row['website']}\n"

    subfolder = f"tsv_files"
    file_path = os.path.join(subfolder, file_name)

    # Check if the subfolder exists, create it if it doesn't
    if not os.path.exists(subfolder):
        os.makedirs(subfolder)
    # Write the row data to the .tsv file
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(content)

    print(f"Created file: {file_name}")

Unnamed: 0,restaurantName,Address,City,Postal Code,Country,Price Range,Cuisine Type,Description,facilitiesServices,creditCards,phoneNumber,website
0,Baccanti,via Sant'Angelo 58/61,Matera,75100,Italy,€€,Modern Cuisine,Facing Matera’s picturesque cave churches in o...,"[Interesting wine list, Terrace]","[amex, dinersclub, mastercard, visa]",+39 0835 333704,https://www.baccantiristorante.com/
1,La Gritta,Vicolo del Faro,Palau,07020,Italy,€€€,"Seafood, Mediterranean Cuisine",Two of your senses will experience powerful ma...,"[Car park, Garden or park, Great view, Restaur...","[amex, maestrocard, mastercard, visa]",+39 0789 708045,https://www.ristorantelagritta.it/
2,Osteria della Villetta,via Marconi 104,Palazzolo sull'Oglio,25036,Italy,€,"Lombardian, Traditional Cuisine",A bulwark of Brescian cuisine for over a centu...,[Terrace],"[amex, mastercard, visa]",+39 030 740 1899,https://www.osteriadellavilletta.it/
3,Le Miniere,piazza Martiri 1944 4,Traversella,10080,Italy,€,"Piedmontese, Traditional Cuisine",This restaurant stands on the main square of a...,"[Great view, Terrace, Wheelchair access]","[amex, dinersclub, mastercard, visa]",+39 0125 794006,https://www.albergominiere.com/
4,Lipadusa,via Bonfiglio 16,Lampedusa,92010,Italy,€€,Mediterranean Cuisine,"In the centre of the village, a classic very s...","[Air conditioning, Terrace]","[dinersclub, mastercard, visa]",+39 0922 970267,
...,...,...,...,...,...,...,...,...,...,...,...,...
1978,Antica Osteria Il Monte Rosso,via Troubetzkoy 128,Suna,28925,Italy,€€,"Country cooking, Classic Cuisine",Situated on the lakeside in the residential di...,"[Air conditioning, Interesting wine list, Terr...","[amex, mastercard, visa]",+39 0323 506056,http://www.osteriamonterosso.com
1979,Onda Blu,via Orsa Minore 1,San Mauro a Mare,47030,Italy,€€€,Seafood,Almost appearing to rise up directly out of th...,"[Air conditioning, Interesting wine list, Terr...","[amex, jcb, maestrocard, mastercard, visa]",+39 0541 344886,https://www.ristoranteondablu.com/
1980,San Martino,piazza Cappelletto 1 località Rio San Martino,Scorzè,30037,Italy,€€€,Modern Cuisine,"A smart, elegant restaurant with a bright, min...","[Air conditioning, Interesting wine list]","[mastercard, visa]",+39 041 584 0648,https://www.ristorantesanmartino.info/
1981,Agli Amici,via Liguria 252,Godia,33100,Italy,€€€€,"Country cooking, Modern Cuisine","Serving signature cuisine, Agli Amici is one o...","[Air conditioning, Interesting wine list, Rest...","[amex, mastercard, visa]",+39 0432 565411,https://www.agliamici.it/


Created file: restaurant_0.tsv
Created file: restaurant_1.tsv
Created file: restaurant_2.tsv
Created file: restaurant_3.tsv
Created file: restaurant_4.tsv
Created file: restaurant_5.tsv
Created file: restaurant_6.tsv
Created file: restaurant_7.tsv
Created file: restaurant_8.tsv
Created file: restaurant_9.tsv
Created file: restaurant_10.tsv
Created file: restaurant_11.tsv
Created file: restaurant_12.tsv
Created file: restaurant_13.tsv
Created file: restaurant_14.tsv
Created file: restaurant_15.tsv
Created file: restaurant_16.tsv
Created file: restaurant_17.tsv
Created file: restaurant_18.tsv
Created file: restaurant_19.tsv
Created file: restaurant_20.tsv
Created file: restaurant_21.tsv
Created file: restaurant_22.tsv
Created file: restaurant_23.tsv
Created file: restaurant_24.tsv
Created file: restaurant_25.tsv
Created file: restaurant_26.tsv
Created file: restaurant_27.tsv
Created file: restaurant_28.tsv
Created file: restaurant_29.tsv
Created file: restaurant_30.tsv
Created file: rest

## ***2.0 Pre-processing:***

In [6]:
!pip install nltk

import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('words')
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk





[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


In [7]:
from nltk.corpus import words

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
english_words = set(words.words())  # İngilizce kelime listesi

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    words = word_tokenize(text)
    words = [
        lemmatizer.lemmatize(word)
        for word in words
        if word not in stop_words and len(word) > 2 and word in english_words
    ]
    return words

In [8]:
df['Description'] = df['Description'].str.lower()
df['processed_description'] = df['Description'].apply(preprocess_text)
df['processed_description']

Unnamed: 0,processed_description
0,"[facing, picturesque, cave, one, charming, old..."
1,"[two, experience, powerful, marine, place, sig..."
2,"[bulwark, cuisine, century, restaurant, art, s..."
3,"[restaurant, main, square, delightful, village..."
4,"[village, classic, simple, atmosphere, family,..."
...,...
1978,"[situated, lakeside, residential, district, sm..."
1979,"[almost, rise, directly, sand, unexpectedly, e..."
1980,"[smart, elegant, restaurant, bright, large, ta..."
1981,"[serving, signature, cuisine, one, leading, bo..."


In [9]:
df.head()

Unnamed: 0,restaurantName,Address,City,Postal Code,Country,Price Range,Cuisine Type,Description,facilitiesServices,creditCards,phoneNumber,website,processed_description
0,Baccanti,via Sant'Angelo 58/61,Matera,75100,Italy,€€,Modern Cuisine,facing matera’s picturesque cave churches in o...,"[Interesting wine list, Terrace]","[amex, dinersclub, mastercard, visa]",+39 0835 333704,https://www.baccantiristorante.com/,"[facing, picturesque, cave, one, charming, old..."
1,La Gritta,Vicolo del Faro,Palau,7020,Italy,€€€,"Seafood, Mediterranean Cuisine",two of your senses will experience powerful ma...,"[Car park, Garden or park, Great view, Restaur...","[amex, maestrocard, mastercard, visa]",+39 0789 708045,https://www.ristorantelagritta.it/,"[two, experience, powerful, marine, place, sig..."
2,Osteria della Villetta,via Marconi 104,Palazzolo sull'Oglio,25036,Italy,€,"Lombardian, Traditional Cuisine",a bulwark of brescian cuisine for over a centu...,[Terrace],"[amex, mastercard, visa]",+39 030 740 1899,https://www.osteriadellavilletta.it/,"[bulwark, cuisine, century, restaurant, art, s..."
3,Le Miniere,piazza Martiri 1944 4,Traversella,10080,Italy,€,"Piedmontese, Traditional Cuisine",this restaurant stands on the main square of a...,"[Great view, Terrace, Wheelchair access]","[amex, dinersclub, mastercard, visa]",+39 0125 794006,https://www.albergominiere.com/,"[restaurant, main, square, delightful, village..."
4,Lipadusa,via Bonfiglio 16,Lampedusa,92010,Italy,€€,Mediterranean Cuisine,"in the centre of the village, a classic very s...","[Air conditioning, Terrace]","[dinersclub, mastercard, visa]",+39 0922 970267,,"[village, classic, simple, atmosphere, family,..."


## ***2.1 Conjuctive Query:***

In [10]:
import pandas as pd

vocab_dict = {}
term_id = 0

all_uniqe_words = set(word for description in df["processed_description"] for word in description)

for word in all_uniqe_words:
    vocab_dict[word] = term_id
    term_id+=1

vocab_dict

{'cardoon': 0,
 'skewer': 1,
 'decisive': 2,
 'apply': 3,
 'freely': 4,
 'strange': 5,
 'viewpoint': 6,
 'accompaniment': 7,
 'costume': 8,
 'boundary': 9,
 'kindergarten': 10,
 'pear': 11,
 'robustly': 12,
 'big': 13,
 'lobster': 14,
 'sample': 15,
 'explanation': 16,
 'alternatively': 17,
 'towards': 18,
 'assured': 19,
 'nativity': 20,
 'faith': 21,
 'sum': 22,
 'limit': 23,
 'previously': 24,
 'church': 25,
 'busy': 26,
 'abundant': 27,
 'music': 28,
 'lucky': 29,
 'sitting': 30,
 'imaginative': 31,
 'chic': 32,
 'tone': 33,
 'franco': 34,
 'marc': 35,
 'girl': 36,
 'spruce': 37,
 'lovely': 38,
 'classy': 39,
 'travelled': 40,
 'promise': 41,
 'concentrate': 42,
 'locanda': 43,
 'exacting': 44,
 'overnight': 45,
 'formal': 46,
 'stove': 47,
 'pool': 48,
 'doubt': 49,
 'center': 50,
 'outdoors': 51,
 'tooth': 52,
 'shaving': 53,
 'comprise': 54,
 'chip': 55,
 'peach': 56,
 'laundry': 57,
 'mezzanine': 58,
 'pork': 59,
 'tombola': 60,
 'ladle': 61,
 'oversee': 62,
 'delight': 63,
 'u

In [11]:
vocab_df = pd.DataFrame(list(vocab_dict.items()), columns=['term', 'term_id'])
vocab_df.to_csv('vocabulary.csv', index=False)

vocab_df.head()

Unnamed: 0,term,term_id
0,cardoon,0
1,skewer,1
2,decisive,2
3,apply,3
4,freely,4


In [12]:
from collections import defaultdict
import json

inverted_idx = defaultdict(list)

for idx, description in enumerate(df['processed_description']):
    for word in description:
        term_id = vocab_dict[word]

        if idx not in inverted_idx[term_id]:
            inverted_idx[term_id].append(idx)


with open('inverted_index.json', 'w') as f:
    json.dump(inverted_idx, f)


In [13]:
def process_query(query, vocab_dict, inverted_index, df):
    # 1. Sorguyu işlemden geçir
    query_words = preprocess_text(query)

    # 2. Her sorgu teriminin term_id'sini al ve ilgili document_id'leri bul
    doc_sets = []
    for word in query_words:
        term_id = vocab_dict.get(word)
        if term_id is not None:
            doc_sets.append(set(inverted_index.get(term_id, [])))

    # 3. Tüm sorgu kelimeleri için kesişim al
    if doc_sets:
        result_docs = set.intersection(*doc_sets)
    else:
        result_docs = set()

    # 4. Sonuçları döndür
    results = df.loc[result_docs, ["restaurantName", "Address", "Description", "website"]]
    return results


In [46]:
query = "modern seasonal cuisine"
results = process_query(query, vocab_dict, inverted_idx, df)
results

TypeError: Passing a set as an indexer is not supported. Use a list instead.

## ***2.2 Ranked Search Engine with TF-IDF and Cosine Similarity:***

BY ME


In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

def build_tfidf_inverted_index(df):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(df['processed_description'].apply(lambda x: ' '.join(x)))

    terms = vectorizer.get_feature_names_out()
    term_to_id = {term: idx for idx, term in enumerate(terms)}
    inverted_index = defaultdict(list)

    for doc_id in range(tfidf_matrix.shape[0]):
        for term_id in tfidf_matrix[doc_id].nonzero()[1]:
            tfidf_score = tfidf_matrix[doc_id, term_id]
            inverted_index[term_id].append((doc_id, tfidf_score))

    return inverted_index, term_to_id, tfidf_matrix, vectorizer


In [16]:
inverted_index, term_to_id, tfidf_matrix, vectorizer = build_tfidf_inverted_index(df)

readable_inverted_index = {
    term: [(doc_id, round(tfidf_score, 3)) for doc_id, tfidf_score in inverted_index[term_id]]
    for term, term_id in term_to_id.items()
}

for term, doc_scores in readable_inverted_index.items():
    print(f"{term}: {doc_scores}")


abate: [(1502, 0.214)]
abbey: [(674, 0.368), (917, 0.203), (1016, 0.335), (1168, 0.264)]
abbreviation: [(739, 0.466)]
ability: [(740, 0.145), (791, 0.181), (1344, 0.194), (1360, 0.2)]
able: [(461, 0.192), (1211, 0.249), (1303, 0.269), (1315, 0.235)]
ably: [(82, 0.148), (313, 0.152), (404, 0.152), (595, 0.129), (723, 0.167), (763, 0.156), (989, 0.2), (1058, 0.19), (1344, 0.161), (1409, 0.196), (1458, 0.186), (1496, 0.159), (1681, 0.245), (1697, 0.185), (1884, 0.148)]
abroad: [(171, 0.24), (301, 0.178), (1029, 0.188), (1039, 0.24), (1061, 0.207), (1105, 0.133), (1116, 0.168), (1184, 0.37), (1278, 0.256), (1289, 0.25), (1522, 0.152), (1768, 0.21), (1874, 0.159), (1946, 0.375), (1963, 0.323)]
absinthe: [(1817, 0.214)]
absolute: [(72, 0.284), (125, 0.173)]
absolutely: [(523, 0.265), (1606, 0.27), (1950, 0.236)]
abundance: [(703, 0.179), (1085, 0.343), (1180, 0.242)]
abundant: [(120, 0.271), (133, 0.288), (901, 0.248), (1015, 0.246), (1081, 0.176), (1100, 0.285), (1208, 0.237), (1817, 0.17),

In [17]:
import numpy as np

def search_query(query, tfidf_matrix, vectorizer, df, top_k=5):
    query_tfidf = vectorizer.transform([query])

    cosine_similarities = (tfidf_matrix @ query_tfidf.T).toarray().flatten()

    relevant_docs = np.argsort(-cosine_similarities)[:top_k]

    results = df.loc[relevant_docs, ["restaurantName", "Address", "Description", "website"]]
    results["Similarity Score"] = cosine_similarities[relevant_docs]

    return results


***TEST***

In [18]:
inverted_index, term_to_id, tfidf_matrix, vectorizer = build_tfidf_inverted_index(df)

query = "modern seasonal cuisine"
results = search_query(query, tfidf_matrix, vectorizer, df)

results

Unnamed: 0,restaurantName,Address,Description,website,Similarity Score
744,Saur,via Filippo Turati 8,"in a tiny rural village, this contemporary, al...",https://ristorantesaur.it,0.344817
559,20Tre,via David Chiossone 20 r,situated in the heart of genoa’s historic cent...,https://www.ristorante20tregenova.it/,0.336543
923,La Botte,via Giuseppe Garibaldi 8,a modern and welcoming contemporary bistro sit...,http://www.trattorialabottestresa.it,0.316122
1706,Retrobottega,via della Stelletta 4,minimalist decor and clean lines characterise ...,https://www.retro-bottega.com,0.297534
1292,Razzo,via Andrea Doria 17/f,"a quiet restaurant with a relaxed, young and m...",https://vadoarazzo.it/,0.284057


In [43]:
def search_query_dscore(query, tfidf_matrix, vectorizer, df, top_k=5):
    query_tfidf = vectorizer.transform([query])

    cosine_similarities = (tfidf_matrix @ query_tfidf.T).toarray().flatten()

    return cosine_similarities

def calculate_score(doc, query, vectorizer, tfidf_matrix, cuisine_preferences, facility_preferences, price_preferences):
    score = 0

    # TF-IDF vector for the query
    query_tfidf = vectorizer.transform([query])


    doc_index = doc.name  # Index of the document in the dataframe
    doc_vec = tfidf_matrix[doc_index]

    #description score
    description_score = cosine_similarity(query_tfidf, doc_vec)[0, 0]  # cosine similarity
    score += description_score


    #cuisine type score
    for cuisine in cuisine_preferences:
        if cuisine.lower() in doc['Cuisine Type'].lower():
            score += 0.2

    #facilities score
    for facility in facility_preferences:
        if facility.lower() in [f.lower() for f in doc['facilitiesServices']]:
            score += 0.1

    #price range score
    for price in price_preferences:
        if price in doc['Price Range']:
            score += 0.2

    return score




In [47]:
import heapq

def ranked_restaurants(query, tfidf_matrix, vectorizer, df, top_k=5, cuisine_preferences=None, facility_preferences=None, price_preferences=None):
    # create an heap
    heap = []

    for doc_id, doc in df.iterrows():  # for each restaurant
        # personalized score
        score = calculate_score(
            doc,  # current restourant
            query,
            vectorizer,
            tfidf_matrix,
            cuisine_preferences or [],
            facility_preferences or [],
            price_preferences or []
        )

        # top k results in the heat
        if len(heap) < top_k:
            heapq.heappush(heap, (score, doc_id))  # Adding an element (score, ID doc)
        else:
            heapq.heappushpop(heap, (score, doc_id))  # Replace the smaller item if necessary

    # Sort the heap to get the results in descending order
    ranked_results = sorted(heap, key=lambda x: x[0], reverse=True)

    # Format the final results
    results = []
    for score, doc_id in ranked_results:
        row = df.iloc[doc_id]
        results.append({
            "restaurantName": row["restaurantName"],
            "Address": row["Address"],
            "Description": row["Description"],
            "website": row["website"],
            "custom_score": round(score, 3)
        })

    results_df2 = pd.DataFrame(results)
    return results_df2


In [50]:
from IPython.display import display

query = "modern seasonal cuisine"
cuisine_preferences = ["Italian", "French"]
facility_preferences = ["Terrace", "Air conditioning"]
price_preferences = ["$$", "$$$"]
top_k = 5

results_df = ranked_restaurants(query, tfidf_matrix, vectorizer, df, top_k, cuisine_preferences, facility_preferences, price_preferences)

# Use display for a tabular view
display(results_df)



Unnamed: 0,restaurantName,Address,Description,website,custom_score
0,Saur,via Filippo Turati 8,"in a tiny rural village, this contemporary, al...",https://ristorantesaur.it,0.745
1,Locanda Solagna,piazza I Novembre 2,although this restaurant has been in business ...,https://www.locandasolagna.it/,0.637
2,Miramonti l'Altro,via Crosette 34 località Costorio,a french-italian couple is at the helm in this...,https://www.miramontilaltro.it/,0.619
3,Il Galeone,piazzale Amendola 2,housed on the ground floor of the elisabeth du...,https://www.ilgaleone.net/,0.604
4,Savô,piazza XXV Aprile 8,the reopening in 2022 of the hotel windsor wit...,http://www.thewindsor.it,0.598


**The new scoring function improves the results because it incorporates additional variables such as the type of cuisine, the services available and the price range. This allows us to obtain results that are more relevant to the user's preferences, which would otherwise have been ignored in the original scoring function. For example, a restaurant that meets preferences in terms of cuisine and price, but has a less detailed description, is now considered more relevant.**