In [None]:
import pandas as pd

# Load the restaurant table from Google Drive and preview the first rows.
# NOTE(review): hard-coded absolute Colab path — presumably requires Drive to
# be mounted beforehand; confirm, or move the path into a config constant.
df = pd.read_csv("/content/drive/MyDrive/ForkIt/tfdtfData.csv")
df.head()

Unnamed: 0,Name,Cuisine,Price_Level,Reviews,Editorial_Summary,Combined_Text
0,Flavor of Poland,['Polish'],2.0,"[""We really enjoyed this authentic Polish rest...",Flavor of Poland offers authentic Polish cuisi...,Flavor of Poland Flavor of Poland offers authe...
1,tacoria,['Mexican'],2.0,['My son and I stopped here to just try out a ...,"Modern tacos, burritos & vegan offerings in a ...","tacoria Modern tacos, burritos & vegan offerin..."
2,Halal Brothers,"['Sandwiches', 'Halal', 'Middle Eastern']",1.0,"[""I have gone here for very long time and the ...",Halal Brothers serves up delicious Middle East...,Halal Brothers Halal Brothers serves up delici...
3,KC Prime Restaurant Steakhouse,['Steakhouses'],3.0,"['Dinner with a friend here, is nice quiet ste...","Steaks, prime ribs & seafood plus a Sunday bru...","KC Prime Restaurant Steakhouse Steaks, prime r..."
4,Roots Ocean Prime,"['New American', 'Seafood', 'Steakhouses']",3.0,['Had a great dinner here. I must admit the re...,Fancy restaurant offering a variety of seafood...,Roots Ocean Prime Fancy restaurant offering a ...


In [None]:
import google.generativeai as genai
import time

def generate_description(name, cuisine, reviews):
    """Generate a short restaurant description with the Google Gemini API.

    Args:
        name: Restaurant name interpolated into the prompt.
        cuisine: Cuisine label(s) interpolated into the prompt.
        reviews: Customer review text interpolated into the prompt.

    Returns:
        The text of the first part of the first response candidate, or the
        string "Not Available" when the response carries no usable candidate
        or when any exception is raised while calling the API.
    """
    # Adjacent literals are concatenated at compile time. The previous
    # backslash continuation inside a single literal kept the indentation of
    # the following source line, embedding a long run of spaces in the prompt.
    prompt = (
        f"Write a short, family-friendly restaurant description for {name}, a {cuisine} restaurant. "
        f"Here are some customer reviews: {reviews}. Keep it under 50 words and make it neutral and informative."
    )

    try:
        model = genai.GenerativeModel("gemini-pro")
        response = model.generate_content(prompt)

        # A response without candidates/parts is reported as a safety-filter hit.
        if response.candidates and response.candidates[0].content.parts:
            return response.candidates[0].content.parts[0].text

        print(f"Safety filter triggered for: {name}. Skipping.")
        return "Not Available"

    except Exception as e:
        # Deliberate best-effort: one failing row must not abort the whole batch.
        print(f"Error generating description for {name}: {e}")
        return "Not Available"


# Rows whose summary still holds the "Not Available" placeholder.
missing_desc_rows = df.loc[df["Editorial_Summary"].eq("Not Available")]

# Generate a replacement summary for each such row, one request at a time.
for index, row in missing_desc_rows.iterrows():
    df.at[index, "Editorial_Summary"] = generate_description(
        name=row["Name"], cuisine=row["Cuisine"], reviews=row["Reviews"]
    )

    # Pause between requests to stay under the API rate limit.
    time.sleep(2)

In [None]:
# Preview the first rows of the current frame.
df.head()

Unnamed: 0,Name,Cuisine,Price_Level,Reviews,Editorial_Summary,Combined_Text
0,Flavor of Poland,['Polish'],2.0,"[""We really enjoyed this authentic Polish rest...",Flavor of Poland offers authentic Polish cuisi...,Flavor of Poland Flavor of Poland offers authe...
1,tacoria,['Mexican'],2.0,['My son and I stopped here to just try out a ...,"Modern tacos, burritos & vegan offerings in a ...","tacoria Modern tacos, burritos & vegan offerin..."
2,Halal Brothers,"['Sandwiches', 'Halal', 'Middle Eastern']",1.0,"[""I have gone here for very long time and the ...",Halal Brothers serves up delicious Middle East...,Halal Brothers Halal Brothers serves up delici...
3,KC Prime Restaurant Steakhouse,['Steakhouses'],3.0,"['Dinner with a friend here, is nice quiet ste...","Steaks, prime ribs & seafood plus a Sunday bru...","KC Prime Restaurant Steakhouse Steaks, prime r..."
4,Roots Ocean Prime,"['New American', 'Seafood', 'Steakhouses']",3.0,['Had a great dinner here. I must admit the re...,Fancy restaurant offering a variety of seafood...,Roots Ocean Prime Fancy restaurant offering a ...


In [None]:
# List the column names of the current frame.
# NOTE(review): the recorded output shows columns (e.g. 'Place_ID', 'URL',
# 'Ratings') that the previews of the CSV loaded at the top do not contain, so
# this cell was presumably run against a different `df` — confirm the notebook
# reproduces under Restart & Run All.
df.columns

Index(['Place_ID', 'Name', 'Type', 'URL', 'Vicinity',
       'Wheelchair_Accessible_Entrance', 'Dine_In', 'Editorial_Summary',
       'Reservable', 'Serves_Beer', 'Serves_Breakfast', 'Serves_Brunch',
       'Serves_Dinner', 'Serves_Lunch', 'Serves_Vegetarian_Food',
       'Serves_Wine', 'Yelp_URL', 'Latitude', 'Longitude', 'Reviews',
       'Total_Reviews', 'Photo_Reference', 'PhoneNumber', 'Price_Level',
       'Ratings', 'Cuisine'],
      dtype='object')

In [None]:
# Keep only the text/price columns used for the recommender. Take an explicit
# copy: later cells assign new columns into this frame, and writing into a
# slice of the original frame triggers SettingWithCopyWarning.
df = df[["Name", "Cuisine", "Price_Level", "Reviews", "Editorial_Summary"]].copy()

In [None]:
# Persist the trimmed frame without the index column.
# NOTE(review): this is the same path the notebook reads `df` from at the top,
# so the input file is overwritten in place.
df.to_csv("/content/drive/MyDrive/ForkIt/tfdtfData.csv", index=False)

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Resources needed by stopwords.words, word_tokenize, WordNetLemmatizer and
# nltk.pos_tag. Recent NLTK releases (3.9+) look up "punkt_tab" and
# "averaged_perceptron_tagger_eng" instead of the older pickled "punkt" and
# "averaged_perceptron_tagger", so both generations are requested. An id the
# installed NLTK does not know only logs an error and returns False.
for resource in (
    'stopwords',
    'punkt',
    'punkt_tab',
    'wordnet',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [None]:
# Shared NLP helpers used by process_sentences: a WordNet lemmatizer and the
# NLTK English stop-word list held as a set for fast membership tests.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

In [None]:
def process_sentences(text):
    """Normalise free text into a space-separated string of lemmas.

    The text is lower-cased, tokenised with ``word_tokenize`` and tagged with
    ``nltk.pos_tag``. Tokens tagged as verbs are lemmatised as verbs, all other
    tokens with the lemmatizer's default. Lemmas that are stop words or are
    not purely alphabetic are dropped.

    Args:
        text: Raw text. Missing values (None/NaN) yield an empty string;
            other non-string scalars are converted with ``str()``.

    Returns:
        The kept lemmas joined by single spaces.
    """
    if pd.isna(text):
        return ""

    verb_tags = {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}
    tagged_words = nltk.pos_tag(word_tokenize(str(text).lower()))

    kept = []
    for word, tag in tagged_words:
        if tag in verb_tags:
            lemmatized = lemmatizer.lemmatize(word, 'v')
        else:
            lemmatized = lemmatizer.lemmatize(word)

        if lemmatized not in stop_words and lemmatized.isalpha():
            kept.append(lemmatized)

    # The former chain of str.replace calls for contractions ("n't", "'s", ...)
    # was removed: every kept token has passed isalpha(), so the joined string
    # can never contain an apostrophe and the replacements never matched.
    return ' '.join(kept)

In [None]:
import ast

# Descriptive words appended to each restaurant's text, keyed by price level.
price_map = {
    1: "cheap inexpensive low-price low-cost economical economic affordable",
    2: "moderate fair mid-price reasonable average",
    3: "expensive fancy lavish",
    4: "luxurious high-end premium fine-dining"
}


def _join_list_cell(value, sep):
    """Flatten a list-valued cell, or its string repr as read from CSV, into text.

    Lists are joined with ``sep``. A string that parses as a Python list
    literal (e.g. "['Pizza', 'Italian']") is parsed first, so the brackets and
    quotes of the repr do not leak into the text. Any other string is returned
    unchanged, missing values become "", and other values go through ``str()``.
    """
    if isinstance(value, str):
        try:
            parsed = ast.literal_eval(value)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return value
        if not isinstance(parsed, list):
            return value
        value = parsed

    if isinstance(value, list):
        return sep.join(str(item) for item in value)

    try:
        missing = bool(pd.isna(value))
    except (TypeError, ValueError):
        # Non-scalar values make pd.isna return an array; treat as present.
        missing = False
    # str(NaN) would otherwise put the literal word "nan" into the text.
    return "" if missing else str(value)


df["Cuisine"] = df["Cuisine"].apply(lambda x: _join_list_cell(x, ', '))
df["Reviews"] = df["Reviews"].apply(lambda x: _join_list_cell(x, ' '))

# Price_Level may still hold non-numeric placeholders at this point (it is
# cleaned in a later cell), so coerce before looking up the price words.
price_words = pd.to_numeric(df["Price_Level"], errors="coerce").map(price_map).fillna("")

df.loc[:, "Combined_Text"] = (
    df["Name"].fillna("") + " " +
    df["Editorial_Summary"].fillna("") + " " +
    df["Reviews"].fillna("") + " " +
    df["Cuisine"].fillna("") + " " +
    price_words
)

df

Unnamed: 0,Name,Cuisine,Price_Level,Reviews,Editorial_Summary,Combined_Text
0,Flavor of Poland,['Polish'],2.0,"[""We really enjoyed this authentic Polish rest...",Flavor of Poland offers authentic Polish cuisi...,Flavor of Poland Flavor of Poland offers authe...
1,tacoria,['Mexican'],2.0,['My son and I stopped here to just try out a ...,"Modern tacos, burritos & vegan offerings in a ...","tacoria Modern tacos, burritos & vegan offerin..."
2,Halal Brothers,"['Sandwiches', 'Halal', 'Middle Eastern']",1.0,"[""I have gone here for very long time and the ...",Halal Brothers serves up delicious Middle East...,Halal Brothers Halal Brothers serves up delici...
3,KC Prime Restaurant Steakhouse,['Steakhouses'],3.0,"['Dinner with a friend here, is nice quiet ste...","Steaks, prime ribs & seafood plus a Sunday bru...","KC Prime Restaurant Steakhouse Steaks, prime r..."
4,Roots Ocean Prime,"['New American', 'Seafood', 'Steakhouses']",3.0,['Had a great dinner here. I must admit the re...,Fancy restaurant offering a variety of seafood...,Roots Ocean Prime Fancy restaurant offering a ...
...,...,...,...,...,...,...
460,The Patio at Mountain View,"['Sandwiches', 'Breakfast & Brunch']",2.0,"[""Always happy to visit here even when we don'...","The Patio at Mountain View offers breakfast, b...",The Patio at Mountain View The Patio at Mounta...
461,The Best Pizzeria and Restaurant,['Pizza'],1.0,['The guys here at Best do a fantastic job coo...,Enjoy a delicious meal at The Best Pizzeria an...,The Best Pizzeria and Restaurant Enjoy a delic...
462,Afghan Kabob and Grill,"['Indian', 'Halal', 'Afghan']",2.0,"[""The tables can accommodate a party if at lea...",Easygoing halal eatery serving Indian & Afghan...,Afghan Kabob and Grill Easygoing halal eatery ...
463,Jammin' Crepes,"['Caterers', 'Creperies']",2.0,['I have been to jammin’ crepes a few times an...,"Counter-serve operation known for sweet, savor...",Jammin' Crepes Counter-serve operation known f...


In [None]:
# Run the text normalisation over every combined document; the result is the
# corpus the TF-IDF vectorizer is fitted on.
df.loc[:, "Processed_Text"] = df["Combined_Text"].apply(process_sentences)

In [None]:
# Display the frame to inspect the new Processed_Text column.
df

Unnamed: 0,Name,Cuisine,Price_Level,Reviews,Editorial_Summary,Combined_Text,Processed_Text
0,Flavor of Poland,['Polish'],2.0,"[""We really enjoyed this authentic Polish rest...",Flavor of Poland offers authentic Polish cuisi...,Flavor of Poland Flavor of Poland offers authe...,flavor poland flavor poland offer authentic po...
1,tacoria,['Mexican'],2.0,['My son and I stopped here to just try out a ...,"Modern tacos, burritos & vegan offerings in a ...","tacoria Modern tacos, burritos & vegan offerin...",tacoria modern taco burrito vegan offering fun...
2,Halal Brothers,"['Sandwiches', 'Halal', 'Middle Eastern']",1.0,"[""I have gone here for very long time and the ...",Halal Brothers serves up delicious Middle East...,Halal Brothers Halal Brothers serves up delici...,halal brother halal brother serve delicious mi...
3,KC Prime Restaurant Steakhouse,['Steakhouses'],3.0,"['Dinner with a friend here, is nice quiet ste...","Steaks, prime ribs & seafood plus a Sunday bru...","KC Prime Restaurant Steakhouse Steaks, prime r...",kc prime restaurant steakhouse steak prime rib...
4,Roots Ocean Prime,"['New American', 'Seafood', 'Steakhouses']",3.0,['Had a great dinner here. I must admit the re...,Fancy restaurant offering a variety of seafood...,Roots Ocean Prime Fancy restaurant offering a ...,root ocean prime fancy restaurant offer variet...
...,...,...,...,...,...,...,...
460,The Patio at Mountain View,"['Sandwiches', 'Breakfast & Brunch']",2.0,"[""Always happy to visit here even when we don'...","The Patio at Mountain View offers breakfast, b...",The Patio at Mountain View The Patio at Mounta...,patio mountain view patio mountain view offer ...
461,The Best Pizzeria and Restaurant,['Pizza'],1.0,['The guys here at Best do a fantastic job coo...,Enjoy a delicious meal at The Best Pizzeria an...,The Best Pizzeria and Restaurant Enjoy a delic...,best pizzeria restaurant enjoy delicious meal ...
462,Afghan Kabob and Grill,"['Indian', 'Halal', 'Afghan']",2.0,"[""The tables can accommodate a party if at lea...",Easygoing halal eatery serving Indian & Afghan...,Afghan Kabob and Grill Easygoing halal eatery ...,afghan kabob grill easygoing halal eatery serv...
463,Jammin' Crepes,"['Caterers', 'Creperies']",2.0,['I have been to jammin’ crepes a few times an...,"Counter-serve operation known for sweet, savor...",Jammin' Crepes Counter-serve operation known f...,jammin crepe operation know sweet savory break...


In [None]:
# Coerce to numbers first so placeholders such as "Unavailable" become NaN,
# then fill the gaps with the most frequent *numeric* level. Computing the mode
# on the raw column could select the placeholder itself as the fill value,
# which would turn into NaN again and make the integer cast fail.
price_numeric = pd.to_numeric(df["Price_Level"], errors="coerce")
df["Price_Level"] = price_numeric.fillna(price_numeric.mode()[0]).astype(int)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF model on the preprocessed documents. scikit-learn's built-in
# English stop-word list is applied in addition to the filtering already done
# when Processed_Text was built.
vectorizer = TfidfVectorizer(stop_words='english')

# One row per restaurant, one column per vocabulary term.
tfidf_matrix = vectorizer.fit_transform(df["Processed_Text"])

print("TF-IDF vectorization complete. Shape:", tfidf_matrix.shape)


TF-IDF vectorization complete. Shape: (465, 5744)


In [None]:
def process_user_query(query):
    """Preprocess a user query and convert it into a TF-IDF vector.

    The query is passed through ``process_sentences`` — the same normalisation
    applied to the documents the vectorizer was fitted on — so inflected query
    terms (e.g. "tacos") map onto the lemmatised vocabulary (e.g. "taco").
    Vectorising the raw query would leave such terms unmatched.

    Args:
        query: Free-text search string.

    Returns:
        The result of ``vectorizer.transform`` for the single normalised query.
    """
    query_tfidf = vectorizer.transform([process_sentences(query)])
    return query_tfidf

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def get_recommendations(user_query, top_n=5):
    """Find the restaurants most similar to a text query.

    Args:
        user_query: Free-text search string.
        top_n: Maximum number of rows to return. Zero or a negative value
            yields an empty result.

    Returns:
        A DataFrame with the columns Name, Cuisine, Price_Level and
        Editorial_Summary, ordered from most to least similar. It holds at
        most ``top_n`` rows (fewer when the dataset is smaller).
    """
    columns = ["Name", "Cuisine", "Price_Level", "Editorial_Summary"]

    # Guard first: with top_n == 0 the slice [-0:] below would select every
    # row, and a negative top_n would select an unrelated tail of the ranking.
    if top_n <= 0:
        return df.iloc[[]][columns]

    query_tfidf = process_user_query(user_query)

    # Single query, so only the first row of the similarity matrix is needed.
    similarity_scores = cosine_similarity(query_tfidf, tfidf_matrix)[0]

    # argsort is ascending: take the last top_n positions and reverse them.
    top_indices = similarity_scores.argsort()[-top_n:][::-1]

    return df.iloc[top_indices][columns]


In [None]:
# Smoke-test the recommender with a single-word query and show the top matches.
user_query = "pasta"
recommendations = get_recommendations(user_query)
recommendations

Unnamed: 0,Name,Cuisine,Price_Level,Editorial_Summary
7,Lo Ré Pasta Shop,"['Sushi Bars', 'Italian']",2,"Lo Ré Pasta Shop offers fresh, homemade Italia..."
313,King's Pizzarama,['Pizza'],2,"King's Pizzarama serves pizzas, pasta, hoagies..."
344,Pennington Pizza & Grill,"['Italian', 'Pizza']",1,Pennington Pizza & Grill serves a wide variety...
212,Laurea,['Italian'],2,Laurea serves authentic Italian cuisine in a f...
210,Mamma Flora's Trattorias,"['Italian', 'Pizza']",2,"Casual, BYOB restaurant serving old-school Ita..."


In [None]:
# Check the container type of the TF-IDF matrix.
type(tfidf_matrix)

In [None]:
import pickle

# Serialize the fitted document-term matrix to a file in the working directory.
filename = 'tfidf_matrix.pkl'
with open(filename, 'wb') as file:
    pickle.dump(tfidf_matrix, file)

In [None]:
# Serialize the fitted vectorizer so queries can later be transformed with the
# same vocabulary and IDF weights as the saved matrix.
with open('tfidf_vectorizer.pkl', 'wb') as file:
    pickle.dump(vectorizer, file)

In [None]:
# Load the cleaned restaurant-details table; only its Place_ID column is used below.
df2 = pd.read_csv("/content/drive/MyDrive/ForkIt/restaurants_details_cleaned.csv")

In [None]:
# Attach the Place_ID identifier from the details table.
# NOTE(review): Series assignment aligns on index labels, so this is only
# correct if both frames list the same restaurants in the same row order —
# TODO confirm (e.g. compare the Name columns if df2 has one).
df["Place_ID"] = df2["Place_ID"]

In [None]:
# Display the frame to check the newly attached Place_ID column.
df

Unnamed: 0,Place_ID,Name,Cuisine,Price_Level,Reviews,Editorial_Summary,Combined_Text,Processed_Text
0,ChIJkc5WLM9ZwYkRMmz1U3Em5fI,Flavor of Poland,['Polish'],2,"[""We really enjoyed this authentic Polish rest...",Flavor of Poland offers authentic Polish cuisi...,Flavor of Poland Flavor of Poland offers authe...,flavor poland flavor poland offer authentic po...
1,ChIJnzTECsHmw4kRPMkKgyGmY9o,tacoria,['Mexican'],2,['My son and I stopped here to just try out a ...,"Modern tacos, burritos & vegan offerings in a ...","tacoria Modern tacos, burritos & vegan offerin...",tacoria modern taco burrito vegan offering fun...
2,ChIJ9TgE4gVZwYkRTixvHR5CdDI,Halal Brothers,"['Sandwiches', 'Halal', 'Middle Eastern']",1,"[""I have gone here for very long time and the ...",Halal Brothers serves up delicious Middle East...,Halal Brothers Halal Brothers serves up delici...,halal brother halal brother serve delicious mi...
3,ChIJkUvDKOzhw4kRVk-dF19hZx4,KC Prime Restaurant Steakhouse,['Steakhouses'],3,"['Dinner with a friend here, is nice quiet ste...","Steaks, prime ribs & seafood plus a Sunday bru...","KC Prime Restaurant Steakhouse Steaks, prime r...",kc prime restaurant steakhouse steak prime rib...
4,ChIJg-PLzmHnw4kR86cynTbawUo,Roots Ocean Prime,"['New American', 'Seafood', 'Steakhouses']",3,['Had a great dinner here. I must admit the re...,Fancy restaurant offering a variety of seafood...,Roots Ocean Prime Fancy restaurant offering a ...,root ocean prime fancy restaurant offer variet...
...,...,...,...,...,...,...,...,...
460,ChIJmyqDrIb9w4kRuup6_Q2tcJk,The Patio at Mountain View,"['Sandwiches', 'Breakfast & Brunch']",2,"[""Always happy to visit here even when we don'...","The Patio at Mountain View offers breakfast, b...",The Patio at Mountain View The Patio at Mounta...,patio mountain view patio mountain view offer ...
461,ChIJvRSh1xJZwYkRgzblRUaX6vo,The Best Pizzeria and Restaurant,['Pizza'],1,['The guys here at Best do a fantastic job coo...,Enjoy a delicious meal at The Best Pizzeria an...,The Best Pizzeria and Restaurant Enjoy a delic...,best pizzeria restaurant enjoy delicious meal ...
462,ChIJbVoWhTlfwYkRAbN9V_9-6GM,Afghan Kabob and Grill,"['Indian', 'Halal', 'Afghan']",2,"[""The tables can accommodate a party if at lea...",Easygoing halal eatery serving Indian & Afghan...,Afghan Kabob and Grill Easygoing halal eatery ...,afghan kabob grill easygoing halal eatery serv...
463,ChIJy4XgpMDmw4kRf48olBzKRcY,Jammin' Crepes,"['Caterers', 'Creperies']",2,['I have been to jammin’ crepes a few times an...,"Counter-serve operation known for sweet, savor...",Jammin' Crepes Counter-serve operation known f...,jammin crepe operation know sweet savory break...


In [None]:
# Move Place_ID to the front while keeping the other columns in their current order.
df = df[["Place_ID"] + [col for col in df.columns if col != "Place_ID"]]

In [None]:
# Display the frame after reordering the columns.
df

Unnamed: 0,Place_ID,Name,Cuisine,Price_Level,Reviews,Editorial_Summary,Combined_Text,Processed_Text
0,ChIJkc5WLM9ZwYkRMmz1U3Em5fI,Flavor of Poland,['Polish'],2,"[""We really enjoyed this authentic Polish rest...",Flavor of Poland offers authentic Polish cuisi...,Flavor of Poland Flavor of Poland offers authe...,flavor poland flavor poland offer authentic po...
1,ChIJnzTECsHmw4kRPMkKgyGmY9o,tacoria,['Mexican'],2,['My son and I stopped here to just try out a ...,"Modern tacos, burritos & vegan offerings in a ...","tacoria Modern tacos, burritos & vegan offerin...",tacoria modern taco burrito vegan offering fun...
2,ChIJ9TgE4gVZwYkRTixvHR5CdDI,Halal Brothers,"['Sandwiches', 'Halal', 'Middle Eastern']",1,"[""I have gone here for very long time and the ...",Halal Brothers serves up delicious Middle East...,Halal Brothers Halal Brothers serves up delici...,halal brother halal brother serve delicious mi...
3,ChIJkUvDKOzhw4kRVk-dF19hZx4,KC Prime Restaurant Steakhouse,['Steakhouses'],3,"['Dinner with a friend here, is nice quiet ste...","Steaks, prime ribs & seafood plus a Sunday bru...","KC Prime Restaurant Steakhouse Steaks, prime r...",kc prime restaurant steakhouse steak prime rib...
4,ChIJg-PLzmHnw4kR86cynTbawUo,Roots Ocean Prime,"['New American', 'Seafood', 'Steakhouses']",3,['Had a great dinner here. I must admit the re...,Fancy restaurant offering a variety of seafood...,Roots Ocean Prime Fancy restaurant offering a ...,root ocean prime fancy restaurant offer variet...
...,...,...,...,...,...,...,...,...
460,ChIJmyqDrIb9w4kRuup6_Q2tcJk,The Patio at Mountain View,"['Sandwiches', 'Breakfast & Brunch']",2,"[""Always happy to visit here even when we don'...","The Patio at Mountain View offers breakfast, b...",The Patio at Mountain View The Patio at Mounta...,patio mountain view patio mountain view offer ...
461,ChIJvRSh1xJZwYkRgzblRUaX6vo,The Best Pizzeria and Restaurant,['Pizza'],1,['The guys here at Best do a fantastic job coo...,Enjoy a delicious meal at The Best Pizzeria an...,The Best Pizzeria and Restaurant Enjoy a delic...,best pizzeria restaurant enjoy delicious meal ...
462,ChIJbVoWhTlfwYkRAbN9V_9-6GM,Afghan Kabob and Grill,"['Indian', 'Halal', 'Afghan']",2,"[""The tables can accommodate a party if at lea...",Easygoing halal eatery serving Indian & Afghan...,Afghan Kabob and Grill Easygoing halal eatery ...,afghan kabob grill easygoing halal eatery serv...
463,ChIJy4XgpMDmw4kRf48olBzKRcY,Jammin' Crepes,"['Caterers', 'Creperies']",2,['I have been to jammin’ crepes a few times an...,"Counter-serve operation known for sweet, savor...",Jammin' Crepes Counter-serve operation known f...,jammin crepe operation know sweet savory break...


In [None]:
# Serialize the final restaurant frame to restaurants.pkl in the working directory.
with open("restaurants.pkl", "wb") as f:
    pickle.dump(df, f)