In [11]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize
import pandas as pd
import nltk
from nltk.corpus import stopwords
from gensim.models import Word2Vec
import google.generativeai as palm
from sklearn.metrics.pairwise import cosine_similarity
from spellchecker import SpellChecker  # Import the SpellChecker



# Load the recipe dataset. Its columns are: recipe_name, rating, reviews,
# description, published_date, prep_time, cook_time, total_time, servings,
# calories, fat, carbs, protein.
df = pd.read_csv('all_recipies.csv')

# Only the dish name and its description are used from here on, so pull those
# two columns out as plain Python lists.
food_types, food_decr_raw = (df[column].tolist() for column in ('recipe_name', 'description'))

# Fetch the NLTK resources used for tokenizing and stop-word filtering, then
# keep the English stop words in a set for fast membership checks.
for _nltk_resource in ('punkt', 'stopwords'):
    nltk.download(_nltk_resource)
stop_words = set(stopwords.words('english'))

#function that preprocesses each description (lowercasing, tokenizing, etc)
def preprocess(description):
    """Normalize one recipe description before it is used for training.

    The text is split with ``word_tokenize``; tokens that are not purely
    alphanumeric (punctuation and the like) are discarded, the remaining
    tokens are lower-cased, and any token found in ``stop_words`` is removed.

    Args:
        description: Raw description text. A non-string value (for example
            the float NaN that pandas produces for an empty CSV cell) is
            treated as an empty description instead of raising.

    Returns:
        The surviving lower-cased tokens joined by single spaces, or "" when
        nothing survives or the input is not a string.
    """
    # Missing cells arrive as NaN (a float), which the tokenizer cannot handle.
    if not isinstance(description, str):
        return ""

    kept_tokens = []
    for token in word_tokenize(description):
        if not token.isalnum():
            continue
        lowered = token.lower()  # lower-case once, reuse for the check and the output
        if lowered not in stop_words:
            kept_tokens.append(lowered)
    return " ".join(kept_tokens)

# Clean every raw description once; the rest of the notebook works on these strings.
food_decr = list(map(preprocess, food_decr_raw))

# Train Word2Vec on the cleaned descriptions, one token list per description.
word2vec_sentences = [word_tokenize(cleaned) for cleaned in food_decr]
word2vec_model = Word2Vec(
    sentences=word2vec_sentences,
    vector_size=50,
    window=5,
    min_count=1,
    workers=4,
)



  from pandas.core.computation.check import NUMEXPR_INSTALLED
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/yezenhijazin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/yezenhijazin/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [12]:
# Build a Doc2Vec model over the cleaned descriptions, reusing the Word2Vec word vectors.
# Each description becomes a TaggedDocument whose single tag is its position in
# food_decr (as a string), so a tag can be turned back into a list index later.
tagged_data = [TaggedDocument(words=word_tokenize(doc.lower()), tags=[str(i)]) for i, doc in enumerate(food_decr)]
doc2vec_model = Doc2Vec(vector_size=50, window=5, min_count=1, workers=4, epochs=100)
# The vocabulary has to exist before the word vectors are replaced and before training.
doc2vec_model.build_vocab(tagged_data)
# NOTE(review): swapping the whole `wv` object presumes both models index their
# vocabularies identically (same corpus, same min_count) — confirm.
doc2vec_model.wv = word2vec_model.wv  # have the doc2vec use Word2Vec embeddings
doc2vec_model.train(tagged_data, total_examples=doc2vec_model.corpus_count, epochs=doc2vec_model.epochs)



In [106]:
def _correct_spelling(phrase, checker):
    """Spell-correct ``phrase`` one word at a time.

    The checker works on single words, so a multi-word entry such as
    "orangee chicen" has to be split before it can be corrected.

    Args:
        phrase: One food entry as typed by the user (may contain several words).
        checker: Object exposing ``correction(word)``, e.g. a ``SpellChecker``.

    Returns:
        The entry with each word replaced by the checker's suggestion; a word
        is kept unchanged when the checker returns None for it.
    """
    corrected_words = []
    for word in phrase.split():
        suggestion = checker.correction(word)  # one lookup per word
        corrected_words.append(word if suggestion is None else suggestion)
    return " ".join(corrected_words)


# Collect five dishes from every member of the party.
dining_history = []
dining_histories = []
users = input("How many people are in your party? ")
for i in range(int(users)):
    for j in range(0, 5):
        dining_history.append(input("Enter 5 food you've ate recentely or that you like for user "+str(i+1)+": "))
    dining_histories.append(dining_history)
    dining_history = []

# Example of the expected shape, handy for running without the prompts:
# dining_histories = [["lasanga", "pesto pasta", "pizza","gnocchi","calzone"],["sushi","sashimi","ramen","udon","fish"]]

# Flatten everyone's entries into a single list.
combined_dining_history = [item for sublist in dining_histories for item in sublist]
if not any(item.strip() for item in combined_dining_history):
    # An empty history would still yield a vector, but the match would be meaningless.
    raise ValueError("No dining history was entered, so there is nothing to base a recommendation on.")

spell = SpellChecker()
corrected_combined_dining_history = [_correct_spelling(item, spell) for item in combined_dining_history]

# Infer one document vector for the whole party's (corrected, lower-cased) history.
combined_vector = doc2vec_model.infer_vector(word_tokenize(" ".join(corrected_combined_dining_history).lower()))

# Compare that vector with the stored description vectors and keep the best match.
# Tags were created as str(row position), so the tag converts straight to an index.
similar_documents = doc2vec_model.dv.most_similar([combined_vector], topn=1)
similar_food_index = int(similar_documents[0][0])
similar_food = food_types[similar_food_index]

print(f"The most similar food item to the combined dining history of all users is: {similar_food}")




['orangee chicen', 'calzone', 'humus', 'shawarma', 'hufeoe']
The most similar food item to the combined dining history of all users is: Ta'ameya (Egyptian Falafel)


In [88]:

import getpass
import os

# Credentials must not live in the notebook: read the key from the environment,
# or prompt for it without echoing. Any key that was previously committed in
# this cell should be treated as compromised and rotated.
palm_api_key = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Google Generative AI API key: ")
palm.configure(api_key=palm_api_key)

# Keep only the models that support plain text generation and use the first one.
models = [
    m for m in palm.list_models() if "generateText" in m.supported_generation_methods
]
if not models:
    raise RuntimeError("No model supporting generateText is available for this API key.")

model = models[0].name

# The wording of the prompt matters a lot and was tuned by hand. It only asks
# for a cuisine genre; the restaurant lookup is handled by the Yelp search.
prompt = "Given the food item "+similar_food+" suggest a broad genre or type of cuisine that matches this preference."

completion = palm.generate_text(
    model=model,
    prompt=prompt,
    temperature=0.33,
    # max length of the response
    max_output_tokens=800,
)

print(completion.result)  # the model's answer to the prompt

import requests

def search_yelp(api_key, term, location, limit=5, timeout=10):
    """Search Yelp for businesses and print each match's name and address.

    Args:
        api_key: Yelp API key, sent as a Bearer token.
        term: Search term (for example a cuisine name).
        location: Free-text location to search around.
        limit: Maximum number of businesses to request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. Results are printed; on a non-200 response the status code and
        response body are printed instead.

    Raises:
        Whatever ``requests.get`` raises for network failures, including a
        timeout once ``timeout`` seconds have elapsed.
    """
    endpoint = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    params = {"term": term, "location": location, "limit": limit}

    # Without an explicit timeout the request can block forever on a stalled connection.
    response = requests.get(endpoint, headers=headers, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return

    for business in response.json().get("businesses", []):
        print(f"Name: {business['name']}")
        print(f"Address: {', '.join(business['location']['display_address'])}")
        print("---")

if __name__ == "__main__":
    import getpass
    import os

    # Credentials must not live in the notebook: read the key from the
    # environment, or prompt for it without echoing. Any key that was
    # previously committed here should be treated as compromised and rotated.
    api_key = os.environ.get("YELP_API_KEY") or getpass.getpass("Yelp API key: ")

    # Search Yelp for the cuisine the language model suggested.
    search_term = completion.result
    location = "College Station, TX"

    limit = 5

    if search_term:
        search_yelp(api_key, search_term, location, limit)
    else:
        # An empty/None term would make the search ignore the cuisine entirely.
        print("No cuisine suggestion was returned, so the restaurant search was skipped.")


Italian
Name: Cento
Address: 595 S 3rd St, Columbus, OH 43215
---
Name: Lola & Giuseppe's Trattoria
Address: 100 Granville St, Columbus, OH 43230
---
Name: La Tavola
Address: 1664 W 1st Ave, Grandview Heights, OH 43212
---
Name: Speck Italian Eatery
Address: 89 N High St, Columbus, OH 43215
---
Name: Marcella's
Address: 615 N High St, Columbus, OH 43215
---
