In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Load the recipes table from the CSV file that sits next to this notebook.
data = pd.read_csv(filepath_or_buffer="RAW_recipes[1].csv")

In [13]:
# Preview the first five rows (head() defaults to n=5).
data.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,clean_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7.0,"winter squash, mexican seasoning, mixed spice,..."
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6.0,"prepared pizza crust, sausage patty, eggs, mil..."
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13.0,"ground beef, yellow onions, diced tomatoes, to..."
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11.0,"spreadable cheese with garlic and herbs, new p..."
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8.0,"tomato juice, apple cider vinegar, sugar, salt..."


In [17]:
# Ensure NaN values in 'clean_ingredients' are replaced with an empty string.
# NOTE(review): the column may be absent from the loaded CSV (it is rebuilt from
# 'ingredients' in Step 1), so guard the lookup instead of raising KeyError.
if 'clean_ingredients' in data.columns:
    data['clean_ingredients'] = data['clean_ingredients'].fillna('')

# The TF-IDF matrix is deliberately NOT computed here: `tfidf` is only created
# and fitted in Step 2, so calling tfidf.transform() at this point fails with a
# NameError on a fresh kernel (Restart & Run All). Step 2 builds `tfidf_matrix`
# with fit_transform() on the cleaned column, which is the value every later
# cell uses.


In [19]:
# Step 1: Preprocess the ingredients data
def clean_ingredients(ingredients):
    """Normalise an ingredient list into a lowercase, comma-separated string.

    The input is expected to be text, either the string form of a Python list
    (e.g. "['winter squash', 'mixed spice']") or a plain comma-separated
    string. Surrounding brackets and both kinds of quote characters are
    removed, each item is trimmed and lowercased, and blank items are dropped.

    Parameters
    ----------
    ingredients : Any
        Value to clean. Anything that is not a ``str`` (for example NaN or
        None) is treated as missing.

    Returns
    -------
    str
        The cleaned items joined with ", ", or '' when the input is missing
        or contains no non-blank items.
    """
    # Missing values (NaN floats, None, ...) collapse to an empty string.
    if not isinstance(ingredients, str):
        return ''

    # Trim whitespace first so the brackets are actually at the ends of the text.
    text = ingredients.strip().strip("[]")

    # A list's string form wraps items in single quotes, or in double quotes
    # when the item itself contains an apostrophe; remove both kinds so no
    # stray quote characters leak into the cleaned text.
    text = text.replace("'", "").replace('"', "")

    items = (item.strip().lower() for item in text.split(","))

    # Skip blank items (e.g. from ['chicken', '', '']) to avoid "chicken, , ".
    return ', '.join(item for item in items if item)

# Derive the cleaned column element by element; clean_ingredients maps any
# non-string entry (such as a missing value) to ''.
data['clean_ingredients'] = data['ingredients'].map(clean_ingredients)

# Show the raw and cleaned columns side by side for the first rows.
print(data.loc[:, ['ingredients', 'clean_ingredients']].head())

                                         ingredients  \
0  ['winter squash', 'mexican seasoning', 'mixed ...   
1  ['prepared pizza crust', 'sausage patty', 'egg...   
2  ['ground beef', 'yellow onions', 'diced tomato...   
3  ['spreadable cheese with garlic and herbs', 'n...   
4  ['tomato juice', 'apple cider vinegar', 'sugar...   

                                   clean_ingredients  
0  winter squash, mexican seasoning, mixed spice,...  
1  prepared pizza crust, sausage patty, eggs, mil...  
2  ground beef, yellow onions, diced tomatoes, to...  
3  spreadable cheese with garlic and herbs, new p...  
4  tomato juice, apple cider vinegar, sugar, salt...  


In [21]:
# Step 2: Fit a TF-IDF vectorizer on the cleaned ingredient strings.
# English stop words are excluded from the vocabulary; `tfidf_matrix` holds one
# row per recipe and is what the recommender compares queries against.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(data.loc[:, "clean_ingredients"])

In [23]:
def recommend_recipes(input_ingredients, top_n=5):
    """Return the recipes whose ingredients are most similar to the input.

    The input is cleaned with ``clean_ingredients``, vectorized with the
    module-level fitted ``tfidf`` vectorizer and compared against the
    module-level ``tfidf_matrix`` using cosine similarity. Rows are taken
    from the module-level ``data`` frame.

    Parameters
    ----------
    input_ingredients : str or list
        Ingredients to search for. The value is converted with ``str()``
        before cleaning, so a list such as ['chicken', 'rice'] is accepted.
    top_n : int, default 5
        Maximum number of recipes to return. Values <= 0 yield an empty
        result.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns 'name', 'clean_ingredients',
        'minutes' and 'steps', ordered from most to least similar. Recipes
        with zero similarity are never returned, so the frame can hold fewer
        than ``top_n`` rows (or none when nothing matches).
    """
    result_columns = ['name', 'clean_ingredients', 'minutes', 'steps']

    # Guard: with top_n == 0 the slice [-0:] below would select EVERY recipe,
    # and negative values would produce arbitrary slices.
    if top_n <= 0:
        return data.iloc[:0][result_columns]

    # Preprocess the user's input ingredients to match the format of the dataset
    input_ingredients = clean_ingredients(str(input_ingredients))

    # Vectorize the user's input ingredients
    input_tfidf = tfidf.transform([input_ingredients])

    # Compute cosine similarity between the input and the recipes in the dataset
    cosine_similarities = cosine_similarity(input_tfidf, tfidf_matrix).flatten()

    # Get the top_n most similar recipes, best match first
    top_n_indices = cosine_similarities.argsort()[-top_n:][::-1]

    # Drop candidates that share no term with the query; without this an empty
    # or unrecognised input would "recommend" arbitrary zero-similarity recipes.
    top_n_indices = top_n_indices[cosine_similarities[top_n_indices] > 0]

    return data.iloc[top_n_indices][result_columns]

# Demo: look up the five closest recipes for a sample ingredient list.
user_ingredients = ['chicken', '', '']  # Input example
recommended_recipes = recommend_recipes(input_ingredients=user_ingredients, top_n=5)

# Header line followed by the result table, each on its own line.
print("Top 5 Recipe Recommendations Based on Your Ingredients:", recommended_recipes, sep="\n")

Top 5 Recipe Recommendations Based on Your Ingredients:
                                         name  \
74332           easiest chicken recipe of all   
109634            how to spatchcock a chicken   
141432           my  famous  shredded chicken   
66727   crock pot shredded chicken sandwiches   
95198           grandma s chicken n dumplings   

                                        clean_ingredients  minutes  \
74332                                       chicken, salt       95   
109634                     whole chicken, salt and pepper       10   
141432                     chicken breasts, chicken broth       90   
66727   boneless chicken, chicken flavor stuffing mix,...      125   
95198   all-purpose flour, eggs, salt, oil, chicken bo...       40   

                                                    steps  
74332   ['lay the chicken on its back in the middle of...  
109634  ['place chicken on a flat surface breast side ...  
141432  ['place chicken breasts in pan with brot

In [25]:
import pickle

# Persist the fitted TF-IDF vectorizer.
with open('model1.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf, vectorizer_file)

# Write the recipes table, including the cleaned ingredients column, without
# the row index.
data.to_csv('preprocessed_recipes.csv', index=False)

# Persist the recommendation function.
# NOTE(review): pickle stores a plain function by its qualified name rather
# than by value, so loading this file presumably requires `recommend_recipes`
# (and the globals it reads: tfidf, tfidf_matrix, data) to be defined in the
# loading process as well - confirm against the consumer of this file.
with open('recommendation_function.pkl', 'wb') as function_file:
    pickle.dump(recommend_recipes, function_file)