In [19]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from rapidfuzz import process, fuzz

In [20]:
def get_best_match_rapid(query, choices, scorer=fuzz.WRatio):
    """
    Returns the best match of `query` in `choices` using Rapidfuzz.
    - `scorer`: Scoring function from fuzz, such as fuzz.WRatio, fuzz.QRatio, etc.
    """
    result = process.extractOne(query, choices, scorer=scorer)
    if result:
        return result[0]
    return None

In [28]:

def ingredients_to_binary(ingredient_string, df_ingredients):
    """
    Converts a string of ingredients into a binary list indicating the presence of unique ingredients based on the best match.
    - `ingredient_string`: A string of ingredients separated by commas or spaces.
    - `unique_ingredients`: A list of all unique ingredient names.
    """
    # Initialize the binary list
    ingredients_binary = [0] * len(df_ingredients)
    
    # Normalize and split the ingredient string by commas or spaces
    queries = ingredient_string.replace(',', ' ').split()
    
    for query in queries:
        query = query.lower()
        best_match = get_best_match_rapid(query,df_ingredients)
        if best_match:
            # Update the binary list
            index = unique_ingredients.index(best_match)
            ingredients_binary[index] = 1  # Set to 1 at the index of the best match

    return ingredients_binary

In [38]:
#Get the Top n scores from my cosine similarity matrix

def top_n_scores(scores, n):
    # Flatten the array (if it's multidimensional)
    scores = scores.flatten()
    
    # Get the indices that would sort the array, and select the last 'n' indices (top scores)
    top_indices = np.argsort(scores)[-n:][::-1]
    
    
    return top_indices

In [23]:
def get_columns_with_ones(df, row_index):
    condition = df.loc[row_index] == 1
    return df.columns[condition].tolist()

In [40]:
# See what Recipes and subsequent Ingredients were recommended

def get_recipes(df, similarity, n):
    
    # store list of recipes
    recipes = []
    
    #store lsit of ingredients 
    ingredients = []
    
    top_indices = top_n_scores(similarity, 10)

    for each in top_indices:
        
        recipes.append(df.recipename[df.id == each])
        
        ingredients.append(get_columns_with_ones(df,each))
        
    return recipes, ingredients

In [25]:
# Import data 
data = pd.read_csv('../recipe_ingredients_dataset/ingredient_df_with_recipenames.csv')

In [26]:
# Get list of unique ingredients in dataset
unique_ingredients = [col for col in data.columns if col not in ['id', 'cuisine','recipename']]

In [29]:
user_input = 'Butter, Milk, Eggs'

# Process user string input as binary for SVD recommendation
x = ingredients_to_binary(user_input, unique_ingredients)

In [31]:
# Load in SVD Matrices 
vmatrix = pd.read_csv('../recipe_ingredients_dataset/V_1000matrix.csv',index_col=0)
umatrix = pd.read_csv('../recipe_ingredients_dataset/U_1000matrix.csv',index_col=0)

In [33]:
# Make sample a Series for manipulation
x = pd.Series(x)

In [34]:
# Dot product of user input and the V matrix. 
xcon = x.dot(vmatrix)

In [35]:
# xconcept and U must both be numpy arrays, and must be properly reshaped. 

xcon = np.array(xcon)

umatrix = np.array(umatrix)

xcon = xcon.reshape(1, -1)

In [36]:
#cosine similarity with our U matrix
similarities = cosine_similarity(umatrix, xcon)

In [41]:
# Get recommended recipes and ingredients 
recommendations = get_recipes(data, similarities, 30)

In [42]:
recommendations

([48995    mexican Style  milk sugar
  Name: recipename, dtype: object,
  43649    british Style  milk all-purpose flour
  Name: recipename, dtype: object,
  2448    mexican Style  milk butter water
  Name: recipename, dtype: object,
  8824    french Style  milk butter water
  Name: recipename, dtype: object,
  24215    british Style  milk butter
  Name: recipename, dtype: object,
  22770    british Style  flour
  Name: recipename, dtype: object,
  34772    vietnamese Style  milk butter drippings
  Name: recipename, dtype: object,
  5024    mexican Style  milk
  Name: recipename, dtype: object,
  47994    japanese Style  milk eggs all-purpose flour
  Name: recipename, dtype: object,
  20319    french Style  milk eggs all-purpose flour
  Name: recipename, dtype: object],
 [['milk', 'butter', 'sugar', 'eggs'],
  ['milk', 'butter', 'eggs', 'all-purpose flour'],
  ['milk', 'butter', 'water', 'eggs', 'salt', 'all-purpose flour'],
  ['milk', 'butter', 'water', 'eggs', 'salt', 'all-purpose fl

In [None]:
import streamlit as st
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from rapidfuzz import process, fuzz

# Load data
data = pd.read_csv('../recipe_ingredients_dataset/ingredient_df_with_recipenames.csv')

# Load in SVD Matrices 
vmatrix = pd.read_csv('../recipe_ingredients_dataset/V_1000matrix.csv',index_col=0)
umatrix = pd.read_csv('../recipe_ingredients_dataset/U_1000matrix.csv',index_col=0)

# Get list of unique ingredients and cuisine styles from the dataset
unique_ingredients = [col for col in data.columns if col not in ['id', 'cuisine', 'recipename']]
cuisine_styles = data['cuisine'].unique().tolist()  # Assuming 'cuisine' is a column with categorical data

def main():
    
    st.title("Ingredient Input Interface")

    # Text input for ingredients
    ingredients = st.text_input("Enter ingredients, separated by commas:")
    

    # Dropdown menu for cuisine styles with a maximum of two selections
    selected_cuisines = st.multiselect("Select cuisine styles (max 2):", 
                                       cuisine_styles,
                                       default=None,
                                       help="You can select up to two cuisine styles.")

    if len(selected_cuisines) > 2:
        st.error("Please select no more than two cuisine styles.")
    else:
        if st.button("Submit"):
            st.success("Submitted successfully!")
            st.write("Entered Ingredients:", ingredients)
            st.write("Selected Cuisine Styles:", selected_cuisines)
            # Process the user's ingredient input through the function
            x = ingredients_to_binary(ingredients, unique_ingredients_list)
            # Make sample a Series for manipulation
            x = pd.Series(x)
            # Dot product of user input and the V matrix. 
            xcon = x.dot(vmatrix)
            # xconcept and U must both be numpy arrays, and must be properly reshaped. 
            xcon = np.array(xcon)
            umatrix = np.array(umatrix)
            xcon = xcon.reshape(1, -1)
            #cosine similarity with our U matrix
            similarities = cosine_similarity(umatrix, xcon)
            # Get recommended recipes and ingredients 
            recommendations = get_recipes(data, similarities, 30)
            
            # Display all recommendations
            if not recommendations.empty:
                for index, row in recommendations.iterrows():
                    recipe_name = row['recipename']
                    recipe_ingredients = row[unique_ingredients].where(row == 1).dropna().index.tolist()
                    st.write(f"**Recipe**: {recipe_name}")
                    st.write(f"**Ingredients**: {', '.join(recipe_ingredients)}")
            else:
                st.write("No recommendations found based on the input.")



if __name__ == "__main__":
    main()

    
streamlit run ingredient_app.py