In [9]:
import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [10]:
def read():
    """Load the recipe data files into a DataFrame indexed by recipe URL.

    Reads ``ingredients.txt``, ``instruction.txt``, ``time.txt``,
    ``title.txt`` and ``url.txt`` from the current working directory.
    Line *i* of every file describes the same recipe. Any recipe for which
    at least one field is the literal string ``'NULL'`` is discarded.

    Returns:
        pandas.DataFrame: indexed by ``url`` with the columns ``title``,
        ``time``, ``ingredient``, ``instruction``, ``index`` (the row
        number the recipe had before NULL rows were removed) and ``good``
        (initialised to ``'NULL'`` for every row).

    Raises:
        OSError: if one of the data files cannot be opened.
        ValueError: raised by pandas when the files do not all contain the
            same number of lines.
    """
    def _read_lines(path, **open_kwargs):
        # The context manager closes the handle even if reading fails;
        # only the trailing newline is stripped, other whitespace is kept.
        with open(path, "r", **open_kwargs) as handle:
            return [line.strip("\n") for line in handle]

    # url.txt is read strictly; the free-text files skip undecodable bytes.
    data = {
        "url": _read_lines("url.txt"),
        "title": _read_lines("title.txt", errors="ignore"),
        "time": _read_lines("time.txt", errors="ignore"),
        "ingredient": _read_lines("ingredients.txt", errors="ignore"),
        "instruction": _read_lines("instruction.txt", errors="ignore"),
    }
    df = pd.DataFrame.from_dict(data)

    # Drop every recipe that has a 'NULL' placeholder in any field.
    has_null = (df[list(data)] == "NULL").any(axis=1)
    df = df.loc[~has_null].copy()

    # Keep the original row number in a column, then index by URL.
    df["index"] = df.index
    df = df.set_index("url")
    df["good"] = "NULL"
    return df


In [11]:
def pref(df):
    """Return the stored recipe ratings, creating the ratings file if needed.

    Ratings live in ``preference.txt`` in the current working directory, one
    per line. When the file does not exist yet it is created with one
    ``'NULL'`` line for every row of ``df``.

    Args:
        df: DataFrame of recipes; only its row count (``df.shape[0]``) is
            used, and only when the file has to be created.

    Returns:
        list[str]: the lines of ``preference.txt`` with the trailing newline
        removed.
    """
    path = "preference.txt"

    # A relative path is resolved against the current working directory.
    if not os.path.isfile(path):
        with open(path, "w") as output:
            output.write("NULL\n" * df.shape[0])

    # The context manager makes sure the handle is closed after reading.
    with open(path, "r") as handle:
        return [line.strip("\n") for line in handle]

In [12]:
def menu():
    """Show the main menu until the user enters a valid choice.

    The menu is printed again after every invalid entry. A loop is used
    instead of recursion so that any number of invalid entries can be
    handled without exhausting the call stack.

    Returns:
        str: one of ``'1'``, ``'2'``, ``'3'`` or ``'0'``.
    """
    valid_choices = ('1', '2', '3', '0')
    while True:
        print("Choose 1 to update prefernces\n")
        print("Choose 2 to evaluate a recipe\n")
        print("Choose 3 to get recipe recommendation\n")
        print("Choose 0 to quit\n\n")
        choice = input("Choice : ")
        if choice in valid_choices:
            return choice
        # Echo the rejected entry, then show the menu again.
        print(choice + "?")
        print("Sorry wrong input")

In [13]:
def update(df):
    """Ask the user to rate one recipe and persist all ratings.

    Prompts for a recipe URL until it matches a label in ``df.index`` and
    for a rating until it is one of ``'1'``..``'5'``. The rating is stored
    in the ``preference`` column, the ``good`` column is refreshed
    (ratings 1-3 become ``'bad'``, 4-5 become ``'good'``, anything else is
    left as it was) and the whole ``preference`` column is written to
    ``preference.txt``, one value per line.

    Args:
        df: recipe DataFrame indexed by URL; must already contain a
            ``preference`` column. It is modified in place.

    Returns:
        pandas.DataFrame: the same ``df`` object, updated.
    """
    url_prompt = "Please enter the url of the recipe (starting with 'https://www.'): "
    rating_prompt = "Please enter your rating for the recipe. the ratings range from 1 to 5, 5 being the best\n"

    print("\nHi, You have chosen to add ratings for dishes\n")

    # Normalise once and use the same value for validation and assignment.
    # Assigning through .loc with a label missing from the index would
    # silently append a new row instead of updating the existing recipe.
    url = input(url_prompt).lower()
    while url not in df.index:
        print("Invalid option")
        url = input(url_prompt).lower()

    rating = input(rating_prompt)
    while rating not in ('1', '2', '3', '4', '5'):
        print("Invalid option")
        rating = input(rating_prompt)

    df.loc[url, "preference"] = rating

    # Re-derive the good/bad label for every rated recipe in one pass.
    preference = df["preference"]
    df.loc[preference.isin(['1', '2', '3']), "good"] = 'bad'
    df.loc[preference.isin(['4', '5']), "good"] = 'good'

    # Rewrite the ratings file in row order.
    with open("preference.txt", "w") as output:
        output.writelines(value + "\n" for value in df["preference"])

    print("Updation complete")

    return df

In [14]:
def evaluate(df):
    """Predict whether the user would like a recipe and print the result.

    Prompts for a recipe URL until it matches a label in ``df.index``. A
    TF-IDF + multinomial naive Bayes pipeline is trained on the ingredient
    text of every recipe whose ``preference`` is not ``'NULL'``, using the
    ``good`` column as the label. The predicted label, the classifier's
    classes and the class probabilities for the chosen recipe are printed.
    When no recipe has been rated yet, a notice is printed instead.

    Args:
        df: recipe DataFrame indexed by URL with ``ingredient``,
            ``preference`` and ``good`` columns.

    Returns:
        None
    """
    url_prompt = "Please enter the url of the recipe (starting with 'https://www.'): "

    print("\nHi, You have chosen to evaluate a recipe\n")

    # Normalise once so the validated value is the one used for the lookup;
    # looking up the raw input would raise KeyError for mixed-case entries.
    url = input(url_prompt).lower()
    while url not in df.index:
        print("Invalid option")
        url = input(url_prompt).lower()

    # Only recipes the user has rated can serve as training data.
    rated = df[df["preference"] != "NULL"]
    if rated.empty:
        print("Sorry dont have enough information to rate a dish, kindly update prefernces.")
        return None

    # Imported here so scikit-learn is only loaded when a model is built.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.pipeline import Pipeline

    # Vectorise the ingredient text (English stop words removed), then classify.
    classifier = Pipeline([
        ('tf', TfidfVectorizer(stop_words='english')),
        ('nb', MultinomialNB()),
    ])
    classifier.fit(rated["ingredient"], rated["good"])

    X_test = [df.loc[url, "ingredient"]]
    predicted = classifier.predict(X_test)
    prob = classifier.predict_proba(X_test)

    print("Predicted:", predicted)
    print("Classes:", classifier.classes_)
    print("Probability:", prob)
    return None

In [15]:
def recommend(df):
    """Print up to five unrated recipes most similar to the liked ones.

    Recipes whose ``good`` value is ``'good'`` are the reference set and
    recipes whose ``good`` value is ``'NULL'`` are the candidates; recipes
    marked ``'bad'`` are never recommended. Each candidate is scored by the
    mean similarity between its ingredient TF-IDF vector and the vectors of
    all reference recipes, and the title and URL of the highest-scoring
    candidates are printed, best first.

    Args:
        df: recipe DataFrame indexed by URL with ``title``, ``ingredient``
            and ``good`` columns. The caller's frame is not modified.

    Returns:
        None
    """
    print("\nRecipe Recommedations \n")

    # Work on a positionally indexed copy; the URL index becomes a column.
    df = df.reset_index()
    good_recipes_index = list(df[df["good"] == "good"].index)
    # Comparison set: unrated recipes only (a bad dish is never recommended).
    comparison_index = list(df[df["good"] == "NULL"].index)

    # Without a reference set or candidates there is nothing to score, and
    # the similarity computation would be handed a zero-row matrix.
    if not good_recipes_index:
        print("No recipe has been rated 4 or 5 yet, so there is nothing to base a recommendation on.")
        return None
    if not comparison_index:
        print("Every recipe has already been rated, so there is nothing left to recommend.")
        return None

    # Imported here so scikit-learn is only loaded when it is needed.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import linear_kernel

    tfidf = TfidfVectorizer(stop_words='english').fit_transform(df["ingredient"])

    # Rows: candidates, columns: liked recipes. TF-IDF rows are L2-normalised
    # by default, so the dot product equals the cosine similarity.
    similarity = linear_kernel(tfidf[comparison_index], tfidf[good_recipes_index])
    mean_cosine_similarities = np.mean(similarity, axis=1)

    # Positions of the five best scores, best first. They index into the
    # candidate subset, so map them back to row positions in df.
    ranked = mean_cosine_similarities.argsort()[:-6:-1]
    best = df.iloc[[comparison_index[i] for i in ranked]]

    for best_title, best_url in zip(best["title"], best["url"]):
        print("Title:", best_title)
        print("URL:", best_url)

In [19]:
def main_loop():
    """Run the interactive recipe-rating session until the user quits.

    Loads the recipes with ``read()``, attaches the stored ratings from
    ``pref()`` as the ``preference`` column, derives the ``good`` column
    (ratings 1-3 become ``'bad'``, 4-5 become ``'good'``, everything else
    keeps its current value) and then dispatches menu choices until ``'0'``
    is selected.

    Raises:
        ValueError: if the number of stored ratings differs from the number
            of recipes, i.e. ``preference.txt`` does not belong to the
            current data files.
    """
    df = read()
    pr = pref(df)

    # Fail with an actionable message instead of pandas' generic one.
    if len(pr) != df.shape[0]:
        raise ValueError(
            "preference.txt has %d lines but %d recipes were loaded; "
            "delete or regenerate preference.txt" % (len(pr), df.shape[0])
        )
    df["preference"] = pr

    # Label every rated recipe in one pass.
    preference = df["preference"]
    df.loc[preference.isin(['1', '2', '3']), "good"] = 'bad'
    df.loc[preference.isin(['4', '5']), "good"] = 'good'

    # menu() only returns '0'..'3', so asking once per iteration covers
    # every case and the loop can never spin on a stale choice.
    while True:
        ans = menu()
        if ans == '0':
            print("\nThankyou for visiting\n")
            break
        elif ans == '1':
            df = update(df)
        elif ans == '2':
            evaluate(df)
        elif ans == '3':
            recommend(df)
   
        
    
    

In [20]:
# Entry point: start the interactive menu loop when this code runs as the main module.
if __name__ == '__main__':
    main_loop()

Choose 1 to update prefernces

Choose 2 to evaluate a recipe

Choose 3 to get recipe recommendation

Choose 0 to quit


Choice : 1

Hi, You have chosen to add ratings for dishes

Please enter the url of the recipe (starting with 'https://www.'): https://www.allrecipes.com/recipe/219963/creamy-mushroom-meatloaf/
Please enter your rating for the recipe. the ratings range from 1 to 5, 5 being the best
2
Updation complete
Choose 1 to update prefernces

Choose 2 to evaluate a recipe

Choose 3 to get recipe recommendation

Choose 0 to quit


Choice : 2

Hi, You have chosen to evaluate a recipe

Please enter the url of the recipe (starting with 'https://www.'): https://www.allrecipes.com/recipe/241114/microwave-sweet-potato-chips/
Predicted: ['bad']
Classes: ['bad' 'good']
Probability: [[0.80931439 0.19068561]]
Choose 1 to update prefernces

Choose 2 to evaluate a recipe

Choose 3 to get recipe recommendation

Choose 0 to quit


Choice : 3

Recipe Recommedations 

Title: Peanut Butter Cup Coo