### Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from collections import Counter
import json
import re

import numpy as np
import pandas as pd

from adam_functions import *

# Preprocessing data

### Filtering based on reviews

In [None]:
# Read the two raw CSV tables from the current working directory.
interactions_df = pd.read_csv('RAW_interactions.csv')
recipes_df = pd.read_csv('RAW_recipes.csv')

# Filter recipes using the rating thresholds below.
# NOTE(review): the next cell loads "filtered_recipes.csv", so this helper
# presumably writes that file — confirm in adam_functions.
filter_recipes_by_rating(
    recipes_df,
    interactions_df,
    min_mean_rating=2.5,
    min_num_ratings=3,
)

### Loading data

In [None]:
# Load the filtered recipes and pass them through the "beverages" tag filter.
# NOTE(review): whether filter_tags keeps or drops recipes carrying the tag is
# decided inside adam_functions — confirm there.
df = filter_tags(load_data("filtered_recipes.csv"), ["beverages"])

# Renumber the rows consecutively after filtering, discarding the old index.
df.reset_index(inplace=True, drop=True)

# Print a summary of the resulting DataFrame.
df.info()

### Preprocessing for fine-tuning of Llama

In [None]:
# Build the data split for fine-tuning from the filtered recipes in `df`.
# NOTE(review): any files written or values returned are determined by
# create_data_split_for_finetuning in adam_functions — confirm there.
create_data_split_for_finetuning(df)

# Ingredients Extraction

### White list — example usage

In [None]:
# get_white_list derives the ingredient white list from the recipe DataFrame.
ingredients_white_list = get_white_list(df)

# Two sample free-text queries used to demonstrate the extraction.
user_input = [
    "I have tomatoes, onions, and garlic powder. What can I cook with them?",
    "What dishes can I make with pasta, spinach, and blue cheese?",
]

# Run the project's preprocessing helper over the queries, then extract the
# ingredients of each query using the white list.
user_input = preprocess_arr(user_input)
extracted_ingredients = white_list_user_input(ingredients_white_list, user_input)

# Plain loop instead of a list comprehension: the comprehension was used only
# for its print side effect and produced a throwaway [None, ...] list that the
# notebook echoed as the cell's output.
for ingrs in extracted_ingredients:
    print(f"Ingredients: {ingrs}\n")

# Recipe fetching

### Levenshtein distance

In [None]:
# Ingredients supplied by the user for this example lookup.
user_ingredients = np.array(['garlic', 'onions', 'tomatoes', 'pasta', 'salmon'])

# Map every white-listed ingredient to its position in the white list
# (comprehension originally marked as ChatGPT-suggested).
ingredients_dict = {
    name: position for position, name in enumerate(ingredients_white_list)
}

# Encode the recipes' ingredients with the mapping above.
df_food = encode_ingredients_df(df, ingredients_dict)

# NOTE(review): the trailing 2 is presumably the number of recipes to return —
# confirm against get_recipes_levenstein.
top_recipes = get_recipes_levenstein(
    df_food,
    user_ingredients,
    ingredients_dict,
    2,
)

print(top_recipes["name"])


# Evaluation

### Ingredients extraction evaluation

In [None]:
# Load the evaluation queries and the reference ingredients. ground_truth is
# indexed by position below, so entry j is compared against each method's
# j-th result. The encoding is given explicitly so reading does not depend on
# the platform's default encoding.
with open('user_inputs.json', 'r', encoding='utf-8') as file:
    user_inputs = json.load(file)

with open('user_inputs_ingredients.json', 'r', encoding='utf-8') as file:
    ground_truth = json.load(file)

# NOTE(review): df_eval is never used below — the reset_index and white_list
# calls operate on the full `df`. If the first len(user_inputs) recipes were
# meant to be excluded from the white list, these two calls should target
# df_eval instead. Behaviour is left unchanged here; confirm the intent.
df_eval = df.iloc[len(user_inputs):]
df.reset_index(drop=True, inplace=True)

# Run the three extraction approaches on the same user inputs.
white_list_results = white_list(df, user_inputs)

ner_results = extract_ingredients_NER(user_inputs)

llm_results = extractIngredients(user_inputs)
llm_results = llm_results_postprocess(llm_results)

ingredients_lists = [white_list_results, ner_results, llm_results]
methods = ["white_list", "ner", "llm"]

# For every method, compute one distance per query between the sorted
# extracted ingredients and the sorted reference ingredients. Indexing
# ground_truth[j] (rather than zipping) keeps a length mismatch loud: a method
# that returns more results than there are references raises instead of being
# silently truncated.
# NOTE(review): the meaning of the trailing 0 is defined by levenstein_dist.
levenstein_results = {
    method: [
        levenstein_dist(sorted(ingredients), sorted(ground_truth[j]), 0)
        for j, ingredients in enumerate(ingredients_list)
    ]
    for method, ingredients_list in zip(methods, ingredients_lists)
}

generate_violin_plot(levenstein_results)
