In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import random

class MPNetEntityInjector:
    """
    Build knowledge-augmented prompts: embed a question and candidate triples
    with MPNet, keep the k triples most similar to the question (or k random
    ones, or none at all) and prepend them to the question.
    """
    no_knowledge_prompt = "Please answer this question"
    leading_prompt = "Below are facts in the form of the triple meaningful to answer the questions"

    def __init__(self, device=-1):
        """
        Load the all-mpnet-base-v2 sentence encoder.

        :param device: where to run the encoder. A negative int (default -1) or None
            leaves the choice to sentence-transformers, a non-negative int selects
            that CUDA device, and a string such as "cpu" or "cuda" is passed through.
        """
        if device is None:
            target = None
        elif isinstance(device, int):
            target = None if device < 0 else f"cuda:{device}"
        else:
            target = device
        self.model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device=target)

    def sentence_embedding(self, texts: list):
        """
        Encode a list of sentences.

        :param texts: list of strings
        :return: whatever ``self.model.encode`` returns for that list
        """
        return self.model.encode(texts)

    def top_k_triple_extractor(self, question: np.ndarray, emb_triples: np.ndarray, real_triples, k=10, random=False):
        """
        Retrieve the top k triples of KGs used as context for the question

        :param question: question in form of sentence embeddings
        :param emb_triples: triples in form of sentence embeddings
        :param real_triples: triples list, aligned index-by-index with emb_triples
        :param k: number of triples to retrieve
        :param random: if this is True, retrieve random knowledge
        :return: list of triples (empty when there is nothing to retrieve)
        """
        # The `random` flag shadows the module of the same name inside this
        # method, so the module is imported here under a different name.
        import random as random_module

        # in case number of triples is fewer than k (or k is not positive)
        k = max(0, min(k, len(emb_triples), len(real_triples)))
        if k == 0:
            return []

        if random:
            return random_module.sample(list(real_triples), k)

        # if not the baseline but the top k most similar
        similarities = cosine_similarity(question, emb_triples)
        # argsort is ascending: take the last k indices and reverse them
        top_k_indices = np.argsort(similarities[0])[-k:][::-1]

        return [real_triples[index] for index in top_k_indices]

    def injection(self, question: str, triples=None, no_knowledge=False):
        """
        Create prompt based on question and retrieved triples

        :param question: question
        :param triples: list of triples (triples are in string); None is treated as no triples
        :param no_knowledge: if this is True, only add the no_knowledge_prompt
        :return: the prompt string, ending in "Answer: "
        """
        if no_knowledge:
            return f"{MPNetEntityInjector.no_knowledge_prompt} Question: {question} Answer: "
        return f"{MPNetEntityInjector.leading_prompt} {', '.join(triples or [])} Question: {question} Answer: "

    def __call__(self, question: list, triples: list, k=10, random=False, no_knowledge=False):
        """
        Build the prompt for one question from the given triples.

        :param question: 1 question in form [question] (a bare string is also accepted)
        :param triples: list of triples
        :param k: number of triples to retrieve
        :param random: whether to retrieve random knowledge instead of KAPING
        :param no_knowledge: whether to not use any extra knowledge at all
        :return: the prompt string
        :raises ValueError: if no question is given
        """
        questions = [question] if isinstance(question, str) else list(question)
        if not questions:
            raise ValueError("question must contain one question")
        # The prompt needs the question text itself, not the wrapping list.
        question_text = questions[0]

        # Must be passed by keyword: the second positional slot of injection() is `triples`.
        # With no triples to inject, the plain prompt is the only sensible output.
        if no_knowledge or not triples:
            return self.injection(question_text, no_knowledge=True)

        # use MPNET to turn all into sentence embeddings
        emb_question = self.sentence_embedding(questions[:1])
        emb_triples = self.sentence_embedding(triples)

        # retrieve the top k triples
        top_k_triples = self.top_k_triple_extractor(emb_question, emb_triples, triples, k=k, random=random)

        # create prompt as input
        return self.injection(question_text, top_k_triples)
        
injector = MPNetEntityInjector(device="cuda")
prompt = injector(question=ingredient_list, triples=triples_list, k=top_k, random=False, no_knowledge=False)
print(prompt)

In [None]:
#input
ingredient_list=["Noodle, Cooked_rice, Paprika"]   # detection한 재료들을 문자열로 표현
triples_list=["(Noodle, belong to, spaghetti)", "(Shrimp, belong to, Shrimp Burger)"] #각각의 트리플을 문자열로 표현한 list
top_k=1   # 상위 몇개의 트리플을 가져올것인지

In [16]:
import ollama, os

In [17]:
# Modelfile text: a single FROM line naming the GGUF file, wrapped in newlines.
modelfile = "\n" + "FROM Meta-Llama-3-70B-Instruct.Q3_K_M.gguf" + "\n"

In [18]:
# Create the model named 'llama3:70b' from the Modelfile text in `modelfile`.
# NOTE(review): the recorded output of this cell is
# "ResponseError: pull model manifest: file does not exist"; presumably the GGUF
# file named on the FROM line could not be found — confirm its location before re-running.
ollama.create(model='llama3:70b', modelfile=modelfile)

ResponseError: pull model manifest: file does not exist

In [None]:
#jsonl data preprocessing
import pandas as pd
import json

# Path to the JSONL recipe file.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
file_path = "/home/elicer/ollama/full_dataset_processed.jsonl"

# JSONL file format (sample records)
"""
{"id": "0", "text": "Title: No-Bake Nut Cookies\nIngredients: [\"1 c. firmly packed brown sugar\", \"1/2 c. evaporated milk\", \"1/2 tsp. vanilla\", \"1/2 c. broken nuts (pecans)\", \"2 Tbsp. butter or margarine\", \"3 1/2 c. bite size shredded rice biscuits\"]\nDirections: [\"In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine.\", \"Stir over medium heat until mixture bubbles all over top.\", \"Boil and stir 5 minutes more. Take off heat.\", \"Stir in vanilla and cereal; mix well.\", \"Using 2 teaspoons, drop and shape into 30 clusters on wax paper.\", \"Let stand until firm, about 30 minutes.\"]"}
{"id": "1", "text": "Title: Jewell Ball'S Chicken\nIngredients: [\"1 small jar chipped beef, cut up\", \"4 boned chicken breasts\", \"1 can cream of mushroom soup\", \"1 carton sour cream\"]\nDirections: [\"Place chipped beef on bottom of baking dish.\", \"Place chicken on top of beef.\", \"Mix soup and cream together; pour over chicken. Bake, uncovered, at 275\\u00b0 for 3 hours.\"]"}
{"id": "2", "text": "Title: Creamy Corn\nIngredients: [\"2 (16 oz.) pkg. frozen corn\", \"1 (8 oz.) pkg. cream cheese, cubed\", \"1/3 c. butter, cubed\", \"1/2 tsp. garlic powder\", \"1/2 tsp. salt\", \"1/4 tsp. pepper\"]\nDirections: [\"In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.\"]"}
{"id": "3", "text": "Title: Chicken Funny\nIngredients: [\"1 large whole chicken\", \"2 (10 1/2 oz.) cans chicken gravy\", \"1 (10 1/2 oz.) can cream of mushroom soup\", \"1 (6 oz.) box Stove Top stuffing\", \"4 oz. shredded cheese\"]\nDirections: [\"Boil and debone chicken.\", \"Put bite size pieces in average size square casserole dish.\", \"Pour gravy and cream of mushroom soup over chicken; level.\", \"Make stuffing according to instructions on box (do not make too moist).\", \"Put stuffing on top of chicken and gravy; level.\", \"Sprinkle shredded cheese on top and bake at 350\\u00b0 for approximately 20 minutes or until golden and bubbly.\"]"}
{"id": "4", "text": "Title: Reeses Cups(Candy)\nIngredients: [\"1 c. peanut butter\", \"3/4 c. graham cracker crumbs\", \"1 c. melted butter\", \"1 lb. (3 1/2 c.) powdered sugar\", \"1 large pkg. chocolate chips\"]\nDirections: [\"Combine first four ingredients and press in 13 x 9-inch ungreased pan.\", \"Melt chocolate chips and spread over mixture. Refrigerate for about 20 minutes and cut into pieces before chocolate gets hard.\", \"Keep in refrigerator.\"]"}
"""

# Convert the JSONL file into a pandas DataFrame.

def _strip_label(line, label):
    """Return `line` without a leading `label` (e.g. "Title: "), whitespace-trimmed."""
    line = line.strip()
    return line[len(label):].strip() if line.startswith(label) else line


def _parse_list_field(raw):
    """Parse the textual list stored after "Ingredients: " / "Directions: " into a Python list."""
    # The sample records store these lists as JSON. Parsing them as data
    # (never eval) keeps file contents from being executed as code.
    try:
        return json.loads(raw)
    except ValueError:
        # Fall back to Python-literal syntax (e.g. single-quoted strings).
        import ast
        return ast.literal_eval(raw)


def _parse_recipe_text(text):
    """Split one record's "text" field into title, ingredients and directions."""
    lines = text.split("\n")
    return {
        "title": _strip_label(lines[0], "Title: "),
        "ingredients": _parse_list_field(_strip_label(lines[1], "Ingredients: ")),
        "directions": _parse_list_field(_strip_label(lines[2], "Directions: ")),
    }


data = []
with open(file_path, 'r', encoding='utf-8') as f:
    for line_number, line in enumerate(f, start=1):
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        if not line.strip():
            continue
        try:
            json_obj = json.loads(line)
            # Parse the "text" field into its three parts.
            record = _parse_recipe_text(json_obj['text'])
            record_id = json_obj["id"]
        except (KeyError, IndexError, ValueError, SyntaxError) as err:
            # Report where the bad record is instead of a bare parsing error.
            raise ValueError(f"{file_path}, line {line_number}: malformed recipe record") from err

        data.append({"id": record_id, **record})


# Build the DataFrame in one go from the collected records.
df = pd.DataFrame(data)

# Inspect the first rows (rich display as the cell's last expression).
df.head()

In [None]:
def retrieve_recipes_by_ingredients(df, kaping_results):
    """
    Return the recipes whose title matches any dish name produced by KAPING.

    Matching is case-insensitive and by substring: a recipe is kept when any
    requested name occurs inside its title.

    :param df: DataFrame with "title", "ingredients" and "directions" columns
    :param kaping_results: list of dish names from the KAPING top-k results
        (a single string is also accepted)
    :return: list of dicts with keys "title", "ingredients", "directions",
        in the row order of df; empty when nothing matches
    """
    if not kaping_results or len(df) == 0:
        return []
    if isinstance(kaping_results, str):
        kaping_results = [kaping_results]

    # Normalise the requested names once; drop blanks, which would match every title.
    wanted = [str(name).strip().lower() for name in kaping_results]
    wanted = [name for name in wanted if name]
    if not wanted:
        return []

    titles = df["title"].astype(str).str.lower()
    matches = titles.apply(lambda title: any(name in title for name in wanted)).astype(bool)

    return df.loc[matches, ["title", "ingredients", "directions"]].to_dict("records")

# Example: look up recipes for a dish name.
user_ingredients = ["Chicken Casserole"]
matched_recipes = retrieve_recipes_by_ingredients(df=df, kaping_results=user_ingredients)

# Print the retrieved recipes.
print("Matched Recipes:", matched_recipes)


In [None]:
## Prompt for the LLaMA model: one-sentence summary of a matched recipe
if not matched_recipes:
    # Fail here rather than let a later cell send a stale `prompt` to the model.
    raise ValueError("No matching recipe found; cannot build a prompt.")

# matched_recipes is a list of recipe dicts; the prompt describes the first match.
recipe = matched_recipes[0]
prompt = f"""
    Based on your ingredients, here is a matching recipe:

    Title: {recipe['title']}

    Ingredients:
    {', '.join(recipe['ingredients'])}

    Directions:
    {'. '.join(recipe['directions'])}

    Can you summarize this recipe in one sentence?
    """

In [None]:
# Send the current `prompt` to the 'llama3:70b' model; the cell displays the
# 'response' entry of the returned result.
ollama.generate(model='llama3:70b', prompt=prompt)['response']

In [None]:
import re

## Prompt for the LLaMA model: full recipe write-up with substitutes
if not matched_recipes:
    # Fail here rather than leave `prompt` holding a value from an earlier cell.
    raise ValueError("No matching recipe found; cannot build a prompt.")

# matched_recipes is a list of recipe dicts; the prompt describes the first match.
recipe = matched_recipes[0]

# Describe every ingredient of the recipe, appending its substitute in
# parentheses when the optional 'alternatives' mapping provides one.
alternatives = recipe.get('alternatives') or {}
ingredient_descriptions = [
    f"{ingredient} ({alternatives[ingredient]})" if alternatives.get(ingredient) else ingredient
    for ingredient in recipe['ingredients']
]

# Strip any step numbers already present so the steps are not numbered twice.
cleaned_directions = [
    re.sub(r'^\d+\.\s*', '', step) for step in recipe['directions']
]
numbered_directions = "\n".join(
    f"{i+1}. {step}" for i, step in enumerate(cleaned_directions)
)

# Build the prompt.
prompt = f"""
    Based on the following ingredients, create a recipe with step-by-step Directions:

    Title: {recipe['title']}

    Ingredients:
    {', '.join(ingredient_descriptions)}

    Directions:
    {numbered_directions}

    Tasks:
    1. Begin the response with the recipe title.
    2. List all the 'ingredients' used in this recipe in a single line, including substitutes in parentheses, and specify the exact quantity for each ingredient. Example format: "flour - 2 cups, sugar (honey) - 100g".
    3. Provide the step-by-step instructions for this recipe in a clear and numbered format, where each step starts on a new line.
    4. Write a concise one-sentence 'summary of this recipe' that highlights its main ingredients and the core idea of the preparation process.
       Ensure the summary is detailed enough for someone unfamiliar with this dish to understand and visualize the final result.
    """
