In [5]:
import pandas as pd
import pickle
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Paths used for saving the cached artifacts
EMBEDDINGS_FILE = 'recipe_embeddings.pkl'
RECIPES_DATA_FILE = 'recipes_data.pkl'

# 1. Load the recipe table.
df = pd.read_csv('cleaned_eda_all_recipes.csv')

# 2. Build the "flavor profile": the text that is embedded for each recipe.
# A keyword column is preferred (both spellings of the column name are
# accepted) and has braces and single quotes removed; otherwise the raw
# ingredient list is used with everything except word characters and
# whitespace removed.
_KEYWORD_COLUMNS = ('ingridient_keywords', 'ingredient_keywords')
_keyword_column = next((c for c in _KEYWORD_COLUMNS if c in df.columns), None)
if _keyword_column is not None:
    _profile_source, _strip_pattern = df[_keyword_column], r"[{}']"
else:
    _profile_source, _strip_pattern = df['list_ingrid'], r'[^\w\s]'

# Missing cells become empty strings so that every profile is a real str;
# otherwise NaN would be handed to the sentence encoder as a float.
df['flavor_profile'] = (
    _profile_source.fillna('').str.replace(_strip_pattern, '', regex=True)
)

# 3. Check whether saved embeddings already exist
# Load the (model, embeddings) pair from the cache when both cache files are
# present; otherwise, or when the cache no longer matches `df`, encode every
# flavor profile and rewrite the cache.
# NOTE(review): the message literals below look like mis-encoded Cyrillic;
# they are kept unchanged here — confirm the file's encoding.
if os.path.exists(EMBEDDINGS_FILE) and os.path.exists(RECIPES_DATA_FILE):
    print("üîÅ –ó–∞–≥—Ä—É–∂–∞–µ–º —Å–æ—Ö—Ä–∞–Ω—ë–Ω–Ω—ã–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–∏ –∏ –º–æ–¥–µ–ª—å...")
    # NOTE(security): pickle.load can execute arbitrary code while
    # unpickling; only load cache files that this script wrote itself.
    with open(EMBEDDINGS_FILE, 'rb') as f:
        model, recipe_embeddings = pickle.load(f)
    # Search results index `df` by embedding row position, so a cache built
    # from a table with a different number of rows must not be reused.
    _cache_is_current = len(recipe_embeddings) == len(df)
else:
    model = None
    _cache_is_current = False

if not _cache_is_current:
    print("üß† –û–±—É—á–∞–µ–º –º–æ–¥–µ–ª—å –∏ –≤–µ–∫—Ç–æ—Ä–∏–∑—É–µ–º —Ä–µ—Ü–µ–ø—Ç—ã...")
    if model is None:
        model = SentenceTransformer('sentence-transformers/distiluse-base-multilingual-cased-v2')
    recipe_embeddings = model.encode(df['flavor_profile'].tolist(), show_progress_bar=True)

    # Save for subsequent runs.
    with open(EMBEDDINGS_FILE, 'wb') as f:
        pickle.dump((model, recipe_embeddings), f)

    # Save only the columns that are shown to the user.
    df[['name', 'list_ingrid', 'list_resipe']].to_pickle(RECIPES_DATA_FILE)

# 4. Search function
def find_recipes(query, top_n=5):
    """Return the recipes whose embeddings are most similar to *query*.

    The query is encoded with the module-level ``model`` and compared by
    cosine similarity against every row of ``recipe_embeddings``.

    Args:
        query: Text passed to ``model.encode``.
        top_n: Maximum number of recipes to return. Zero or a negative
            value yields an empty result.

    Returns:
        The rows of ``df`` (columns ``name``, ``list_ingrid`` and
        ``list_resipe``) for the best matches, ordered from the highest
        similarity score to the lowest.
    """
    query_emb = model.encode([query])
    scores = cosine_similarity(query_emb, recipe_embeddings).flatten()
    # Reverse first, then truncate. Slicing with ``[-top_n:]`` degenerates to
    # ``[0:]`` when top_n is 0 and would return every recipe.
    ranked = scores.argsort()[::-1]
    idxs = ranked[:max(top_n, 0)]
    return df.iloc[idxs][['name', 'list_ingrid', 'list_resipe']]

# 5. Usage example
if __name__ == "__main__":
    # Read a free-text query from the user, then print each matching recipe
    # (name, ingredients, instructions) followed by a separator line.
    user_query = input("–í–≤–µ–¥–∏—Ç–µ –≤–∞—à–∏ –≤–∫—É—Å–æ–≤—ã–µ –ø—Ä–µ–¥–ø–æ—á—Ç–µ–Ω–∏—è: ")
    print("\nüîé –ü–æ–¥–±–∏—Ä–∞–µ–º —Ä–µ—Ü–µ–ø—Ç—ã...\n")

    results = find_recipes(user_query)
    separator = "-" * 60
    for _, recipe in results.iterrows():
        print(
            f"üçΩÔ∏è –ù–∞–∑–≤–∞–Ω–∏–µ: {recipe['name']}",
            f"üõí –ò–Ω–≥—Ä–µ–¥–∏–µ–Ω—Ç—ã: {recipe['list_ingrid']}",
            f"üìù –†–µ—Ü–µ–ø—Ç: {recipe['list_resipe']}",
            separator,
            sep="\n",
        )

üîÅ –ó–∞–≥—Ä—É–∂–∞–µ–º —Å–æ—Ö—Ä–∞–Ω—ë–Ω–Ω—ã–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–∏ –∏ –º–æ–¥–µ–ª—å...

üîé –ü–æ–¥–±–∏—Ä–∞–µ–º —Ä–µ—Ü–µ–ø—Ç—ã...

üçΩÔ∏è –ù–∞–∑–≤–∞–Ω–∏–µ: –í–∞—Ä–µ–Ω—ã–π –∫—Ä–∞–±
üõí –ò–Ω–≥—Ä–µ–¥–∏–µ–Ω—Ç—ã: –ö—Ä–∞–±—ã 
üìù –†–µ—Ü–µ–ø—Ç: 1. –ü–æ—Å—Ç–∞–≤—å—Ç–µ –Ω–∞ –æ–≥–æ–Ω—å –æ–≥—Ä–æ–º–Ω—É—é –∫–∞—Å—Ç—Ä—é–ª—é —Å —Å–æ–ª–µ–Ω–æ–π –≤–æ–¥–æ–π. –¢–∞–∫ –±—ã—Å—Ç—Ä–æ, –∫–∞–∫ –≤—ã —Å–º–æ–∂–µ—Ç–µ, –æ–ø—É—Å—Ç–∏—Ç–µ –≤—Å–µ—Ö –∫—Ä–∞–±–æ–≤ –≤ –≤–æ–¥—É. –£–¥–æ–±–Ω–µ–µ –≤—Å–µ–≥–æ –≤–∞—Ä–∏—Ç—å –∫—Ä–∞–±–æ–≤ –≤ —Å–µ—Ç–∫–µ, –∏–ª–∏ –≤—ã –º–æ–∂–µ—Ç–µ –≤–æ—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å—Å—è —â–∏–ø—Ü–∞–º–∏.2. –ó–∞–∫—Ä–æ–π—Ç–µ –∫–∞—Å—Ç—Ä—é–ª—é –Ω–µ–º–µ–¥–ª–µ–Ω–Ω–æ. –û—Å—Ç–∞–≤—å—Ç–µ –∫—Ä–∞–±–æ–≤ –≤–∞—Ä–∏—Ç—å—Å—è –Ω–∞ 8 –º–∏–Ω—É—Ç.3. –î–æ—Å—Ç–∞–Ω—å—Ç–µ –∫—Ä–∞–±–æ–≤ –∏–∑ –∫–∞—Å—Ç—Ä—é–ª–∏ –∏ –ø–æ–º–µ—Å—Ç–∏—Ç–µ –≤ —Ö–æ–ª–æ–¥–∏–ª—å–Ω–∏–∫ –Ω–∞ –Ω–µ—Å–∫–æ–ª—å–∫–æ —á–∞—Å–æ–≤ (–µ—Å–ª–∏ –≤—ã –ø—Ä–µ–¥–ø–æ—á–∏—Ç–∞–µ—Ç–µ –Ω–µ –æ—á–µ–Ω—å —Ö–æ–ª–æ–¥–Ω—ã—Ö) –∏–ª–∏ –Ω–∞ –≤—Å—é –Ω–æ—á—å (–µ—Å–ª–∏ –≤–∞–º –Ω—Ä–∞–≤–∏—Ç—Å—è —Ö–æ–ª–æ–¥–Ω–æ–µ