In [16]:
import re
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
import nltk


nltk.download("punkt")
nltk.download("punkt_tab")

def read_recipes_from_txt(file_path):
    recipes = []
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    blocks = content.strip().split("\n\n")

    for block in blocks:
        title = re.search(r"#TITLE:\s*(.*)", block)
        ingr = re.search(r"#INGREDIENTS:\s*(.*)", block)
        instr = re.search(r"#INSTRUCTIONS:\s*(.*)", block)

        if title and ingr and instr:
            recipes.append({
                "title": title.group(1).strip(),
                "ingredients": ingr.group(1).strip(),
                "instructions": instr.group(1).strip()
            })

    return pd.DataFrame(recipes)

df = read_recipes_from_txt("Receptai.txt")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [17]:
LEXICON = {
    # Cheese variants
    "parmesan": "cheese",
    "parmigiano": "cheese",
    "mozzarella": "cheese",
    "cheddar": "cheese",
    "gouda": "cheese",
    "feta": "cheese",
    "ricotta": "cheese",
    "cream cheese": "cheese",
    "goat cheese": "cheese",
    "gruyere": "cheese",
    "blue cheese": "cheese",
    "emmental": "cheese",
    "pecorino": "cheese",

    # Tomato products
    "tomato sauce": "tomatoes",
    "tomato paste": "tomatoes",
    "tomato puree": "tomatoes",
    "crushed tomatoes": "tomatoes",
    "diced tomatoes": "tomatoes",
    "cherry tomatoes": "tomatoes",
    "roma tomatoes": "tomatoes",

    # Oils / fats
    "olive oil": "oil",
    "vegetable oil": "oil",
    "sunflower oil": "oil",
    "canola oil": "oil",
    "rapeseed oil": "oil",
    "sesame oil": "oil",
    "butter": "fat",
    "margarine": "fat",
    "lard": "fat",

    # Garlic variants
    "garlic cloves": "garlic",
    "minced garlic": "garlic",
    "garlic powder": "garlic",
    "crushed garlic": "garlic",

    # Onions
    "red onion": "onion",
    "yellow onion": "onion",
    "white onion": "onion",
    "spring onion": "onion",
    "green onion": "onion",
    "shallots": "onion",

    # Sweeteners
    "brown sugar": "sugar",
    "white sugar": "sugar",
    "cane sugar": "sugar",
    "powdered sugar": "sugar",
    "icing sugar": "sugar",
    "honey": "sweetener",
    "maple syrup": "sweetener",

    # Milk products
    "whole milk": "milk",
    "low fat milk": "milk",
    "skim milk": "milk",
    "buttermilk": "milk",
    "heavy cream": "cream",
    "whipping cream": "cream",
    "sour cream": "cream",
    "yogurt": "yogurt",
    "greek yogurt": "yogurt",

    # Meat: chicken
    "chicken breast": "chicken",
    "chicken thighs": "chicken",
    "grilled chicken": "chicken",
    "minced chicken": "chicken",
    "ground chicken": "chicken",

    # Meat: beef
    "ground beef": "beef",
    "minced beef": "beef",
    "beef steak": "beef",
    "beef broth": "beef broth",

    # Meat: pork
    "minced pork": "pork",
    "ground pork": "pork",
    "pork chop": "pork",

    # Meat: fish
    "salmon fillet": "salmon",
    "smoked salmon": "salmon",
    "cod fillet": "cod",
    "tuna steak": "tuna",
    "canned tuna": "tuna",

    # Pasta & grains
    "spaghetti": "pasta",
    "penne": "pasta",
    "fusilli": "pasta",
    "macaroni": "pasta",
    "rice noodles": "noodles",
    "egg noodles": "noodles",

    "brown rice": "rice",
    "white rice": "rice",
    "basmati rice": "rice",
    "jasmine rice": "rice",

    # Herbs
    "fresh basil": "basil",
    "dried basil": "basil",
    "fresh parsley": "parsley",
    "dried parsley": "parsley",
    "fresh dill": "dill",
    "fresh mint": "mint",
    "rosemary": "rosemary",
    "thyme": "thyme",
    "oregano": "oregano",

    # Spices
    "black pepper": "pepper",
    "ground pepper": "pepper",
    "chili flakes": "chili",
    "red pepper flakes": "chili",
    "paprika powder": "paprika",
    "smoked paprika": "paprika",
    "cayenne pepper": "cayenne",
    "turmeric": "turmeric",
    "cumin": "cumin",
    "curry powder": "curry",

    # Bread products
    "white bread": "bread",
    "whole grain bread": "bread",
    "baguette": "bread",
    "breadcrumbs": "bread",

    # Eggs
    "egg yolk": "eggs",
    "egg white": "eggs",

    # Broths
    "vegetable broth": "broth",
    "chicken broth": "broth",
    "beef broth": "broth",

    # Misc
    "soy sauce": "soy sauce",
    "fish sauce": "fish sauce",
    "worcestershire sauce": "worcestershire",
    "balsamic vinegar": "vinegar",
    "white vinegar": "vinegar",
    "apple cider vinegar": "vinegar",
    "lemon juice": "lemon",
    "lime juice": "lime"
}

def normalize(text):
    if not isinstance(text, str):
        return ""

    text = text.lower()

    for k, v in LEXICON.items():
        text = text.replace(k, v)

    text = re.sub(r"[^a-ząčęėįšųūž, ]", " ", text)

    tokens = nltk.word_tokenize(text)

    return " ".join(tokens)


df["ing_list"] = df["ingredients"].apply(
    lambda x: [normalize(i.strip()) for i in x.split(",")]
)


In [18]:
tfidf = TfidfVectorizer()
X_tfidf = tfidf.fit_transform(df["ingredients"])

In [19]:


def ingredient_coverage(user_ings, recipe_ings):
    user_set = set([normalize(i) for i in user_ings])
    rec_set = set([normalize(i) for i in recipe_ings])

    matches = user_set & rec_set
    missing = rec_set - matches

    return len(matches), len(rec_set), matches, missing

def recommend_recipes(user_input, top_k=5):
    user_list = [i.strip() for i in user_input.split(",")]
    user_norm = normalize(user_input)

    tfidf_vec = tfidf.transform([user_norm])
    sim_tfidf = cosine_similarity(tfidf_vec, X_tfidf).flatten()

    combined_score = sim_tfidf

    results = []
    for i, score in enumerate(combined_score):
        matched, total, overlap, missing = ingredient_coverage(user_list, df.iloc[i]["ing_list"])
        coverage = matched / total

        results.append({
            "title": df.iloc[i]["title"],
            "ingredients": df.iloc[i]["ingredients"],
            "instructions": df.iloc[i]["instructions"],
            "score": score,
            "coverage": coverage,
            "missing": total - matched,
            "overlap": overlap,
            "missing_list": missing
        })

    results = sorted(results, key=lambda x: (x["coverage"], x["score"]), reverse=True)
    return results[:top_k]

In [23]:
print("MAISTO RECEPTŲ GENERATORIUS")
print("--------------------------------------------")

user = input("Įvesk produktus(pvz: eggs, cheese, milk, rice): ")

results = recommend_recipes(user)

print("\nTOP RECEPTAI:\n")

for r in results:
    print(f"  ~~~~{r['title']}~~~~")
    print(f"  Panašumas: {r['score']:.3f}")
    print(f"  Padengimas (coverage): {r['coverage']*100:.0f}%")
    print(f"  Reikalingi ingredientai: {r['ingredients']}")
    print(f"  Instrukcijos: {r['instructions']}")
    print(f"  Turi: {r['overlap']}")
    print(f"  Trūksta: {r['missing']}")
    print(f"  Trūksta: {', '.join(r['missing_list']) if r['missing_list'] else 'Nieko netrūksta!'}")
    print("--------------------------------------------\n")

# pvz: pecorino, tomatoes, olive oil

MAISTO RECEPTŲ GENERATORIUS
--------------------------------------------
Įvesk produktus(pvz: eggs, cheese, milk, rice): salt, pepper, chicken, bell pepper, cheese, butter, oil, eggs, milk

TOP RECEPTAI:

  ~~~~Omletas~~~~
  Panašumas: 0.665
  Padengimas (coverage): 100%
  Reikalingi ingredientai: eggs, milk, salt, pepper, oil
  Instrukcijos: Beat eggs with milk, fry until firm.
  Turi: {'milk', 'pepper', 'eggs', 'oil', 'salt'}
  Trūksta: 0
  Trūksta: Nieko netrūksta!
--------------------------------------------

  ~~~~Kiaušinienė~~~~
  Panašumas: 0.568
  Padengimas (coverage): 100%
  Reikalingi ingredientai: eggs, salt, pepper, oil
  Instrukcijos: Beat eggs, fry in oil, season.
  Turi: {'eggs', 'salt', 'pepper', 'oil'}
  Trūksta: 0
  Trūksta: Nieko netrūksta!
--------------------------------------------

  ~~~~Virtos kiaušinių pusryčiai~~~~
  Panašumas: 0.329
  Padengimas (coverage): 100%
  Reikalingi ingredientai: eggs, salt
  Instrukcijos: Boil eggs for 6–8 minutes, peel and serve.
