In [1]:
import spacy

# Load spaCy's English model
nlp = spacy.load("en_core_web_sm")

# List of words to ignore (measurements, quantities, descriptors)
stop_words = ['cup', 'cups', 'tablespoon', 'tablespoons', 'teaspoon', 'teaspoons',
              'according', 'taste', 'whole', 'cleaned', 'washed', 'nicely', 'finely',
              'chopped', 'and', 'or', 'diced', 'peeled', 'for', 'optional', 'adjust',
              'made', 'into', 'paste', 'size', 'variety', 'generous', 'pinch', 'small', 'grams', 'inch']

# Function to extract main ingredient names
def extract_main_ingredients(ingredient_string):
    # Process the text with spaCy NLP
    doc = nlp(ingredient_string)
    
    ingredients = []
    current_ingredient = []
    inside_parenthesis = False

    # Loop through tokens and extract only relevant nouns (ignoring stop words)
    for token in doc:
        # Detect parentheses and skip the content inside them
        if token.text == "(":
            inside_parenthesis = True
            continue
        elif token.text == ")":
            inside_parenthesis = False
            continue
        
        # Process the ingredient only if we're not inside parentheses
        if not inside_parenthesis:
            if token.pos_ in ['NOUN', 'PROPN', 'ADJ'] and token.text.lower() not in stop_words and not token.is_digit:
                current_ingredient.append(token.text)
            
            # When encountering punctuation or stop words, finish the ingredient
            elif token.is_punct or token.text.lower() in stop_words:
                if current_ingredient:
                    ingredients.append(" ".join(current_ingredient))
                    current_ingredient = []

    # Append the last ingredient if it exists
    if current_ingredient:
        ingredients.append(" ".join(current_ingredient))

    # Remove duplicates and return ingredients separated by commas
    unique_ingredients = list(dict.fromkeys(ingredients))
    
    return ', '.join(unique_ingredients)

# List of input ingredient strings
ingredient_lists = [
    '4 cups Indian borage (Doddapatre), cleaned and washed 1/2 cup Curd (Dahi / Yogurt) 1 tablespoon Cumin seeds (Jeera) 1/2 teaspoon Whole Black Peppercorns 1/4 cup Dessicated Coconut Salt, to taste 2 tablespoons Ghee',
    '1 Fish, nicely washed with bones on (I used Pompano) For marination 2 tablespoons Curd (Dahi / Yogurt) 1/4 cup Onions, finely chopped 2 Green Chillies, chopped 1 tablespoon Tandoori masala 1 teaspoon Ginger Garlic Paste 1 teaspoon Red Chilli powder 1 teaspoon Garam masala powder 1 teaspoon Coriander Powder (Dhania) 1/2 teaspoon Cumin powder (Jeera) 1 tablespoon Coriander (Dhania) Leaves, finely chopped 1 tablespoon Oil Salt, to taste',
    '300 grams Colocasia root (Arbi) 1 Green Bell Pepper (Capsicum), diced 1 Tomato, chopped 1 inch Ginger, grated 1 teaspoon Coriander Powder (Dhania) 1/2 teaspoon Turmeric powder (Haldi) 1/2 teaspoon Red Chilli powder 1 teaspoon Garam masala powder Salt, according to taste Oil, for cooking ',
    ' 1 cup Mango (Raw) , diced  2 tablespoons Jaggery  1/4 teaspoon Turmeric powder (Haldi)  1/4 cup Tamarind Water Salt , to taste For Ground Masala 1 cup Fresh coconut , grated  2 teaspoons White Urad Dal (Split)  1 teaspoon Chana dal (Bengal Gram Dal)  1/4 teaspoon Methi Seeds (Fenugreek Seeds)  1/2 teaspoon Cumin seeds (Jeera)  2 teaspoons Sesame seeds (Til seeds)  4 Dry Red Chillies , (adjust) For Tempering: 2 teaspoons Coconut Oil  1/4 teaspoon SSP Asafoetida (Hing)  1 teaspoon Mustard seeds (Rai/ Kadugu) Curry leaves , a few ',
    ' 3 Carrots (Gajjar) , grated  1/2 Cabbage (Patta Gobi/ Muttaikose) , finely chopped  1 Onion , finely chopped  4 Green Chillies , slit  1/2 teaspoon Turmeric powder (Haldi)  1 teaspoon Cumin powder (Jeera)  1 tablespoon Oil  1/2 teaspoon Mustard seeds (Rai/ Kadugu)  1/2 cup Fresh coconut , grated  6 Curry leaves Salt , as required ',
    ' 1/2 cup Green Moong Dal (Whole) For the coconut masala 1/2 cup Fresh coconut , grated  2 Dry Red Chilli , lightly roasted  1/4 teaspoon Tamarind Paste , or 1 small gooseberry sized tamarind (remove seeds and fibre if any)  2 teaspoons Coconut Oil  1 teaspoon Mustard seeds (Rai/ Kadugu)  1 sprig Curry leaves Salt , to taste ',
    ' 300 grams Boneless chicken , cut into chunks  4 sprig Curry leaves For the ginger-garlic green chilli paste 2 inch Ginger  6 cloves Garlic  2 Green Chillies For the chicken sholay kebab batter 1 tablespoon Rice flour  3 tablespoons All Purpose Flour (Maida)  1 tablespoon Gram flour (besan)  2 tablespoons Kashmiri Red Chilli Powder Salt , to taste  1 teaspoon Sambar Powder  1 teaspoon Cumin powder (Jeera)  1 teaspoon Coriander Powder (Dhania) Oil , as required, for deep frying ',
    ' 200 grams Paneer (Homemade Cottage Cheese) , cubed  2 Beetroot , peeled and diced  1 Onion , finely chopped  2 Green Chillies , slit  1 inch Ginger , grated  2 cloves Garlic , finely chopped  1 teaspoon Turmeric powder (Haldi)  1 teaspoon Coriander Powder (Dhania)  1 teaspoon Garam masala powder  200 ml Coconut milk  2 teaspoons Oil , for cooking Salt , to taste Red Chilli powder , to taste '
    # Add more ingredients here as needed
]

# Process each ingredient list and print the main ingredients
for ingredients_string in ingredient_lists:
    main_ingredients = extract_main_ingredients(ingredients_string)
    print(main_ingredients)


Indian borage, Curd, Cumin seeds, Black Peppercorns, Coconut Salt, Ghee
Fish, bones, marination, Curd, Onions, Green Chillies, Tandoori masala, Ginger Garlic, Red Chilli powder, Garam masala powder, Coriander Powder, Cumin powder, Coriander Leaves, Oil Salt
Colocasia root Green Bell Pepper, Tomato, Ginger, Coriander Powder, Turmeric powder, Red Chilli powder, Garam masala powder Salt, Oil
Mango, Jaggery, Turmeric powder, Tamarind Water Salt, Ground Masala, Fresh coconut, White Urad Dal, Chana dal, Methi Seeds, Cumin seeds, Sesame seeds Dry Red Chillies, Tempering, Coconut Oil, SSP Asafoetida, Mustard seeds Curry, few
Carrots, Cabbage, Onion, Green Chillies, slit, Turmeric powder, Cumin powder, Oil, Mustard seeds, Fresh coconut, Curry Salt
Green Moong Dal, coconut masala, Fresh coconut, Dry Red Chilli, Tamarind, gooseberry sized tamarind, Coconut Oil, Mustard seeds sprig Curry Salt
Boneless chicken, chunks sprig Curry, ginger, garlic green chilli, Ginger Garlic Green Chillies, chicken s