In [51]:
import pandas as pd
import spacy
import re
from IPython.display import display

In [61]:
# Load the spaCy 'en_core_web_sm' pipeline once; extract_ner reuses this `nlp` object on every call.
nlp = spacy.load('en_core_web_sm')

In [62]:
# Read the mock user prompts from CSV, then preview the first five rows.
df_nl = pd.read_csv(filepath_or_buffer='../data/mock_user_inputs.csv')
df_nl.head(n=5)

Unnamed: 0,user_input
0,For a a young person dude trying to gain muscl...
1,What would be a good meal for a senior citizen...
2,What would be a good meal for a young adult ge...
3,Can you suggest something for a in their 40s man?
4,"I am young adult and a lady, what meal would b..."


In [63]:
# Lower-case gender words looked up per token by extract_ner.
gender_keywords = ["man", "woman", "boy", "girl", "male", "female", "gentleman", "lady", "dude"]

# Numeric ages. Group 1 captures a 1-3 digit number, optionally followed by a unit
# ("30 years", "25"); group 2 captures the digits of a decade such as "40s".
age_number_pattern = r'\b(\d{1,3})\s?(?:years?|yrs?|y/o|old)?\b|\b(\d{2})s\b'

# Descriptive age words. "teenage" is listed explicitly: with only "teen" and
# "teenager" the trailing \b cannot match inside "teenage", so phrases such as
# "a teenage girl" yielded no age at all.
age_description_pattern = r'\b(?:young|teenager|teenage|teen|adult|middle-aged|senior|elderly|child|baby|infant)\b'

In [64]:
def extract_ner(text):
    """Extract gender words and age hints from a free-text user prompt.

    Parameters
    ----------
    text : str
        Raw user input. Any non-string value (for example the NaN pandas
        produces for an empty CSV cell) yields an empty result instead of
        raising.

    Returns
    -------
    dict
        ``gender_terms``    -- list of tokens (original casing, in order of
                               appearance) whose lower-cased text is in
                               ``gender_keywords``.
        ``age_numeric``     -- the first number matched by
                               ``age_number_pattern`` as an ``int``, or None.
        ``age_descriptive`` -- the first word matched by
                               ``age_description_pattern`` (original casing),
                               or None.
    """
    if not isinstance(text, str):
        return {"gender_terms": [], "age_numeric": None, "age_descriptive": None}

    # Only token texts are used below, so run just the tokenizer instead of the
    # whole pipeline (tagger, parser, NER), whose annotations were never read.
    doc = nlp.make_doc(text)
    genders = [token.text for token in doc if token.text.lower() in gender_keywords]

    # The first match is all that is reported, so a single search is enough.
    number_match = re.search(age_number_pattern, text)
    if number_match:
        # Exactly one of the two groups is set: plain number or decade ("40s").
        age_numeric = int(number_match.group(1) or number_match.group(2))
    else:
        age_numeric = None

    # Match case-insensitively, consistent with the gender lookup above, so a
    # sentence-initial "Young" or "Senior" is not silently missed.
    description_match = re.search(age_description_pattern, text, flags=re.IGNORECASE)
    age_descriptive = description_match.group(0) if description_match else None

    return {
        "gender_terms": genders,
        "age_numeric": age_numeric,
        "age_descriptive": age_descriptive
    }

In [67]:
def calculate_meal_calories(age, gender):
    """Estimate a (lower, upper) calorie range for a single meal.

    The basal metabolic rate is computed as
    ``10 * weight + 6.25 * height - 5 * age + offset`` (the form of the
    Mifflin-St Jeor equation) using a fixed reference weight and height per
    gender. It is scaled by a per-gender activity multiplier to a daily
    expenditure, 40% of which is attributed to the meal. The range is that
    figure multiplied by per-gender lower and upper factors.

    Parameters
    ----------
    age : int or float
        Age used in the equation (presumably years -- confirm with the data).
    gender : str
        One of the supported gender words; case and surrounding whitespace
        are ignored.

    Returns
    -------
    tuple of (int, int)
        Rounded lower and upper calorie bounds for the meal.

    Raises
    ------
    ValueError
        If ``gender`` is not a supported word, or ``age`` is NaN.
    TypeError
        If ``age`` is None.
    """
    male_keywords = {"man", "boy", "male", "gentleman", "dude"}
    female_keywords = {"woman", "girl", "female", "lady"}

    # Reference profiles; weight/height are presumably kg and cm, as the
    # equation expects -- confirm before changing them.
    male_profile = {
        "weight": 85, "height": 178, "multiplier": 1.55,
        "lower_factor": 0.9, "upper_factor": 1.15, "gender_offset": 5,
    }
    female_profile = {
        "weight": 70, "height": 170, "multiplier": 1.45,
        "lower_factor": 0.85, "upper_factor": 1.08, "gender_offset": -161,
    }

    # Tolerate stray whitespace around the keyword as well as mixed case.
    gender = gender.strip().lower()
    if gender in male_keywords:
        profile = male_profile
    elif gender in female_keywords:
        profile = female_profile
    else:
        raise ValueError("Gender value not supported in list!")

    # Fail early with a clear message: rows without a numeric age arrive from
    # pandas as NaN, which otherwise only surfaces as an opaque error in round().
    if age is None:
        raise TypeError("age must be a number, got None")
    if age != age:  # NaN is the only value that is not equal to itself
        raise ValueError("age must be a number, got NaN")

    BMR = (10 * profile["weight"]) + (6.25 * profile["height"]) - (5 * age) + profile["gender_offset"]
    TDEE = BMR * profile["multiplier"]
    meal_calories = TDEE * 0.40  # share of the daily total assigned to this meal

    lower_calories = meal_calories * profile["lower_factor"]
    upper_calories = meal_calories * profile["upper_factor"]

    return round(lower_calories), round(upper_calories)

In [65]:
# Run extract_ner once per prompt (every call tokenizes the text with spaCy),
# then fan the resulting dict out into one column per key. Calling extract_ner
# separately for each column would repeat the same work three times per row.
ner_results = df_nl["user_input"].apply(extract_ner)
for column in ("gender_terms", "age_numeric", "age_descriptive"):
    # Bind `column` as a default argument so each lambda reads its own key.
    df_nl[column] = ner_results.apply(lambda result, key=column: result[key])

In [66]:
# Display the prompts together with the extracted gender_terms / age_numeric / age_descriptive columns.
df_nl

Unnamed: 0,user_input,gender_terms,age_numeric,age_descriptive
0,For a a young person dude trying to gain muscl...,[dude],,young
1,What would be a good meal for a senior citizen...,[man],,senior
2,What would be a good meal for a young adult ge...,[gentleman],,young
3,Can you suggest something for a in their 40s man?,[man],40.0,
4,"I am young adult and a lady, what meal would b...",[lady],,young
...,...,...,...,...
495,"As a middle-aged guy, I prefer meals high in p...",[],,middle-aged
496,If I am a teenager gentleman and trying to los...,[gentleman],,teenager
497,"I am a child dude, how much protein should I c...",[dude],,child
498,"Hey, I'm a 30 years old dude, can you help me ...",[dude],30.0,


In [21]:
# Hand-written sentences used as a quick sanity check for extract_ner.
texts = [
    "He is a 25-year-old man.",
    "A teenage girl won the competition.",
    "An elderly woman of 70 years attended the event.",
    "A 30-year-old professional was featured in the magazine.",
]

# One row per sentence; `keywords` holds the dict returned by extract_ner.
df_text = pd.DataFrame(texts, columns=["text"]).assign(
    keywords=lambda frame: frame["text"].apply(extract_ner)
)

In [22]:
# Show the last rows of the sanity-check frame (it has four rows, so all of them are displayed).
df_text.tail()

Unnamed: 0,text,keywords
0,He is a 25-year-old man.,"{'gender_terms': ['man'], 'age_terms': ['25-ye..."
1,A teenage girl won the competition.,"{'gender_terms': ['girl'], 'age_terms': []}"
2,An elderly woman of 70 years attended the event.,"{'gender_terms': ['woman'], 'age_terms': ['70 ..."
3,A 30-year-old professional was featured in the...,"{'gender_terms': [], 'age_terms': ['30-year-ol..."
