In [None]:
# Cell 0
!pip install -q sentence-transformers faiss-cpu openpyxl pandas scikit-learn

# If you want GPU and plan to fine-tune, use the GPU runtime (Runtime > Change runtime type)
# and install faiss-gpu instead: !pip install faiss-gpu

import os, re, math
import pandas as pd
from pathlib import Path
from sentence_transformers import SentenceTransformer, util
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Edit these paths if needed (or mount Drive)
input_path1 = "/content/Asthma_Meal_Plans.xlsx"   # asthma meal-plan dataset
input_path2 = "/content/yoga_dataset.xlsx"          # yoga dataset
output_reco_path = "meal_to_yoga_top1_contrastive_Asthma.xlsx"  # where the final meal -> yoga mapping is written

print("libraries loaded")


libraries loaded


In [None]:
# Cell 1

# Read both workbooks with the same Excel engine.
df_meal, df_yoga = (
    pd.read_excel(workbook, engine="openpyxl")
    for workbook in (input_path1, input_path2)
)

# Quick schema / size check before any processing.
meal_columns = df_meal.columns.tolist()
yoga_columns = df_yoga.columns.tolist()
print("Meal columns:", meal_columns)
print("Yoga columns:", yoga_columns)
print("Meal rows:", len(df_meal), "Yoga rows:", len(df_yoga))

# Preview the first two rows of each frame.
display(df_meal.head(2), df_yoga.head(2))

Meal columns: ['Plan', 'Meal_Type', 'Meal_Day', 'Meal_Name', 'Meal_Description', 'Calories (kcal)', 'Total_Fat (g)', 'Protein (g)', 'Carbohydrates (g)', 'Dietary_Fiber (g)', 'Added_Sugar (g)', 'Vitamin_C (mg)', 'Vitamin_E (mg)', 'Vitamin_D (µg)', 'Magnesium (mg)', 'Selenium (µg)', 'Omega-3 (ALA, g)', 'EPA + DHA (mg)', 'Zinc (mg)']
Yoga columns: ['Plan', 'Disease', 'Day', 'Time', 'Yoga (Detailed)', 'Exercise (Detailed)', 'Precautions (Detailed)']
Meal rows: 42 Yoga rows: 126


Unnamed: 0,Plan,Meal_Type,Meal_Day,Meal_Name,Meal_Description,Calories (kcal),Total_Fat (g),Protein (g),Carbohydrates (g),Dietary_Fiber (g),Added_Sugar (g),Vitamin_C (mg),Vitamin_E (mg),Vitamin_D (µg),Magnesium (mg),Selenium (µg),"Omega-3 (ALA, g)",EPA + DHA (mg),Zinc (mg)
0,Plan A,Breakfast,Monday,Oatmeal with chia and berries,"Cooked oats with chia seeds and mixed berries,...",608.3,17.6,26.3,53.8,14.7,1.4,37.2,3.1,2.7,101.0,15.3,0.44,148.8,4.5
1,Plan A,Lunch,Monday,Grilled salmon with quinoa and spinach,"Salmon fillet with quinoa, spinach, and a sque...",530.9,18.5,27.3,79.5,12.2,2.8,26.5,4.7,1.7,147.7,23.5,0.37,133.3,3.5


Unnamed: 0,Plan,Disease,Day,Time,Yoga (Detailed),Exercise (Detailed),Precautions (Detailed)
0,Plan A,Asthma,Monday,Morning,"For Monday Morning, perform Sukhasana with dia...",Engage in 25–30 minutes of indoor brisk walkin...,Avoid exposure to dust or cold air. Ensure the...
1,Plan A,Asthma,Monday,Afternoon,"For Monday Afternoon, perform Sukhasana with d...",Engage in 25–30 minutes of indoor brisk walkin...,Avoid exposure to dust or cold air. Ensure the...


In [None]:
# Cell 2
# Day extractor: accepts full weekday names, common abbreviations, or "Day N" tokens.
days_pattern = r'\b(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday|Mon|Tue|Tues|Wed|Thu|Thur|Fri|Sat|Sun|Day\s*\d{1,2})\b'

def extract_day_from_meal_day(x):
    """Pull the first day token out of a free-text day value.

    Parameters
    ----------
    x : any
        Raw cell value; it is converted with ``str()`` before searching.

    Returns
    -------
    str or pd.NA
        The first token matching ``days_pattern`` (case-insensitive), title-cased
        and with internal whitespace runs collapsed to a single space
        (e.g. ``"DAY   12"`` -> ``"Day 12"``). Abbreviations are returned as
        matched (``"tues"`` -> ``"Tues"``), not expanded. ``pd.NA`` is returned
        for missing input or when no token is found.
    """
    if pd.isna(x):
        return pd.NA
    found = re.search(days_pattern, str(x), flags=re.IGNORECASE)
    if found is None:
        return pd.NA
    # Only the "Day N" alternative can contain whitespace, so collapsing
    # whitespace unconditionally leaves weekday tokens unchanged.
    return re.sub(r'\s+', ' ', found.group(1).title())

def mealtype_to_time(mt):
    """Translate a meal-type label into a time-of-day slot.

    The label is stripped and lower-cased, then checked for keywords in a fixed
    order: Morning, Afternoon, Evening, then Anytime (snacks). The first slot
    with a keyword contained in the label wins. Labels with no keyword are
    returned as ``str(mt).title()``; missing input returns ``pd.NA``.
    """
    if pd.isna(mt):
        return pd.NA

    lowered = str(mt).strip().lower()
    # Order matters: e.g. "evening snack" resolves to Evening, not Anytime.
    slot_keywords = (
        ('Morning', ('breakfast', 'morning')),
        ('Afternoon', ('lunch', 'afternoon')),
        ('Evening', ('dinner', 'supper', 'evening')),
        ('Anytime', ('snack',)),
    )
    for slot, keywords in slot_keywords:
        for keyword in keywords:
            if keyword in lowered:
                return slot
    return str(mt).title()

# Canonical weekday names keyed by lower-cased spelling. Applied to BOTH frames so that
# abbreviations or odd casing on either side ("Mon", "tues", "WED") compare equal later.
_WEEKDAY_CANONICAL = {
    'mon': 'Monday', 'monday': 'Monday',
    'tue': 'Tuesday', 'tues': 'Tuesday', 'tuesday': 'Tuesday',
    'wed': 'Wednesday', 'wednesday': 'Wednesday',
    'thu': 'Thursday', 'thur': 'Thursday', 'thurs': 'Thursday', 'thursday': 'Thursday',
    'fri': 'Friday', 'friday': 'Friday',
    'sat': 'Saturday', 'saturday': 'Saturday',
    'sun': 'Sunday', 'sunday': 'Sunday',
}

def _canonical_day(value):
    """Return the full weekday name for a weekday spelling; pass NA and any other label through unchanged."""
    if pd.isna(value):
        return value
    return _WEEKDAY_CANONICAL.get(str(value).strip().lower(), value)

# Meal dataset: Day, Time
df_meal['Day'] = df_meal['Meal_Day'].apply(extract_day_from_meal_day).apply(_canonical_day)
df_meal['Time'] = df_meal['Meal_Type'].apply(mealtype_to_time)

# Yoga dataset normalization.
# astype(str) turns missing values into the strings 'nan' (NaN) or '<NA>' (pd.NA, e.g. when
# this cell is re-run), so both spellings are mapped back to pd.NA.
_missing_tokens = {'nan': pd.NA, '<NA>': pd.NA}
df_yoga['Day'] = df_yoga['Day'].astype(str).str.strip().replace(_missing_tokens)
df_yoga['Time'] = df_yoga['Time'].astype(str).str.strip().replace(_missing_tokens)

# Standardize weekday tokens (case-insensitive, all seven days)
df_yoga['Day'] = df_yoga['Day'].apply(_canonical_day)

# Filter for asthma only (for recommendations)
df_yoga_asthma = df_yoga[df_yoga['Disease'].astype(str).str.lower().str.contains('asthma', na=False)].copy()

print("After filter: Yoga (asthma) rows:", len(df_yoga_asthma))
display(df_meal[['Meal_Day','Day','Meal_Type','Time']].head(4))
display(df_yoga_asthma[['Disease','Day','Time']].head(4))


After filter: Yoga (diabetes) rows: 42


Unnamed: 0,Meal_Day,Day,Meal_Type,Time
0,Monday,Monday,Breakfast,Morning
1,Monday,Monday,Lunch,Afternoon
2,Monday,Monday,Dinner,Evening
3,Tuesday,Tuesday,Breakfast,Morning


Unnamed: 0,Disease,Day,Time
0,Asthma,Monday,Morning
1,Asthma,Monday,Afternoon
2,Asthma,Monday,Evening
3,Asthma,Tuesday,Morning


In [None]:
# Expose the asthma-only yoga frame under the name used by the following cells.
# NOTE: this is an alias, not a copy — both names refer to the same DataFrame object.
df_yoga_filtered = df_yoga_asthma

In [None]:
# Cell 3
# Cell 3 (edit)
# Nutrient columns summarised in the meal text and copied into the final output.
nutrient_cols = [
    'Calories (kcal)',
    'Total_Fat (g)',
    'Protein (g)',
    'Carbohydrates (g)',
    'Dietary_Fiber (g)',
    'Added_Sugar (g)',
    'Vitamin_C (mg)',
    'Vitamin_E (mg)',
    'Vitamin_D (µg)',
    'Magnesium (mg)',
    'Selenium (µg)',
    'Omega-3 (ALA, g)',
    'EPA + DHA (mg)',
    'Zinc (mg)',
]


def meal_context_text(row):
    """Serialise one meal row into a single ' | '-separated text string.

    Emits, in order, the non-missing Plan, Day, Time, Meal_Name (as "Meal") and
    Meal_Description (as "Desc") fields, followed by a "Nutrition:" segment
    listing every non-missing column from ``nutrient_cols`` as ``name:value``,
    where ``name`` is the column header up to its first '('.

    ``row`` may be any mapping-like object supporting ``.get``, ``in`` and
    ``[]`` (e.g. a pandas Series from ``DataFrame.apply(axis=1)`` or a dict).
    """
    labelled_fields = (
        ("Plan", 'Plan'),
        ("Day", 'Day'),
        ("Time", 'Time'),
        ("Meal", 'Meal_Name'),
        ("Desc", 'Meal_Description'),
    )
    segments = [
        f"{label}: {row[key]}"
        for label, key in labelled_fields
        if pd.notna(row.get(key))
    ]

    # Short nutrient summary; the unit suffix "(…)" is dropped from each name.
    nutrient_bits = [
        f"{col.split('(')[0].strip()}:{row[col]}"
        for col in nutrient_cols
        if col in row and pd.notna(row[col])
    ]
    if nutrient_bits:
        segments.append("Nutrition: " + ", ".join(nutrient_bits))

    return " | ".join(segments)

def yoga_context_text(row):
    """Serialise one yoga row into a single ' | '-separated text string.

    Emits, in order, the non-missing Plan, Day, Time, 'Yoga (Detailed)' (as
    "Yoga"), 'Exercise (Detailed)' (as "Exercise") and 'Precautions (Detailed)'
    (as "Precautions") fields, each rendered as ``Label: value``.

    ``row`` may be any mapping-like object supporting ``.get`` and ``[]``.
    """
    labelled_fields = (
        ("Plan", 'Plan'),
        ("Day", 'Day'),
        ("Time", 'Time'),
        ("Yoga", 'Yoga (Detailed)'),
        ("Exercise", 'Exercise (Detailed)'),
        ("Precautions", 'Precautions (Detailed)'),
    )
    segments = []
    for label, key in labelled_fields:
        if pd.notna(row.get(key)):
            segments.append(f"{label}: {row[key]}")
    return " | ".join(segments)

# Build the text that will be embedded for every meal / yoga row.
df_meal['context_text'] = df_meal.apply(meal_context_text, axis=1)
df_yoga_filtered['context_text'] = df_yoga_filtered.apply(yoga_context_text, axis=1)

# keep original indices for traceability
# Guarded so that re-running this cell does not add a second, duplicate
# '*_orig_index' column (which would break scalar lookups on that column later).
if 'meal_orig_index' not in df_meal.columns:
    df_meal = df_meal.reset_index(drop=False).rename(columns={'index':'meal_orig_index'})
if 'yoga_orig_index' not in df_yoga_filtered.columns:
    df_yoga_filtered = df_yoga_filtered.reset_index(drop=False).rename(columns={'index':'yoga_orig_index'})

print("Sample meal context:")
display(df_meal[['meal_orig_index','Meal_Name','context_text']].head(3))
print("Sample yoga context:")
display(df_yoga_filtered[['yoga_orig_index','Yoga (Detailed)','context_text']].head(3))


Sample meal context:


Unnamed: 0,meal_orig_index,Meal_Name,context_text
0,0,Oatmeal with chia and berries,Plan: Plan A | Day: Monday | Time: Morning | M...
1,1,Grilled salmon with quinoa and spinach,Plan: Plan A | Day: Monday | Time: Afternoon |...
2,2,Baked cod with roasted vegetables,Plan: Plan A | Day: Monday | Time: Evening | M...


Sample yoga context:


Unnamed: 0,yoga_orig_index,Yoga (Detailed),context_text
0,0,"For Monday Morning, perform Sukhasana with dia...",Plan: Plan A | Day: Monday | Time: Morning | Y...
1,1,"For Monday Afternoon, perform Sukhasana with d...",Plan: Plan A | Day: Monday | Time: Afternoon |...
2,2,"For Monday Evening, perform Sukhasana with dia...",Plan: Plan A | Day: Monday | Time: Evening | Y...


In [None]:
# Cell 4
def same_day_time(mday, yday, mtime, ytime):
    """Return True only when both the day and the time of a meal and a yoga row agree.

    Values are compared as stripped, lower-cased strings. A missing value on
    either side is treated as a mismatch (it is NOT a wildcard), so rows with
    an unknown day or time never form a pair.
    """
    def _norm(value):
        return str(value).strip().lower()

    for meal_value, yoga_value in ((mday, yday), (mtime, ytime)):
        if pd.isna(meal_value) or pd.isna(yoga_value):
            return False
        if _norm(meal_value) != _norm(yoga_value):
            return False
    return True

# Positive pairs: every (meal, yoga) combination sharing the same Day and Time.
# Each entry is (meal text, yoga text, meal_orig_index, yoga_orig_index).
pos_pairs = []
for _, meal in df_meal.iterrows():
    # Row-wise mask over the yoga frame for this meal's day/time slot.
    slot_mask = df_yoga_filtered.apply(
        lambda yoga, day=meal['Day'], slot=meal['Time']: same_day_time(day, yoga['Day'], slot, yoga['Time']),
        axis=1,
    )
    pos_pairs.extend(
        (meal['context_text'], yoga['context_text'], meal['meal_orig_index'], yoga['yoga_orig_index'])
        for _, yoga in df_yoga_filtered[slot_mask].iterrows()
    )

print("Positive pairs found:", len(pos_pairs))
# show examples
pos_pairs[:3]


Positive pairs found: 84


[('Plan: Plan A | Day: Monday | Time: Morning | Meal: Oatmeal with chia and berries | Desc: Cooked oats with chia seeds and mixed berries, topped with a sprinkle of almonds. | Nutrition: Calories:608.3, Total_Fat:17.6, Protein:26.3, Carbohydrates:53.8, Dietary_Fiber:14.7, Added_Sugar:1.4, Vitamin_C:37.2, Vitamin_E:3.1, Vitamin_D:2.7, Magnesium:101.0, Selenium:15.3, Omega-3:0.44, EPA + DHA:148.8, Zinc:4.5',
  'Plan: Plan A | Day: Monday | Time: Morning | Yoga: For Monday Morning, perform Sukhasana with diaphragmatic breathing for 10 minutes followed by Anulom Vilom for 7 cycles. This promotes pulmonary expansion and autonomic stability. | Exercise: Engage in 25–30 minutes of indoor brisk walking or treadmill walking at 3–4 km/h, maintaining steady breathing. End with upper body flexibility stretches for 5 minutes. | Precautions: Avoid exposure to dust or cold air. Ensure the exercise environment is well-ventilated. Maintain hydration before and after the session.',
  0,
  0),
 ('Plan: P

In [None]:
# Cell 5
from sentence_transformers import InputExample

# Training hyper-parameters (tune as needed)
model_name = "all-mpnet-base-v2"   # base model; can swap
max_pairs_for_training = 20000     # cap to avoid huge training; adjust
batch_size = 16
num_epochs = 2
warmup_steps = 100

# One InputExample per positive pair: texts=[anchor (meal), positive (yoga)].
# Only the first `max_pairs_for_training` pairs are considered, and pairs with
# an empty text on either side are dropped.
train_examples = [
    InputExample(texts=[anchor_text, positive_text])
    for anchor_text, positive_text, _meal_id, _yoga_id in pos_pairs[:max(max_pairs_for_training, 0)]
    if anchor_text and positive_text
]

print("Train examples:", len(train_examples))
if len(train_examples) < 20:
    print("WARNING: fewer than 20 positive pairs — contrastive fine-tuning will be limited. Consider relaxing positive rules or augmenting data.")


Train examples: 84


In [None]:
# Cell 6
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, losses, evaluation

# Load the base sentence encoder named in the config cell.
model = SentenceTransformer(model_name)

if len(train_examples) >= 2:
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=batch_size)
    # In-batch negatives: every other positive in the batch acts as a negative for an anchor.
    train_loss = losses.MultipleNegativesRankingLoss(model)

    # With a small dataset the configured warm-up can exceed the whole training run
    # (batches per epoch * epochs), in which case the learning rate never reaches its
    # target value. Cap warm-up at ~10% of the real number of optimizer steps.
    total_train_steps = len(train_dataloader) * num_epochs
    effective_warmup_steps = min(warmup_steps, max(1, total_train_steps // 10))
    if effective_warmup_steps != warmup_steps:
        print("Warm-up reduced from", warmup_steps, "to", effective_warmup_steps,
              "steps (total training steps:", total_train_steps, ")")

    # optional: small evaluator can be added if you have dev set
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              epochs=num_epochs,
              warmup_steps=effective_warmup_steps,
              show_progress_bar=True)
    # save fine-tuned model
    ft_model_path = "sbert_meal_yoga_contrastive"
    model.save(ft_model_path)
    print("Saved fine-tuned model to", ft_model_path)
else:
    print("Skipping fine-tuning due to insufficient pairs; using base model.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkshitizsikriwal16[0m ([33mkshitizsikriwal16-central-university-of-haryana[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss


Saved fine-tuned model to sbert_meal_yoga_contrastive


In [None]:
# Cell 7
# Prefer the fine-tuned checkpoint saved on disk; otherwise fall back to the base model.
model_source = "sbert_meal_yoga_contrastive" if os.path.exists("sbert_meal_yoga_contrastive") else model_name
model = SentenceTransformer(model_source)

# Missing context strings are encoded as empty text.
meal_texts = df_meal['context_text'].fillna('').tolist()
yoga_texts = df_yoga_filtered['context_text'].fillna('').tolist()

# Shared encode settings; normalize_embeddings=True requests normalized vectors for cosine similarity.
encode_options = dict(
    batch_size=32,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True,
)
meal_embeddings = model.encode(meal_texts, **encode_options)
yoga_embeddings = model.encode(yoga_texts, **encode_options)

print("Emb shapes:", meal_embeddings.shape, yoga_embeddings.shape)


In [None]:
# Cell 8
K = 5  # compute top-K internally (we will take top-1), K>1 helps in case top-1 violates business filter
# NearestNeighbors cannot return more neighbours than there are fitted rows, so clamp K.
n_candidates = min(K, len(yoga_embeddings))
nn = NearestNeighbors(n_neighbors=n_candidates, metric='cosine').fit(yoga_embeddings)
distances, indices = nn.kneighbors(meal_embeddings)  # distances are cosine distances in [0,2]
# similarity = 1 - distance (since sklearn cosine distance = 1 - cosine_similarity)
sims = 1 - distances


def eq_or_na(a, b):
    """Lenient equality for the business filter: a missing value on either side counts as a
    match; otherwise compare as stripped, lower-cased strings."""
    if pd.isna(a) or pd.isna(b): return True
    return str(a).strip().lower() == str(b).strip().lower()


results = []
for meal_idx in range(len(meal_texts)):
    # meal_idx and the neighbour indices are POSITIONS in the embedding arrays,
    # so rows are fetched positionally (iloc) rather than by index label.
    meal_row = df_meal.iloc[meal_idx]
    mday, mtime = meal_row['Day'], meal_row['Time']

    # iterate candidate yoga matches sorted by similarity
    selected = None
    for rank in range(n_candidates):
        y_idx = int(indices[meal_idx, rank])
        candidate = df_yoga_filtered.iloc[y_idx]
        # Accept if (Day matches OR Day is NA in either) AND (Time matches OR Time is NA in either)
        if eq_or_na(mday, candidate['Day']) and eq_or_na(mtime, candidate['Time']):
            selected = (y_idx, float(sims[meal_idx, rank]))
            break
    # fallback: if still None, choose top candidate regardless
    if selected is None:
        selected = (int(indices[meal_idx, 0]), float(sims[meal_idx, 0]))
    y_idx, sim_score = selected

    # collect merged info
    y_row = df_yoga_filtered.iloc[y_idx]
    merged = {
        'meal_index': int(meal_row['meal_orig_index']),
        'meal_plan': meal_row.get('Plan'),
        'meal_day': meal_row.get('Day'),
        'meal_time': meal_row.get('Time'),
        'meal_name': meal_row.get('Meal_Name'),
        'meal_description': meal_row.get('Meal_Description'),
    }
    # attach nutrients (NaN when the column is absent from the meal sheet)
    for c in nutrient_cols:
        merged[c] = meal_row[c] if c in meal_row else np.nan
    # attach yoga fields
    merged.update({
        'yoga_index': int(y_row['yoga_orig_index']),
        'yoga_plan': y_row.get('Plan'),
        'yoga_day': y_row.get('Day'),
        'yoga_time': y_row.get('Time'),
        'yoga_detailed': y_row.get('Yoga (Detailed)'),
        'yoga_exercise': y_row.get('Exercise (Detailed)'),
        'yoga_precautions': y_row.get('Precautions (Detailed)'),
        'similarity': sim_score
    })
    results.append(merged)

df_final = pd.DataFrame(results)
df_final.to_excel(output_reco_path, index=False, engine="openpyxl")
print("Saved final mapping to", output_reco_path)
display(df_final.head(8))


Saved final mapping to meal_to_yoga_top1_contrastive_Asthma.xlsx


Unnamed: 0,meal_index,meal_plan,meal_day,meal_time,meal_name,meal_description,Calories (kcal),Total_Fat (g),Protein (g),Carbohydrates (g),...,EPA + DHA (mg),Zinc (mg),yoga_index,yoga_plan,yoga_day,yoga_time,yoga_detailed,yoga_exercise,yoga_precautions,similarity
0,0,Plan A,Monday,Morning,Oatmeal & Berries,"Steel-cut oats with almonds, flaxseed, and mix...",450,15,15,70,...,0,2.0,69,Plan B,Wednesday,Morning,Gentle Viniyasa flow: Sukhasana to Bitilasana-...,30 minutes of treadmill walking at a steady pa...,The flow should be smooth and breath-initiated...,0.212828
1,1,Plan A,Monday,Afternoon,Chicken & Black Bean Salad,Grilled chicken and black bean salad with spin...,700,25,45,80,...,0,4.0,1,Plan A,Monday,Afternoon,Seated Parvatasana (Mountain Pose) with interc...,15 minutes of light upper body flexibility exe...,Monitor for any signs of bronchoconstriction; ...,0.187244
2,2,Plan A,Monday,Evening,Grilled Salmon & Quinoa,"Grilled salmon with lemon-dill, quinoa, and ro...",850,20,40,100,...,300,4.0,20,Plan A,Sunday,Evening,Supported Matsyasana (Fish Pose) for 7 minutes...,No exercise prescribed. Final restorative sess...,"Ensure all props (bolsters, blankets) are used...",0.21196
3,3,Plan A,Tuesday,Morning,Egg & Avocado Toast,"Scrambled eggs with spinach, served with avoca...",500,28,25,40,...,50,2.5,20,Plan A,Sunday,Evening,Supported Matsyasana (Fish Pose) for 7 minutes...,No exercise prescribed. Final restorative sess...,"Ensure all props (bolsters, blankets) are used...",0.204695
4,4,Plan A,Tuesday,Afternoon,Turkey & Bell Pepper Stir-fry,Lean turkey stir-fry with red and yellow bell ...,750,20,40,95,...,100,3.5,7,Plan A,Wednesday,Afternoon,"Pawanmuktasana (Wind-Relieving Pose), 5 repeti...",15 minutes of seated dynamic stretching (arm c...,Avoid forcing the knees to the chest in Pawanm...,0.215862
5,5,Plan A,Tuesday,Evening,Chicken & Sweet Potato,Baked chicken breast with roasted sweet potato...,750,20,45,90,...,150,3.0,1,Plan A,Monday,Afternoon,Seated Parvatasana (Mountain Pose) with interc...,15 minutes of light upper body flexibility exe...,Monitor for any signs of bronchoconstriction; ...,0.22051
6,6,Plan A,Wednesday,Morning,Greek Yogurt & Walnuts,"Plain Greek yogurt with walnuts, chia seeds, a...",450,20,25,45,...,0,2.0,69,Plan B,Wednesday,Morning,Gentle Viniyasa flow: Sukhasana to Bitilasana-...,30 minutes of treadmill walking at a steady pa...,The flow should be smooth and breath-initiated...,0.175832
7,7,Plan A,Wednesday,Afternoon,Tofu & Mushroom Skewers,"Tofu, UV-treated mushrooms, and zucchini skewe...",700,30,30,80,...,0,3.0,7,Plan A,Wednesday,Afternoon,"Pawanmuktasana (Wind-Relieving Pose), 5 repeti...",15 minutes of seated dynamic stretching (arm c...,Avoid forcing the knees to the chest in Pawanm...,0.193273


In [None]:
# Cell 9
# Minimum similarity a recommendation must reach to be kept.
threshold = 0.55

meets_threshold = df_final['similarity'] >= threshold
df_final_filtered = df_final.loc[meets_threshold].copy()

kept_count, total_count = len(df_final_filtered), len(df_final)
print("Total recommended (>=%.2f): %d / %d" % (threshold, kept_count, total_count))

# Persist the thresholded subset and preview it.
df_final_filtered.to_excel("meal_to_yoga_top1_thresholded.xlsx", index=False, engine="openpyxl")
display(df_final_filtered.head(8))


In [None]:
# Cell 10
# Columns a human annotator needs to judge each meal -> yoga recommendation.
labeling_columns = [
    'meal_index',
    'meal_name',
    'meal_description',
    'yoga_index',
    'yoga_detailed',
    'yoga_exercise',
    'yoga_precautions',
    'similarity',
]
df_for_labeling = df_final[labeling_columns]
df_for_labeling.to_csv("meal_yoga_for_human_labeling.csv", index=False)
print("Saved meal_yoga_for_human_labeling.csv — annotate with 1=good / 0=bad in a 'label' column, then re-upload for automatic evaluation.")
