In [3]:
# Step 1: Install Necessary Libraries
!pip install fasttext



In [4]:
# Step 2: Import Libraries
import pandas as pd
import fasttext
import os

# Step 3: Load Dataset

In [5]:
# Locations of the two input CSV files.
file_path = '/kaggle/input/setsss/dataset.csv'  # Update with your dataset location
another_file = '/kaggle/input/food-ingredients-and-allergens/food_ingredients_and_allergens.csv'

# Read both files, primary dataset first, into `df` and `new_df`.
df, new_df = (pd.read_csv(csv_path) for csv_path in (file_path, another_file))

# Step 4: Preprocess Data for Allergen Detection

In [6]:
def preprocess_for_allergens(df):
    """Prepare an ingredient table for fastText allergen classification.

    The input frame is copied, so the caller's DataFrame is never modified.
    The function is also safe to apply to a frame it has already processed:
    values in 'Allergens' that already start with '__label__' are kept as-is
    instead of all being rewritten to '__label__contains'.

    Parameters:
        df (pandas.DataFrame): Table containing the columns 'Main Ingredient',
            'Sweetener', 'Fat/Oil', 'Seasoning' and 'Allergens'.

    Returns:
        pandas.DataFrame: A new frame in which
            * the five columns above and every other text column are
              lowercased, with missing values replaced by 'none';
            * 'combined_text' holds the four ingredient columns joined by
              single spaces;
            * 'Allergens' is '__label__does_not_contain' where the value was
              'none' and '__label__contains' otherwise.
        Non-text columns (numeric ids, counts, ...) are left untouched.
    """
    ingredient_cols = ['Main Ingredient', 'Sweetener', 'Fat/Oil', 'Seasoning']
    required_cols = set(ingredient_cols) | {'Allergens'}

    def _normalise(value):
        # Missing values become the literal 'none'; everything else is lowercased text.
        return 'none' if pd.isna(value) else str(value).lower()

    def _to_label(value):
        # Keep labels produced by an earlier pass so re-running is a no-op.
        if value.startswith('__label__'):
            return value
        return '__label__does_not_contain' if value == 'none' else '__label__contains'

    out = df.copy()

    for col in out.columns:
        # The required columns are always treated as text (an all-missing one
        # is loaded as float); other columns are normalised only when they
        # hold text, because `.str` on a numeric column raises AttributeError.
        if col in required_cols or pd.api.types.is_string_dtype(out[col].dtype):
            out[col] = out[col].map(_normalise)

    # Combine ingredient-related fields into one column for training
    out['combined_text'] = (
        out['Main Ingredient'] + ' ' + out['Sweetener'] + ' '
        + out['Fat/Oil'] + ' ' + out['Seasoning']
    )

    # Simplify labels for allergen presence
    out['Allergens'] = out['Allergens'].map(_to_label)

    return out

# Run the shared preprocessing on both datasets; `df` and `new_df` are rebound
# to the processed frames that the following cells use.
df = preprocess_for_allergens(df)

new_df = preprocess_for_allergens(new_df)



In [7]:
# Display the preprocessed second dataset as the cell output.
new_df

Unnamed: 0,Food Product,Main Ingredient,Sweetener,Fat/Oil,Seasoning,Allergens,Prediction,combined_text
0,almond cookies,almonds,sugar,butter,flour,__label__contains,contains,almonds sugar butter flour
1,almond cookies,almonds,sugar,butter,flour,__label__contains,contains,almonds sugar butter flour
2,chicken noodle soup,chicken broth,none,none,salt,__label__contains,contains,chicken broth none none salt
3,chicken noodle soup,chicken broth,none,none,salt,__label__contains,contains,chicken broth none none salt
4,cheddar cheese,cheese,none,none,salt,__label__contains,contains,cheese none none salt
...,...,...,...,...,...,...,...,...
394,lemon bars,lemon juice,sugar,butter,"flour, eggs",__label__contains,contains,"lemon juice sugar butter flour, eggs"
395,pecan pie,pecans,sugar,butter,corn syrup,__label__contains,contains,pecans sugar butter corn syrup
396,zucchini bread,zucchini,sugar,butter,"cinnamon, nuts",__label__contains,contains,"zucchini sugar butter cinnamon, nuts"
397,banana bread,bananas,sugar,butter,"cinnamon, nuts",__label__contains,contains,"bananas sugar butter cinnamon, nuts"


# Step 5: Save Preprocessed Data for FastText
Model trained with only our own dataset

In [8]:
# Write one fastText training example per row in the form "<label> <text>".
train_file = '/kaggle/working/fasttext_train_allergen.txt'
# fastText reads its training file as UTF-8, so the encoding is pinned here
# instead of depending on the platform default.
with open(train_file, 'w', encoding='utf-8') as f:
    for label, text in zip(df['Allergens'], df['combined_text']):
        f.write(f"{label} {text}\n")

# Steps 6 & 7: Train the Model and Save It

In [9]:
# Train a supervised fastText classifier on the file written above, using the
# same keyword form (`input=`) as the later retraining cells.
model = fasttext.train_supervised(
    input=train_file,
    epoch=25,
    lr=0.1,
    wordNgrams=2,
    dim=100,
)

# Persist the trained model next to the training file.
model.save_model("/kaggle/working/allergen_detection_model.bin")

Read 0M words
Number of words:  80
Number of labels: 2
Progress: 100.0% words/sec/thread:   63004 lr:  0.000000 avg.loss:  0.465180 ETA:   0h 0m 0s


# Step 8: Evaluate the Model

In [10]:
def evaluate_model(model, df):
    """Return the fraction of rows whose predicted label equals the stored label.

    Parameters:
        model: Object with a ``predict(text)`` method; the top label is read
            from its result as ``result[0][0]``.
        df (pandas.DataFrame): Frame with a 'combined_text' column (model
            input) and an 'Allergens' column (expected label).

    Returns:
        float: Share of correctly predicted rows, between 0 and 1. An empty
        frame yields 0.0 instead of raising ZeroDivisionError.
    """
    total = len(df)
    if total == 0:
        return 0.0

    # Iterate the two columns directly; building a Series per row with
    # iterrows() is unnecessary for reading two values.
    correct = sum(
        model.predict(text)[0][0] == label
        for text, label in zip(df['combined_text'], df['Allergens'])
    )
    return correct / total

# NOTE: `df` is the same frame whose rows were written to the training file,
# so this figure is accuracy on the training data, not a held-out estimate.
accuracy = evaluate_model(model, df)
print(f"Model Accuracy: {accuracy * 100:.2f}%")



Model Accuracy: 85.83%


# Step 9: Test with New Input

In [11]:
def predict_allergen(model, text):
    """Classify a free-text dish or ingredient description.

    Parameters:
        model: Object with a ``predict(text)`` method; the top label is read
            from its result as ``result[0][0]``.
        text (str): Description to classify. It is lowercased and all runs of
            whitespace (including newlines) are collapsed to single spaces.

    Returns:
        The top predicted label, e.g. '__label__contains'.
    """
    # fastText's predict() rejects input containing '\n'; collapsing
    # whitespace keeps pasted multi-line input usable and does not change the
    # tokens of single-line input.
    cleaned = " ".join(text.lower().split())
    prediction = model.predict(cleaned)
    return prediction[0][0]  # Return the predicted label

# Example usage: classify a single free-text description with the model
# trained above and print the predicted label.
new_dish = "tree seeds"
result = predict_allergen(model, new_dish)
print(f"The dish '{new_dish}' is classified as: {result}")


The dish 'tree seeds' is classified as: __label__contains


In [12]:
# Step 1: Combine Old and New Datasets
combined_df = pd.concat([df, new_df], ignore_index=True)

# Step 2: Save the Combined Dataset for FastText
# One "<label> <text>" example per row. The encoding is pinned to UTF-8
# because fastText reads its training file as UTF-8, whatever the platform
# default encoding is.
combined_train_file = '/kaggle/working/combined_fasttext_train.txt'
with open(combined_train_file, 'w', encoding='utf-8') as f:
    for label, text in zip(combined_df['Allergens'], combined_df['combined_text']):
        f.write(f"{label} {text}\n")

# Step 3: Retrain the Model
# `model` is rebound here; the earlier single-dataset model remains available
# only through the file it was saved to.
model = fasttext.train_supervised(input=combined_train_file, epoch=25, lr=0.1, wordNgrams=2, dim=100)

# Step 4: Save the Retrained Model
model.save_model("/kaggle/working/allergen_detection_model_combined.bin")

# Step 5: Evaluate the New Model
# NOTE: `combined_df` is the data the model was just trained on, so this is
# training-set accuracy rather than a held-out estimate.
accuracy = evaluate_model(model, combined_df)  # Reuse evaluation function
print(f"Combined Model Accuracy: {accuracy * 100:.2f}%")


Read 0M words
Number of words:  340
Number of labels: 2
Progress: 100.0% words/sec/thread:  324521 lr:  0.000000 avg.loss:  0.234678 ETA:   0h 0m 0s


Combined Model Accuracy: 99.23%


## Retraining with an even bigger dataset

In [13]:
# Step 1: Classify the Test Dish with the Current Model
# (this result is overwritten after retraining and is not printed)
test_text = "Rice"
result = model.predict(test_text.lower())

# Step 2: Add Explicit Non-Allergen Examples for "Chicken"
additional_data = [
    '__label__does_not_contain chicken',
    '__label__does_not_contain plain chicken',
    '__label__does_not_contain chicken broth'
]

# Step 3: Append to Training File
# Only lines that are not already present are appended, so re-running this
# cell does not keep adding the same examples and shifting the training data.
# The file is scanned as bytes so the check does not depend on text encoding.
with open(combined_train_file, 'rb') as f:
    existing_lines = {raw_line.rstrip(b'\r\n') for raw_line in f}

with open(combined_train_file, 'a') as f:
    for line in additional_data:
        if line.encode('utf-8') not in existing_lines:
            f.write(f"{line}\n")

# Step 4: Retrain the Model
model = fasttext.train_supervised(input=combined_train_file, epoch=25, lr=0.1, wordNgrams=2, dim=100)

# Step 5: Retest with the Same Dish
# NOTE(review): `test_text` is "Rice" while the added examples are about
# chicken, so this check does not exercise the new lines. Confirm the intended
# test input.
result = model.predict(test_text.lower())
print(f"After retraining, the dish '{test_text}' is classified as: {result[0][0]}")


Read 0M words
Number of words:  341
Number of labels: 2


After retraining, the dish 'Rice' is classified as: __label__does_not_contain


Progress: 100.0% words/sec/thread:  325587 lr:  0.000000 avg.loss:  0.223015 ETA:   0h 0m 0s


In [14]:
!pip install tabulate




In [15]:
# Step 1: Load Substitution Dataset
substitution_file = '/kaggle/input/replacements/substitute_dataset.csv'  # Update with your file path
substitution_df = pd.read_csv(substitution_file)

# Convert to a dictionary for quick lookup (lowercased allergen -> lowercased substitute).
# Rows missing either value are skipped: a NaN key is not a usable allergen
# name for the substring matching done on these keys later. Surrounding
# whitespace is stripped because a key such as "dairy " can never be found
# inside the trimmed user input "dairy".
# When an allergen appears more than once, the last row wins.
_complete_rows = substitution_df.dropna(subset=['Allergen', 'Substitute Food Item'])
substitution_map = dict(zip(
    _complete_rows['Allergen'].str.strip().str.lower(),
    _complete_rows['Substitute Food Item'].str.strip().str.lower(),
))

# Step 2: Detect Allergens (Dummy Function for Simplicity - Replace with Model Integration)
def detect_allergens(ingredients, allergen_list):
    """
    Dummy allergen detection function based on substring matching.
    Replace this with actual model integration for detecting allergens.

    An allergen is reported when its lowercased name occurs anywhere inside a
    lowercased ingredient. This is plain substring matching, so a short
    allergen name also matches inside longer unrelated words.

    Parameters:
        ingredients (list): List of ingredient names.
        allergen_list (list): List of possible allergens. Entries that are not
            non-empty strings (e.g. NaN from a CSV) are ignored.

    Returns:
        list: Unique detected allergens, spelled as given in ``allergen_list``
        and in the order they first appear there (deterministic across runs).
    """
    lowered_ingredients = [ingredient.lower() for ingredient in ingredients]

    detected = []
    already_reported = set()
    for allergen in allergen_list:
        # Skip unusable entries: a non-string cannot be substring-matched and
        # an empty string would "match" every ingredient.
        if not isinstance(allergen, str) or not allergen.strip():
            continue
        if allergen in already_reported:
            continue
        needle = allergen.lower()
        if any(needle in ingredient for ingredient in lowered_ingredients):
            already_reported.add(allergen)
            detected.append(allergen)
    return detected

# Step 3: Replace Allergens with Substitutes
def replace_allergens_with_substitutes(ingredients, detected_allergens, substitution_map):
    """Return a copy of ``ingredients`` with allergenic items swapped for substitutes.

    Each ingredient is checked once, against its ORIGINAL text. Matching is
    never repeated on a substitute that was just inserted, so a substitute
    such as "plant-based milk" is not replaced again because a later allergen
    ("milk") happens to occur inside it.

    Parameters:
        ingredients (list): Ingredient names; the list itself is not modified.
        detected_allergens (list): Allergens to replace, in priority order.
            When several match one ingredient, the first one is used.
        substitution_map (dict): Maps an allergen to its substitute. Detected
            allergens without an entry are ignored.

    Returns:
        list: New list with the same length and order as ``ingredients``.
    """
    # Pre-compute (lowercased allergen, substitute) pairs for the allergens
    # that can actually be replaced.
    replaceable = [
        (allergen.lower(), substitution_map[allergen])
        for allergen in detected_allergens
        if isinstance(allergen, str) and allergen and allergen in substitution_map
    ]

    updated_ingredients = []
    for ingredient in ingredients:
        lowered = ingredient.lower()
        replacement = next(
            (substitute for needle, substitute in replaceable if needle in lowered),
            None,
        )
        updated_ingredients.append(ingredient if replacement is None else replacement)
    return updated_ingredients

# Step 4: Interactive User Input
def main():
    """Prompt for a comma-separated ingredient list and print substitutions.

    Reads one line from standard input, looks for allergens using the keys of
    the module-level ``substitution_map`` and prints either the detected
    allergens together with the updated ingredient list, or a message saying
    that nothing was detected.
    """
    print("Enter the list of ingredients (comma-separated):")
    user_input = input().strip()
    # Blank entries (empty input, trailing or doubled commas) are dropped so
    # they are neither matched against allergens nor echoed back as empty items.
    dish_ingredients = [
        ingredient.strip().lower()
        for ingredient in user_input.split(",")
        if ingredient.strip()
    ]

    # List of possible allergens (keys from the substitution map)
    allergen_list = list(substitution_map.keys())

    # Detect allergens in the dish
    detected_allergens = detect_allergens(dish_ingredients, allergen_list)
    if detected_allergens:
        print(f"Detected Allergens: {', '.join(detected_allergens)}")

        # Replace allergens with substitutes
        updated_ingredients = replace_allergens_with_substitutes(dish_ingredients, detected_allergens, substitution_map)
        print(f"Updated Ingredients: {', '.join(updated_ingredients)}")
    else:
        print("No allergens detected in the provided ingredients.")

# Run the interactive program. This call blocks on input(), so the cell's
# output depends on what is typed when it is executed.
main()


Enter the list of ingredients (comma-separated):


 Dairy, Peanuts


Detected Allergens: peanuts, dairy
Updated Ingredients: plant-based milk, sunflower seed butter


In [None]:
import os
import shutil
import tempfile

# Create a zip file of everything under /kaggle/working.
# The archive is built in a temporary directory and moved into place
# afterwards. Writing it directly inside the directory being archived means
# the half-written zip is itself among the files being collected.
working_dir = "/kaggle/working"
archive_base = "allergen_detection_model_combined.bin"
with tempfile.TemporaryDirectory() as staging_dir:
    staged_zip = shutil.make_archive(os.path.join(staging_dir, archive_base), 'zip', working_dir)
    archive_path = shutil.move(staged_zip, os.path.join(working_dir, archive_base + ".zip"))

# The zip file is saved as 'allergen_detection_model_combined.bin.zip' in /kaggle/working
archive_path
