<a href="https://colab.research.google.com/github/HimanshuKumar05/food-label-compliance-checker/blob/main/foodLabelComplianceChecker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Simulated food-label records: one tuple per product, with the fields in the
# order given by `_label_columns`.
_label_columns = ("product_name", "ingredients", "nutrition_facts", "health_claims")
_label_rows = [
    ("Brand A Chips", "Potatoes, Salt, Vegetable Oil",
     "Calories: 200, Fats: 10g, Carbs: 20g", "Low Fat"),
    ("Brand B Biscuits", "Wheat Flour, Sugar, Butter",
     "Calories: 150, Fats: 5g, Carbs: 25g", "Rich in Fiber"),
    ("Brand C Juice", "Water, Sugar, Mango Pulp, Preservatives",
     "Calories: 100, Sugar: 25g", "No Added Sugar"),
    ("Brand D Noodles", "Wheat Flour, Salt, Artificial Flavors",
     "Calories: 300, Fats: 12g, Salt: 500mg", "Contains Whole Grains"),
]

# Pivot the row tuples into a column-name -> list-of-values mapping.
data = {
    column: list(values)
    for column, values in zip(_label_columns, zip(*_label_rows))
}

# Build the frame, save it without the index column, and confirm on stdout.
df = pd.DataFrame(data)
df.to_csv("food_labels.csv", index=False)
print("Dataset created!")


Dataset created!


In [None]:
# Simulated FSSAI rule set that the labels are checked against.
fssai_rules = dict(
    # Ingredient names accepted on a label.
    ingredients=["Potatoes", "Wheat Flour", "Salt", "Sugar", "Vegetable Oil", "Mango Pulp"],
    # Upper bounds for the numeric nutrition values. Units are not recorded
    # here; presumably they match the units printed on the labels (TODO confirm).
    nutrition_facts=dict(
        calories_max=500,
        fats_max=20,
        sugar_max=30,
        salt_max=600,
    ),
    # Health-claim phrases a label is allowed to carry.
    allowed_health_claims=["Low Fat", "Rich in Fiber", "No Added Sugar", "Contains Whole Grains"],
)


In [None]:
import spacy
import re

# Load spaCy's small English pipeline once, at module level, so the functions
# defined below can share the same `nlp` object.
# NOTE(review): presumably requires the "en_core_web_sm" model package to be
# installed in the kernel's environment — confirm for non-Colab setups.
nlp = spacy.load("en_core_web_sm")

# Helper function to normalise free text for comparison
def clean_text(text):
    """Return a lower-cased, punctuation-free, whitespace-normalised copy of ``text``.

    Parameters
    ----------
    text : str
        Raw text, e.g. a health claim taken from a label.

    Returns
    -------
    str
        ``text`` in lowercase with every character that is neither a word
        character nor whitespace removed, runs of whitespace collapsed to a
        single space, and leading/trailing whitespace stripped.
    """
    # Convert to lowercase
    text = text.lower()
    # Remove punctuation (anything that is not a word character or whitespace)
    text = re.sub(r'[^\w\s]', '', text)
    # Collapse runs of whitespace and trim the ends; removing punctuation can
    # leave doubled or dangling spaces behind.
    text = re.sub(r'\s+', ' ', text).strip()
    return text

# Process ingredients, nutrition facts, and health claims
def extract_ingredients(text):
    """Split a comma-separated ingredient list into individual ingredient names.

    The list is split on commas rather than into part-of-speech tokens so that
    multi-word ingredients such as "Vegetable Oil" or "Wheat Flour" stay intact
    and descriptive words such as "Artificial" are not dropped.

    Parameters
    ----------
    text : str
        Ingredient list as printed on the label, e.g.
        ``"Potatoes, Salt, Vegetable Oil"``.

    Returns
    -------
    list of str
        Ingredient names in label order, stripped of surrounding whitespace.
        Empty entries are skipped; a non-string input (e.g. a missing value)
        yields an empty list.
    """
    if not isinstance(text, str):
        return []
    ingredients = [part.strip() for part in text.split(",")]
    # Drop blanks produced by trailing or doubled commas.
    return [ingredient for ingredient in ingredients if ingredient]

def extract_nutrition_facts(text):
    """Parse a ``"Name: value, Name: value"`` nutrition string into a dict.

    Parameters
    ----------
    text : str
        Nutrition panel text, e.g. ``"Calories: 200, Fats: 10g, Carbs: 20g"``.

    Returns
    -------
    dict
        Maps each nutrient name (stripped and lower-cased, so lookups such as
        ``"calories"`` work regardless of label capitalisation) to the first
        number found in its value. Whole numbers are returned as ``int``,
        decimals as ``float``. Any unit suffix ("g", "mg", ...) is discarded.
        Parts without a colon, without a name, or without a number are skipped;
        a non-string input (e.g. a missing value) yields an empty dict.
    """
    facts = {}
    if not isinstance(text, str):
        return facts
    for part in text.split(","):
        # partition() never raises, unlike unpacking the result of split(":").
        key, separator, value = part.partition(":")
        key = key.strip().lower()
        number = re.search(r'\d+(?:\.\d+)?', value)
        if not separator or not key or number is None:
            continue
        digits = number.group()
        facts[key] = float(digits) if "." in digits else int(digits)
    return facts

def extract_health_claims(text):
    """Split a comma-separated health-claim string into normalised claims.

    The text is split on commas *before* normalisation: ``clean_text`` removes
    punctuation, so cleaning first would delete the very commas that separate
    the claims and everything would come back as one long claim.

    Parameters
    ----------
    text : str
        Claims as printed on the label, e.g. ``"Low Fat, Rich in Fiber"``.

    Returns
    -------
    list of str
        Each claim passed through ``clean_text`` (lower-cased, punctuation
        removed) and stripped, in label order. Empty entries are skipped; a
        non-string input (e.g. a missing value) yields an empty list.
    """
    if not isinstance(text, str):
        return []
    cleaned = (clean_text(piece).strip() for piece in text.split(","))
    claims = [claim for claim in cleaned if claim]
    return claims


In [None]:
# Compliance check function
def check_compliance(row):
    """Check one product label against ``fssai_rules`` and summarise the result.

    Parameters
    ----------
    row : mapping
        One label record (e.g. a pandas Series passed by
        ``df.apply(..., axis=1)``) providing the keys ``'ingredients'``,
        ``'nutrition_facts'`` and ``'health_claims'``.

    Returns
    -------
    str
        ``"Compliant"`` when nothing is flagged, otherwise the issue messages
        joined with ``", "``.

    Notes
    -----
    All name comparisons are case-insensitive, so label capitalisation
    ("Calories", "Low Fat") does not matter. Nutrition values are compared as
    bare numbers; the unit printed on the label is not taken into account.
    """
    # Extracted ingredients, nutrition facts, and claims
    extracted_ingredients = extract_ingredients(row['ingredients'])
    extracted_nutrition = extract_nutrition_facts(row['nutrition_facts'])
    extracted_claims = extract_health_claims(row['health_claims'])

    def _normalise(text):
        # Lower-case and strip punctuation via clean_text, then collapse
        # whitespace so both sides of a claim comparison share one format.
        return " ".join(clean_text(str(text)).split())

    issues = []

    # Ingredient compliance check
    allowed_ingredients = {name.strip().lower() for name in fssai_rules['ingredients']}
    for ingredient in extracted_ingredients:
        if ingredient.strip().lower() not in allowed_ingredients:
            issues.append(f"Non-compliant ingredient: {ingredient}")

    # Nutrition facts compliance check: nutrient name -> (rule key, message)
    limits = fssai_rules['nutrition_facts']
    limit_checks = {
        "calories": ("calories_max", "Calories exceed limit"),
        "fats": ("fats_max", "Fats exceed limit"),
        "sugar": ("sugar_max", "Sugar exceeds limit"),
        "salt": ("salt_max", "Salt exceeds limit"),
    }
    for fact, value in extracted_nutrition.items():
        # Normalise the key so "Calories" and "calories" hit the same rule.
        check = limit_checks.get(str(fact).strip().lower())
        if check is None:
            continue  # nutrient without a configured limit (e.g. carbs)
        limit_key, message = check
        if value > limits[limit_key]:
            issues.append(f"{message}: {value}")

    # Health claims compliance check
    allowed_claims = {_normalise(claim) for claim in fssai_rules['allowed_health_claims']}
    for claim in extracted_claims:
        if _normalise(claim) not in allowed_claims:
            issues.append(f"Non-compliant health claim: {claim}")

    return ", ".join(issues) if issues else "Compliant"

# Evaluate every label row by row, store the verdict next to the label data,
# then persist the annotated table (without the index) and display it.
df['compliance_report'] = [check_compliance(label) for _, label in df.iterrows()]
df.to_csv("compliance_report.csv", index=False)
df


Unnamed: 0,product_name,ingredients,nutrition_facts,health_claims,compliance_report
0,Brand A Chips,"Potatoes, Salt, Vegetable Oil","Calories: 200, Fats: 10g, Carbs: 20g",Low Fat,"Non-compliant ingredient: Vegetable, Non-compl..."
1,Brand B Biscuits,"Wheat Flour, Sugar, Butter","Calories: 150, Fats: 5g, Carbs: 25g",Rich in Fiber,"Non-compliant ingredient: Wheat, Non-compliant..."
2,Brand C Juice,"Water, Sugar, Mango Pulp, Preservatives","Calories: 100, Sugar: 25g",No Added Sugar,"Non-compliant ingredient: Water, Non-compliant..."
3,Brand D Noodles,"Wheat Flour, Salt, Artificial Flavors","Calories: 300, Fats: 12g, Salt: 500mg",Contains Whole Grains,"Non-compliant ingredient: Wheat, Non-compliant..."


In [None]:
# Print the per-product report with column truncation disabled; by default
# pandas shortens long strings to "...", which hides most of the issues.
with pd.option_context("display.max_colwidth", None):
    print(df[['product_name', 'compliance_report']])

       product_name                                  compliance_report
0     Brand A Chips  Non-compliant ingredient: Vegetable, Non-compl...
1  Brand B Biscuits  Non-compliant ingredient: Wheat, Non-compliant...
2     Brand C Juice  Non-compliant ingredient: Water, Non-compliant...
3   Brand D Noodles  Non-compliant ingredient: Wheat, Non-compliant...
