<a href="https://colab.research.google.com/github/Sarthakpunj/Food_label_reader/blob/main/food_label_reader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pytesseract
from PIL import Image
import re
import io
from google.colab import files

# Set up Tesseract executable path for Colab
!apt-get install -y tesseract-ocr
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# Function to detect text in an image using Tesseract
def detect_text(image):
    """Run Tesseract OCR on *image* and return the recognised text.

    The image is handed to ``pytesseract.image_to_string`` unchanged and
    the string it produces is returned as-is.
    """
    return pytesseract.image_to_string(image)

# Upload image file
uploaded = files.upload()

# Check if a file was uploaded
if len(uploaded) > 0:
    for filename in uploaded.keys():
        image = Image.open(io.BytesIO(uploaded[filename]))

        # Detect text from the image
        detected_text = detect_text(image)

        # Function to extract specified nutritional information
        def extract_nutrition_info(text):
            """Extract the standard nutrition-label fields from OCR text.

            Args:
                text: Raw text recognised from a nutrition label. ``None`` or
                    an empty string is treated as "nothing recognised".

            Returns:
                A dict with one entry per known field, in label order. Each
                value is the matched amount as a string, or ``'Not Found'``.
                Calories, Calories from Fat, Total Fat and Sugars hold the
                bare number; the other nutrients keep their ``g``/``mg``
                unit. Amounts may contain a decimal point (e.g. ``'0.5g'``),
                so callers converting them to numbers must parse the
                fractional part as well.
            """
            # Collapse every run of whitespace (including newlines) so a label
            # and its value always end up separated by at most one space.
            cleaned_text = re.sub(r'\s+', ' ', text or '')

            # Shared fragments: an amount with optional decimals, and the
            # optional ':' / '-' separator between a label and its value.
            number = r'\d+(?:\.\d+)?'
            sep = r'\s*[:\-]?\s*'

            patterns = {
                # The unit is a real alternation anchored with \b. A character
                # class such as [g|pack]* would also swallow stray letters of
                # whatever word follows the number ("2 cakes" -> "2 cak").
                "Serving Size": (
                    rf'Serving\s*size{sep}'
                    r'(\d+(?:[.,]\d+)*(?:\s*(?:g|pack(?:age)?s?)\b)?)'
                ),
                "Calories": rf'Calories{sep}({number})',
                "Calories from Fat": rf'Calories\s*from\s*Fat{sep}({number})',
                "Total Fat": rf'Total\s*Fat{sep}({number})',
                "Saturated Fat": rf'Saturated\s*Fat{sep}({number}g)',
                "Trans Fat": rf'Trans\s*Fat{sep}({number}g)',
                "Cholesterol": rf'Cholesterol{sep}({number}mg)',
                "Sodium": rf'Sodium{sep}({number}mg)',
                # Labels print either "Carbohydrate" or "Carbohydrates".
                "Total Carbohydrates": rf'Total\s*Carbohydrates?{sep}({number}g)',
                "Dietary Fiber": rf'Dietary\s*Fiber{sep}({number}g)',
                "Sugars": rf'Total\s*Sugars{sep}({number})',
                "Protein": rf'Protein{sep}({number}g)',
            }

            # First (left-most) match wins for every field.
            nutrition_info = {}
            for key, pattern in patterns.items():
                match = re.search(pattern, cleaned_text, re.IGNORECASE)
                nutrition_info[key] = match.group(1) if match else 'Not Found'

            return nutrition_info

        # Extract nutritional information
        nutrition_info = extract_nutrition_info(detected_text)

        # Fields shown in the summary, in the order they appear on a label.
        summary_fields = (
            "Serving Size",
            "Calories",
            "Calories from Fat",
            "Total Fat",
            "Saturated Fat",
            "Trans Fat",
            "Cholesterol",
            "Sodium",
            "Total Carbohydrates",
            "Dietary Fiber",
            "Sugars",
            "Protein",
        )

        # Create a summary: a markdown heading followed by one bullet per field.
        summary = ["### Nutritional Information:\n"]
        summary.extend(
            f"- **{field}**: {nutrition_info.get(field, 'Not Found')}"
            for field in summary_fields
        )

        # Recommendation rules, one per nutrient:
        #   (field, flag_when_above, threshold, flagged message, otherwise message)
        # flag_when_above=True flags values strictly above the threshold;
        # False flags values strictly below it.
        # NOTE(review): the Calories, Cholesterol and Sodium thresholds look
        # like daily limits, yet they are compared against the amount read
        # from a single label - confirm these are the intended cut-offs.
        rules = (
            ("Calories", True, 2000,
             "High calorie content. Be mindful of daily caloric intake to avoid weight gain.",
             "Calorie content is acceptable."),
            ("Calories from Fat", True, 100,
             "High calories from fat. Consider reducing fat intake to manage weight and health.",
             "Calories from fat are within acceptable limits."),
            ("Total Fat", True, 20,
             "High total fat content. Limit intake to manage heart health.",
             "Total fat level is acceptable."),
            ("Saturated Fat", True, 5,
             "High saturated fat content. Limit intake to reduce risk of heart disease.",
             "Saturated fat level is acceptable."),
            ("Trans Fat", True, 0,
             "Avoid trans fat as much as possible. It is associated with increased risk of heart disease.",
             "No trans fat found. This is good for heart health."),
            ("Cholesterol", True, 300,
             "High cholesterol content. Limit intake to reduce risk of heart disease.",
             "Cholesterol level is within acceptable limits."),
            ("Sodium", True, 2300,
             "High sodium content. Excessive sodium can lead to high blood pressure and other health issues.",
             "Sodium level is within acceptable limits."),
            ("Total Carbohydrates", False, 30,
             "Low total carbohydrates. Ensure a balanced intake of carbs for energy.",
             "Total carbohydrates level is acceptable."),
            ("Dietary Fiber", False, 3,
             "Low dietary fiber content. Increase fiber intake for better digestive health.",
             "Dietary fiber level is good."),
            ("Sugars", True, 5,
             "High sugar content. Excessive sugar can contribute to weight gain and diabetes.",
             "Sugar level is acceptable."),
            ("Protein", False, 5,
             "Low protein content. Ensure adequate protein intake for muscle maintenance and overall health.",
             "Protein level is acceptable."),
        )

        # Recommendations based on nutritional information
        recommendations = ["### Recommendations:\n"]
        for field, flag_when_above, threshold, flagged_msg, ok_msg in rules:
            raw_value = nutrition_info.get(field, 'Not Found')
            # Read the full amount including any decimals: "0.5g" must parse
            # as 0.5, not 0, or a small non-zero amount would be reported as
            # none at all. 'Not Found' contains no digits and is skipped.
            number = re.search(r'\d+(?:\.\d+)?', raw_value)
            if number is None:
                continue
            amount = float(number.group())
            flagged = amount > threshold if flag_when_above else amount < threshold
            recommendations.append(flagged_msg if flagged else ok_msg)

        # Print summary and recommendations
        print("\n".join(summary))
        print("\n".join(recommendations))
else:
    print("No file uploaded.")


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


Saving Back-of-pack-ingredients-and-nutrition-labels-Examples-of-ingredient-and-nutritional_Q640.jpg to Back-of-pack-ingredients-and-nutrition-labels-Examples-of-ingredient-and-nutritional_Q640 (11).jpg
### Nutritional Information:

- **Serving Size**: 1 pack
- **Calories**: Not Found
- **Calories from Fat**: Not Found
- **Total Fat**: 59
- **Saturated Fat**: 3g
- **Trans Fat**: 0g
- **Cholesterol**: Not Found
- **Sodium**: 105mg
- **Total Carbohydrates**: 24g
- **Dietary Fiber**: Not Found
- **Sugars**: 179
- **Protein**: Not Found
### Recommendations:

High total fat content. Limit intake to manage heart health.
Saturated fat level is acceptable.
No trans fat found. This is good for heart health.
Sodium level is within acceptable limits.
Low total carbohydrates. Ensure a balanced intake of carbs for energy.
High sugar content. Excessive sugar can contribute to weight gain and diabetes.
