In [None]:
import pytesseract
from PIL import Image
import pandas as pd
import numpy as np
import joblib
from fuzzywuzzy import process
from flask import Flask, request, jsonify
from flask_cors import CORS
import re
import os
from werkzeug.utils import secure_filename

# Set Tesseract path
#pytesseract.pytesseract.tesseract_cmd = r"C://Users//arunp//AppData//Local//Programs//Tesseract-OCR//tesseract.exe"

# Create the Flask application and enable CORS on it.
app = Flask(__name__)
CORS(app)

# Directory that receives uploaded images. It is registered in the app config
# and created up front so request handlers can save into it directly.
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Load models and data.
#
# The model names are pre-bound to None so that a failed load leaves them
# defined (and detectably unset) instead of surfacing later as a NameError
# in the middle of a request.
ingredient_data = None
health_score_prediction_model = None
target_model = None
label_encoder = None
vectorizer = None
scaler = None

# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code. Only load artifact files that come from a trusted source.
try:
    ingredient_data = joblib.load('ingredient_data.pkl')
    health_score_prediction_model = joblib.load('health_risk_score_model.pkl')
    target_model = joblib.load('target_model.pkl')
    label_encoder = joblib.load('label_encoder.pkl')
    vectorizer = joblib.load('vectorizer.pkl')
    scaler = joblib.load('scaler.pkl')
except Exception as e:
    # Best-effort start-up: report the problem and keep going. Artifacts
    # loaded before the failure stay bound; the remaining names stay None.
    print(f"Error loading models: {e}")

try:
    ingredients_df = pd.read_csv('Food_updated_1.csv')
    # Blank cells are read as NaN (a float); drop them so every entry is a
    # string that can safely be lower-cased and substring-matched.
    ingredients = ingredients_df['Ingredient_Name'].dropna().astype(str).tolist()
except Exception as e:
    print(f"Error loading ingredients from CSV: {e}")
    ingredients = []

def clean_ingredient_list(unstructured_text, ingredient_data_list=None):
    """Return the known ingredient names that occur in a piece of free text.

    Matching is a case-insensitive substring test, so a candidate is reported
    whenever its lower-cased form appears anywhere in the lower-cased text.

    Args:
        unstructured_text: Text to scan (for example OCR output). ``None`` or
            an empty string yields an empty result.
        ingredient_data_list: Candidate ingredient names. When omitted, the
            module-level ``ingredients`` list is used, looked up at call time
            so that a reloaded list is picked up.

    Returns:
        The lower-cased candidates found in the text, in candidate order.
        Non-string candidates and empty/blank candidates are skipped (a blank
        candidate would otherwise match every text).
    """
    if ingredient_data_list is None:
        ingredient_data_list = ingredients

    # Lower-case the text once instead of once per candidate.
    haystack = (unstructured_text or "").lower()

    ingr_contain = []
    for candidate in ingredient_data_list:
        if not isinstance(candidate, str):
            # e.g. NaN from a blank CSV cell
            continue
        needle = candidate.lower()
        if needle.strip() and needle in haystack:
            ingr_contain.append(needle)

    print(ingr_contain)
    return ingr_contain

def get_best_match(ingredient_name, min_score=20):
    """Fuzzy-match a name against the names stored in ``ingredient_data``.

    Args:
        ingredient_name: Name to look up; it is lower-cased before matching.
        min_score: The best candidate is accepted only when its match score
            is strictly greater than this value. Defaults to 20, the
            threshold that was previously hard-coded.

    Returns:
        The lower-cased best-matching stored name, or ``None`` when no
        candidate scores above ``min_score`` or the lookup fails for any
        reason (the error is printed, not raised).
    """
    try:
        ingredient_name = ingredient_name.lower()
        # Drop missing names first: they are NaN floats, not strings, and are
        # not valid choices for the string matcher.
        choices = ingredient_data['Ingredient_Name'].dropna().str.lower().tolist()
        best_match = process.extractOne(ingredient_name, choices)
        return best_match[0] if best_match and best_match[1] > min_score else None
    except Exception as e:
        # Deliberate best-effort: callers treat None as "no match".
        print(f"Error in fuzzy matching: {e}")
        return None

def _encode_ingredient_label(candidates):
    """Encode the first candidate label that ``label_encoder`` accepts.

    Earlier candidates are tried in order; the last one is encoded without a
    guard so that a genuinely unknown label still raises as before.
    Assumes the encoder raises ``ValueError`` for labels it was not fitted on
    (scikit-learn's ``LabelEncoder`` does).
    """
    *preferred, last = candidates
    for label in preferred:
        try:
            return label_encoder.transform([label])
        except ValueError:
            # This spelling is unknown to the encoder; try the next one.
            continue
    return label_encoder.transform([last])


def predict_from_ingredient(ingredient_name):
    """Predict health information for a single ingredient name.

    The name is fuzzy-matched against ``ingredient_data``. When a match is
    found, its stored description is used as the text feature; otherwise the
    label ``'unknown'`` is encoded and the raw name itself serves as the
    description.

    Args:
        ingredient_name: Ingredient name to score.

    Returns:
        A 5-tuple ``(health_score, target, description, daily_intake, name)``:
        the raw health-score prediction; ``'Hazardous'`` when the target
        prediction exceeds 3, else ``'Safe'``; the description that was fed
        to the vectorizer; ``'No'`` when the health score exceeds 3.5, else
        ``'Yes'``; and the matched name, or ``ingredient_name`` when nothing
        matched.
    """
    best_match = get_best_match(ingredient_name)

    description = None
    label_candidates = ['unknown']
    if best_match:
        # get_best_match returns a lower-cased name, so compare against the
        # lower-cased column. An exact comparison finds no row (and indexing
        # the empty result raises IndexError) whenever the stored name
        # contains upper-case letters.
        stored_names = ingredient_data['Ingredient_Name']
        rows = ingredient_data[stored_names.str.lower() == best_match]
        if not rows.empty:
            description = rows['Description'].values[0]
            # Prefer the stored spelling for the encoder, then the lower-cased
            # one; dict.fromkeys de-duplicates while keeping that order.
            stored_name = rows['Ingredient_Name'].values[0]
            label_candidates = list(dict.fromkeys([stored_name, best_match]))

    if not isinstance(description, str):
        # No matching row, or the row has no text description (e.g. NaN):
        # fall back to the raw name, as for an unmatched ingredient.
        description = ingredient_name

    encoded_ingredient = _encode_ingredient_label(label_candidates)

    # Feature row = encoded label followed by the vectorized description.
    description_vector = vectorizer.transform([description]).toarray()
    input_data = np.hstack((encoded_ingredient.reshape(1, -1), description_vector))
    input_data = scaler.transform(input_data)

    health_score_prediction = health_score_prediction_model.predict(input_data)[0]
    target_prediction = target_model.predict(input_data)[0]

    daily_intake_prediction = 'No' if health_score_prediction > 3.5 else 'Yes'
    target_prediction = 'Hazardous' if target_prediction > 3 else 'Safe'

    return (
        health_score_prediction,
        target_prediction,
        description,
        daily_intake_prediction,
        best_match if best_match else ingredient_name,
    )

@app.route('/upload', methods=['POST'])
def upload_file():
    """Run OCR on an uploaded image and score every recognised ingredient.

    Expects a multipart form upload with the image in the ``image`` field.
    The upload is written to a uniquely named temporary file in the upload
    folder, read with Tesseract, and always removed afterwards.

    Returns:
        * 400 with a ``message`` when the field is missing, no file was
          selected, or the upload cannot be identified as an image.
        * 200 with a ``message`` when OCR detects no text.
        * Otherwise a JSON list with one object per ingredient found in the
          text: ``ingredient_name``, ``health_score``, ``target``,
          ``description`` and ``daily_intake`` (or ``ingredient_name`` and
          ``error`` when no score is available).
    """
    # Local imports keep this handler self-contained.
    import uuid
    from PIL import UnidentifiedImageError

    if 'image' not in request.files:
        return jsonify({'message': 'No file part'}), 400

    file = request.files['image']
    if not file.filename:
        # Covers both an empty filename and a missing one.
        return jsonify({'message': 'No selected file'}), 400

    # Use a random name (keeping only the sanitised extension) so concurrent
    # uploads of the same filename cannot overwrite or delete each other, and
    # so an empty secure_filename() result cannot yield a directory path.
    extension = os.path.splitext(secure_filename(file.filename))[1]
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{uuid.uuid4().hex}{extension}")

    try:
        file.save(file_path)
        # Process image with Tesseract OCR; the context manager releases the
        # file handle before the file is deleted.
        with Image.open(file_path) as image:
            extracted_text = pytesseract.image_to_string(image)
    except UnidentifiedImageError:
        return jsonify({'message': 'Uploaded file is not a valid image'}), 400
    finally:
        # Remove the temporary file on success and on failure alike.
        if os.path.exists(file_path):
            os.remove(file_path)

    if not extracted_text.strip():
        return jsonify({'message': 'No text detected'}), 200

    ingredient_names = clean_ingredient_list(extracted_text, ingredient_data_list=ingredients)

    results = []
    for ingredient_name in ingredient_names:
        health_score, target, description, daily_intake, best_match = predict_from_ingredient(ingredient_name)
        if health_score is None:
            # Defensive: reported when the predictor signals "no score".
            results.append({'ingredient_name': ingredient_name, 'error': 'Ingredient not found'})
        else:
            results.append({
                'ingredient_name': best_match,
                'health_score': float(health_score),
                'target': target,
                'description': description,
                'daily_intake': daily_intake
            })

    return jsonify(results)

@app.route('/', methods=['GET'])
def hello():
    """Answer GET / with a fixed greeting, usable as a simple liveness check."""
    greeting = "Hello, world!"
    return greeting

if __name__ == "__main__":
    # Serve the app on localhost:3000 when this file is run as a script.
    app.run(port=3000, host='localhost')


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


 * Serving Flask app '__main__'
 * Debug mode: off


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
 * Running on http://localhost:3000
Press CTRL+C to quit
127.0.0.1 - - [19/Mar/2025 20:30:00] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [19/Mar/2025 20:30:01] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [19/Mar/2025 20:30:01] "GET /favicon.ico HTTP/1.1" 404 -
