In [None]:
import re
from datetime import datetime
import torch
from transformers import AutoModel, AutoTokenizer

# Hugging Face identifier of the OCR model used by extract_mrp_exp_date_.
_OCR_MODEL_NAME = "stepfun-ai/GOT-OCR2_0"

# (model, tokenizer) pairs keyed by device string, so the weights are loaded
# once per process instead of once per call.
_OCR_MODEL_CACHE = {}

# Single-pass fix-up table for the two OCR confusions the extractor corrects.
_OCR_FIXUPS = str.maketrans({"O": "0", "l": "1"})

# Price prefix ("MR R", "MRPR(s)", "MRP", "Rs", "Rs.", "₹"), an optional ':' or
# '-' separator, then the amount. The amount keeps thousands separators and a
# decimal part ("1,299.00") but stops before trailing punctuation ("50/-").
_MRP_PATTERN = re.compile(
    r'(?:mr\s*r\s*|mr\s*prs?\s*|mrp|rs\.?|₹)\s*[:\-]?\s*(\d+(?:,\d+)*(?:\.\d+)?)',
    re.IGNORECASE,
)

# Canonical keyword -> OCR/spelling variants rewritten to it before date search.
_DATE_KEYWORD_VARIANTS = {
    "EXP": ["XP", "EX", "EXR", "Expires"],
    "MFD": ["MFR", "MF", "MFG", "PROD"],
}

# Numeric dates (d/m/y with '/' or '-') or "<MONTH> <day>[,] <year>".
_DATE_PATTERN = re.compile(
    r'\b(\d{1,2}[-/]\d{1,2}[-/]\d{2,4}|\b[A-Z]{3,9}\s*\d{1,2},?\s*\d{4})\b'
)
# Splits a month-name date into its parts so spacing/comma can be normalised.
_MONTH_NAME_DATE = re.compile(r'([A-Za-z]+)\s*(\d{1,2}),?\s*(\d{4})')

_NUMERIC_DATE_FORMATS = ('%d/%m/%y', '%d/%m/%Y', '%d-%m-%y', '%d-%m-%Y')
_MONTH_NAME_FORMATS = ('%b %d %Y', '%B %d %Y')


def _load_ocr_model(device):
    """Return the cached (model, tokenizer) pair for *device*, loading it once."""
    key = str(device)
    if key not in _OCR_MODEL_CACHE:
        model = AutoModel.from_pretrained(
            _OCR_MODEL_NAME, trust_remote_code=True
        ).to(device)
        tokenizer = AutoTokenizer.from_pretrained(
            _OCR_MODEL_NAME, trust_remote_code=True
        )
        _OCR_MODEL_CACHE[key] = (model, tokenizer)
    return _OCR_MODEL_CACHE[key]


def _extract_mrp(text):
    """Return the first price-like amount in *text* as a string, or None."""
    # Apply the O->0 / l->1 fix-ups, then join "MR PRs." into "MRPRs.".
    cleaned = text.translate(_OCR_FIXUPS).replace('MR PRs.', 'MRPRs.')
    match = _MRP_PATTERN.search(cleaned)
    return match.group(1) if match else None


def _normalize_date_keywords(text):
    """Rewrite expiry/manufacture keyword variants and upper-case *text*."""
    for canonical, variants in _DATE_KEYWORD_VARIANTS.items():
        for variant in variants:
            text = re.sub(
                r'\b' + variant + r'\b', canonical, text, flags=re.IGNORECASE
            )
    # Upper-casing matters: the month-name branch of _DATE_PATTERN is [A-Z] only.
    return text.upper()


def _parse_date(date_string):
    """Parse one matched date string into a datetime, or return None."""
    named = _MONTH_NAME_DATE.fullmatch(date_string.strip())
    if named:
        # Normalise "JAN05,2025" / "JANUARY 5 2025" to "<MONTH> <d> <yyyy>" so
        # both abbreviated and full month names parse regardless of the
        # optional comma and spacing the search pattern allows.
        candidate = "{} {} {}".format(*named.groups())
        formats = _MONTH_NAME_FORMATS
    else:
        candidate = date_string
        formats = _NUMERIC_DATE_FORMATS
    for fmt in formats:
        try:
            return datetime.strptime(candidate, fmt)
        except ValueError:
            continue
    return None


def _find_most_recent_date(text):
    """Return the latest parseable date in *text* as 'dd/mm/YYYY', or None."""
    normalized = _normalize_date_keywords(text)
    parsed = (_parse_date(raw) for raw in _DATE_PATTERN.findall(normalized))
    dates = [d for d in parsed if d is not None]
    return max(dates).strftime('%d/%m/%Y') if dates else None


def extract_mrp_exp_date_(image_file):
    """Run OCR on an image and pull out the MRP and the latest date found.

    Args:
        image_file: Image path passed straight through to the OCR model's
            ``chat`` method.

    Returns:
        A dict with two keys:
        ``"MRP"`` - the first price-like amount found after an MRP/Rs/₹
        prefix, as a string (e.g. ``"1,299.00"``), or ``None``.
        ``"Expiry_Date"`` - the latest date found anywhere in the OCR text,
        formatted ``dd/mm/YYYY``, or ``None``. The latest date is used as the
        expiry; the text is not checked for an "EXP" label next to it.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model, tokenizer = _load_ocr_model(device)

    # Presumably `chat` returns the recognised text as a str (GOT-OCR2_0 remote
    # code); fall back to "" so an empty/None result yields None fields
    # instead of raising.
    text = model.chat(tokenizer, image_file, ocr_type='ocr') or ""
    print("OCR Text:", text)

    return {
        "MRP": _extract_mrp(text),
        "Expiry_Date": _find_most_recent_date(text),
    }

# Example usage: run the extractor once on a sample image and print the
# resulting {"MRP": ..., "Expiry_Date": ...} dict.
# NOTE: this executes at cell/module level, so it triggers the OCR model
# download/load as soon as the cell runs.
image_file = "example_image.png"  # Path to your image
result = extract_mrp_exp_date_(image_file)
print(result)


In [None]:
from flask import Flask, request,jsonify
from pyngrok import ngrok
from flask_cors import CORS  # Import CORS
from PIL import Image
import io

# Set your ngrok authentication token.
# The string below is a placeholder and must be replaced with a real token
# before the tunnel can be opened; avoid committing the real value.
ngrok.set_auth_token("YOUR_NGROK_AUTH_TOKEN")

# Flask application that serves the /webhook/mrpexp endpoint defined below.
app = Flask(__name__)

# Enable CORS for the app. No options are passed, so flask-cors defaults
# apply (presumably all origins on all routes - confirm against flask-cors).
CORS(app)

@app.route('/webhook/mrpexp', methods=['POST'])
def webhook():
    """Handle an image upload and return the extracted MRP / expiry date.

    Expects a multipart POST with the image in the ``file`` field; any other
    form fields are only logged.

    Returns:
        ``({"analysis": {"MRP": ..., "Expiry_Date": ...}}, 200)`` on success,
        or ``({"error": <message>}, 400)`` when the ``file`` part is missing,
        has an empty filename, or cannot be decoded as an image.
    """
    import os
    import tempfile

    # Reject the request early instead of falling through to a KeyError/500.
    image_file = request.files.get('file')
    if image_file is None:
        return jsonify({"error": "No image part"}), 400
    if image_file.filename == '':
        return jsonify({"error": "No image selected"}), 400

    try:
        image = Image.open(image_file)
        # The temp file is written as JPEG, which cannot store alpha or
        # palette modes (e.g. RGBA/P from PNG uploads); convert those first.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")
        else:
            # Force decoding now so corrupt uploads are reported as 400 here.
            image.load()
    except OSError:
        # Pillow raises OSError (incl. UnidentifiedImageError) for bad images.
        return jsonify({"error": "Uploaded file is not a valid image"}), 400

    # Log any extra form fields sent alongside the image.
    data = request.form.to_dict()
    print("Received data:", data)

    # Use a unique temp file per request so concurrent uploads cannot
    # overwrite each other, and always clean it up afterwards.
    fd, temp_image_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        image.save(temp_image_path)
        result = extract_mrp_exp_date_(temp_image_path)
    finally:
        os.remove(temp_image_path)

    print(result)

    return jsonify({"analysis": result}), 200

# Start ngrok: open a public tunnel to the local Flask port and print its URL.
ngrok_tunnel = ngrok.connect(5000)
print("Ngrok URL:", ngrok_tunnel.public_url)

# Run the Flask app on the tunnelled port.
# debug is off because the server is reachable from the internet through
# ngrok, and Flask's debug mode enables the Werkzeug interactive debugger,
# which can execute arbitrary Python from the browser.
# use_reloader=False keeps the server in a single process.
app.run(port=5000, debug=False, use_reloader=False)