<a href="https://colab.research.google.com/github/Keerthivasan004/webpro-colab/blob/colab/project1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
%%writefile app.py
import logging
import tempfile
import os
import time
import json
import io
import threading
from flask import Flask, render_template, request, jsonify, redirect, url_for, send_file
from pyngrok import ngrok
import easyocr
from PIL import Image
import language_tool_python
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph
from reportlab.lib.styles import getSampleStyleSheet

# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Flask application
app = Flask(__name__, static_folder='static')
MAX_IMAGE_SIZE = 5 * 1024 * 1024  # 5 MB limit

# In-memory temporary storage
temp_storage = {}  # {temp_id: {'input': ..., 'corrected_text': ..., 'corrections': ...}}

# LanguageTool configuration
tool = language_tool_python.LanguageTool('en-US', remote_server='https://api.languagetool.org/v2')

# Initialize EasyOCR reader
reader = easyocr.Reader(['en'])  # English language

# Extract text from an image using EasyOCR
def extract_text(image_path):
    try:
        result = reader.readtext(image_path, detail=0)
        text = " ".join(result)
        logger.info("Text extracted from image successfully using EasyOCR")
        return text if text.strip() else "Error: No text detected."
    except Exception as e:
        logger.error(f"EasyOCR error: {str(e)}")
        return f"Error: {str(e)}"

# Process text using LanguageTool
def process_text(text):
    try:
        matches = tool.check(text)
        corrected_text = tool.correct(text)
        corrections = [{"error": m.context, "message": m.message, "suggestions": m.replacements} for m in matches]
        logger.info(f"Text processed: Original='{text}', Corrected='{corrected_text}', Corrections={corrections}")
        return corrected_text, corrections
    except Exception as e:
        logger.error(f"LanguageTool error: {str(e)}")
        return text, [{"error": "Processing failed", "message": str(e), "suggestions": []}]

# Process text synchronously
def process_text_sync(temp_id):
    try:
        logger.info(f"Processing started for temp_id: {temp_id}")
        if temp_id not in temp_storage or 'input' not in temp_storage[temp_id]:
            logger.error(f"No input found in storage for temp_id: {temp_id}")
            raise ValueError(f"No input found for temp_id: {temp_id}")

        raw_text = temp_storage[temp_id]['input']
        logger.info(f"Retrieved raw_text: {raw_text}")

        corrected_text, corrections = process_text(raw_text)
        logger.info(f"Corrections completed: {len(corrections)} corrections applied")

        temp_storage[temp_id].update({
            'corrected_text': corrected_text,
            'corrections': json.dumps(corrections),
            'expiration': time.time() + 3600
        })
        logger.info(f"Stored results for temp_id: {temp_id}")

        return {'temp_id': temp_id, 'raw_text': raw_text, 'corrected_text': corrected_text, 'corrections': corrections}
    except Exception as e:
        logger.error(f"Processing failed for temp_id: {temp_id} - Error: {str(e)}", exc_info=True)
        raise

# Flask Routes
@app.route('/')
def home():
    return render_template('web1.html')

@app.route('/test')
def test():
    return render_template('web2.html')

@app.route('/store_input', methods=['POST'])
def store_input():
    logger.info("Received store_input request")
    text = request.form.get('text')
    file = request.files.get('file')
    temp_id = str(int(time.time()))

    if not text and not file:
        logger.error("No input provided")
        return jsonify({'error': 'No input provided'}), 400

    try:
        if file:
            file.seek(0, os.SEEK_END)
            if file.tell() > MAX_IMAGE_SIZE:
                logger.error("File too large")
                return jsonify({'error': 'File too large'}), 400
            file.seek(0)
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp:
                temp.write(file.read())
                image_path = temp.name
            extracted_text = extract_text(image_path)
            os.unlink(image_path)
            temp_storage[temp_id] = {'input': extracted_text}
            logger.info(f"Stored image text in memory with temp_id: {temp_id}")
        else:
            temp_storage[temp_id] = {'input': text}
            logger.info(f"Stored text in memory with temp_id: {temp_id}")
    except Exception as e:
        logger.error(f"Error storing input for temp_id: {temp_id} - {str(e)}")
        return jsonify({'error': 'Failed to store input'}), 500

    return jsonify({'temp_id': temp_id})

@app.route('/analyze', methods=['POST'])
def analyze():
    logger.info("Received analyze request")
    temp_id = request.form.get('temp_id')
    logger.debug(f"Temp ID received: {temp_id}")
    if not temp_id:
        logger.error("No temp_id provided in request")
        return jsonify({'error': 'No temp_id provided'}), 400
    if temp_id not in temp_storage or 'input' not in temp_storage[temp_id]:
        logger.error(f"No input found in memory for temp_id: {temp_id}")
        return jsonify({'error': 'No input found for this temp_id'}), 404

    try:
        result = process_text_sync(temp_id)
        logger.info(f"Processing completed for temp_id: {temp_id}")
        return jsonify({'temp_id': temp_id, 'message': 'Analysis completed'})
    except Exception as e:
        logger.error(f"Failed to process for temp_id: {temp_id} - {str(e)}")
        return jsonify({'error': 'Failed to analyze'}), 500

@app.route('/result/<temp_id>', methods=['GET', 'POST'])
def result(temp_id):
    if request.method == 'POST':
        if 'go_back' in request.form:  # Check for Go Back form submission
            logger.info("Go Back requested, clearing temp_storage and redirecting to /test")
            temp_storage.clear()  # Clear all temporary storage
            return redirect(url_for('test'))  # Redirect to web2.html

    if temp_id not in temp_storage or 'input' not in temp_storage[temp_id]:
        logger.error(f"No raw text found for temp_id: {temp_id}")
        return redirect(url_for('no_data'))
    if 'corrected_text' not in temp_storage[temp_id] or 'corrections' not in temp_storage[temp_id]:
        logger.error(f"Analysis results not found for temp_id: {temp_id}")
        return redirect(url_for('no_data'))

    if time.time() > temp_storage[temp_id].get('expiration', float('inf')):
        del temp_storage[temp_id]
        return redirect(url_for('no_data'))

    raw_text = temp_storage[temp_id]['input']
    corrected_text = temp_storage[temp_id]['corrected_text']
    corrections = json.loads(temp_storage[temp_id]['corrections'])

    logger.info(f"Rendering result for temp_id: {temp_id}")
    return render_template('result.html',
                          raw_text=raw_text,
                          corrected_text=corrected_text,
                          corrections=corrections,
                          temp_id=temp_id)

@app.route('/download/<temp_id>')
def download(temp_id):
    if temp_id not in temp_storage or 'input' not in temp_storage[temp_id] or 'corrected_text' not in temp_storage[temp_id]:
        logger.error(f"Data missing for download with temp_id: {temp_id}")
        return redirect(url_for('no_data'))

    if time.time() > temp_storage[temp_id].get('expiration', float('inf')):
        del temp_storage[temp_id]
        return redirect(url_for('no_data'))

    raw_text = temp_storage[temp_id]['input']
    corrected_text = temp_storage[temp_id]['corrected_text']

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4)
    styles = getSampleStyleSheet()
    elements = [
        Paragraph("Your Input:", styles['Title']),
        Paragraph(raw_text, styles['Normal']),
        Paragraph("Corrected Input:", styles['Title']),
        Paragraph(corrected_text, styles['Normal'])
    ]
    doc.build(elements)
    buffer.seek(0)

    logger.info(f"Generated PDF for temp_id: {temp_id}")
    return send_file(buffer, as_attachment=True, download_name="analysis.pdf", mimetype='application/pdf')

@app.route('/no-data', methods=['GET', 'POST'])
def no_data():
    if request.method == 'POST':
        if 'go_back' in request.form:  # Check for Go Back form submission
            logger.info("Go Back requested from no-data, clearing temp_storage and redirecting to /test")
            temp_storage.clear()  # Clear all temporary storage
            return redirect(url_for('test'))  # Redirect to web2.html
    return render_template('no_data.html', message="No data available. Please analyze some text or upload an image first.")

@app.route('/error', methods=['GET', 'POST'])
def error():
    if request.method == 'POST':
        if 'go_back' in request.form:  # Check for Go Back form submission
            logger.info("Go Back requested from error, clearing temp_storage and redirecting to /test")
            temp_storage.clear()  # Clear all temporary storage
            return redirect(url_for('test'))  # Redirect to web2.html
    return render_template('error.html', message="An error occurred during analysis. Please try again.")

# Function to run Flask app
def run_app():
    app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False)

# Start Flask with Ngrok
if __name__ == '__main__':
    logger.info("Starting Flask application with Ngrok...")

    # Set Ngrok authtoken (replace with your own token)
    ngrok.set_auth_token("2uz3KJgyTeVUEXtz8KL5FJOb1LU_L1roM8DjCBSqhDyGdrmp")

    # Open an Ngrok tunnel to port 5000
    public_url = ngrok.connect(5000).public_url
    logger.info(f"Ngrok tunnel established at: {public_url}")
    print(f" * Ngrok tunnel available at: {public_url}")

    # Start Flask in a separate thread
    flask_thread = threading.Thread(target=run_app)
    flask_thread.start()

    # Keep the main thread alive (optional, for Colab)
    try:
        flask_thread.join()
    except KeyboardInterrupt:
        logger.info("Shutting down Flask and Ngrok...")
        ngrok.kill()

Writing app.py


In [7]:
!python app.py

 * Ngrok tunnel available at: https://e351-35-230-17-10.ngrok-free.app
 * Serving Flask app 'app'
 * Debug mode: on
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [29/Mar/2025 10:39:08] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:09] "GET /static/images/google.png HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:09] "GET /static/js/web1.js HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:09] "GET /static/css/web1.css HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:09] "GET /static/images/process.jpg HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:09] "GET /static/images/device.jpg HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:09] "GET /static/images/upload.jpg HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:10] "GET /static/images/result.jpg HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2025 10:39:10] "GET /static/images/view.jpg HTTP/1.1" 200 -
127.0.0.1 - - [29/Mar/2

In [5]:
!pip install flask pyngrok easyocr language_tool_python reportlab pillow
!apt-get update
!apt-get install -y libglib2.0-0 libsm6 libxext6 libxrender-dev
!apt-get install -y tesseract-ocr

Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading