In [None]:
!lsof -i :5000

In [None]:
!kill -9 893

In [None]:
# Mount Google Drive at /content/drive so the project stored under MyDrive
# is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Install Node.js and npm from the apt repositories (non-interactive).
!apt-get install nodejs npm -y

In [None]:
# Change into the frontend project directory, then install its npm
# dependencies and run its "build" script.
%cd /content/drive/MyDrive/project-bolt-sb1-x4xjyrpa/project

!npm install
!npm run build

In [None]:
nvm --version

In [None]:
# Print the currently installed Node.js version, then remove the apt
# nodejs package.
!node -v
!apt remove nodejs -y

In [None]:
# Run the NodeSource setup script for the 18.x line, install nodejs from it,
# and print the resulting version.
!curl -fsSL https://deb.nodesource.com/setup_18.x | bash -
!apt install -y nodejs
!node -v


In [None]:
# Install the project's npm dependencies again from the project directory.
%cd /content/drive/MyDrive/project-bolt-sb1-x4xjyrpa/project
!npm install


In [None]:

# Mark the bundled esbuild binary as executable.
# NOTE(review): presumably needed because files on the Drive mount lose their
# executable bit - confirm.
!chmod +x node_modules/@esbuild/linux-x64/bin/esbuild


In [None]:
# Run the project's "build" npm script.
!npm run build


In [None]:
# List both candidate build-output directories (build/ and dist/) to see
# which one the build produced.
!ls -l /content/drive/MyDrive/project-bolt-sb1-x4xjyrpa/project/build
!ls -l /content/drive/MyDrive/project-bolt-sb1-x4xjyrpa/project/dist

In [None]:
# Run the project's "dev" npm script from the project directory.
%cd /content/drive/MyDrive/project-bolt-sb1-x4xjyrpa/project
!npm run dev

In [None]:
# Install required libraries
# (Flask + CORS/ngrok helpers, transformers/torch, PDF and DOCX parsers,
# spaCy and YAKE).
!pip install flask flask-ngrok transformers torch sentence-transformers pdfplumber python-docx spacy yake flask-cors
# Download the small English spaCy pipeline that spacy.load() reads below.
!python -m spacy download en_core_web_sm

import os
import pdfplumber
import docx
import spacy
import yake
from transformers import T5Tokenizer, T5ForConditionalGeneration, BartTokenizer, BartForConditionalGeneration
from flask import Flask, request, jsonify, send_from_directory
from flask_ngrok import run_with_ngrok
from flask_cors import CORS

# Load spaCy NLP model
# Used by extractive_summary for sentence splitting and by classify_document
# for tokenization.
nlp = spacy.load("en_core_web_sm")

# Load Summarization Models
# T5-small is passed to abstractive_summary by generate_summary; BART-large-CNN
# is used directly by single_sentence_explanation.
t5_model = T5ForConditionalGeneration.from_pretrained("t5-small")
t5_tokenizer = T5Tokenizer.from_pretrained("t5-small")
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

# Initialize Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS for frontend-backend interaction
run_with_ngrok(app)  # Enable ngrok

# Directory the "/" and "/<path:filename>" routes serve files from.
# NOTE(review): this is the project root rather than a build-output directory
# such as dist/ - confirm the index.html here is the one meant to be served.
FRONTEND_DIR = "/content/drive/MyDrive/project-bolt-sb1-x4xjyrpa/project"


# Static-file route backed by the frontend directory
@app.route("/<path:filename>", methods=["GET"])
def serve_frontend(filename):
    """Return the file at *filename*, looked up relative to ``FRONTEND_DIR``."""
    static_response = send_from_directory(FRONTEND_DIR, filename)
    return static_response

@app.route("/", methods=["GET"])
def index():
    """Return ``index.html`` from ``FRONTEND_DIR`` for the site root."""
    landing_page = "index.html"
    return send_from_directory(FRONTEND_DIR, landing_page)

# PDF text extraction
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, one page per line block.

    *pdf_file* is handed straight to ``pdfplumber.open``. Pages for which
    ``extract_text()`` yields nothing are skipped; each remaining page's text
    is followed by a newline.
    """
    with pdfplumber.open(pdf_file) as document:
        page_texts = (page.extract_text() for page in document.pages)
        # Joined inside the ``with`` so pages are read while the PDF is open.
        return "".join(content + "\n" for content in page_texts if content)

# DOCX text extraction
def extract_text_from_docx(docx_file):
    """Return the paragraph texts of *docx_file* joined by newlines.

    *docx_file* is handed straight to ``docx.Document``.
    """
    document = docx.Document(docx_file)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

# Lead-sentence extractive summary
def extractive_summary(text, num_sentences=5):
    """Return the first *num_sentences* sentences of *text*.

    Sentences are split by the module-level spaCy pipeline ``nlp`` and
    stripped of surrounding whitespace; the result is a list of strings.
    """
    parsed = nlp(text)
    stripped_sentences = [span.text.strip() for span in parsed.sents]
    leading = stripped_sentences[:num_sentences]
    return leading

# YAKE keyword extraction
def extract_keywords(text, num_keywords=5):
    """Return up to *num_keywords* keyword strings extracted from *text*.

    Uses a ``yake.KeywordExtractor``; only the first element of each result
    tuple (the keyword itself) is kept.
    """
    extractor = yake.KeywordExtractor(top=num_keywords, stopwords=None)
    return [phrase for phrase, *_ in extractor.extract_keywords(text)]

# Document classification function
def classify_document(text):
    """Classify *text* into a coarse category by counting keyword hits.

    The text is lowercased and tokenized with the module-level spaCy pipeline
    ``nlp``; each token that exactly equals one of a category's keywords counts
    as one hit for that category.

    Returns:
        The capitalized name of the category with the most hits: "Legal",
        "Technical", "Academic" or "General". Ties go to the category listed
        first. When no keyword occurs at all the result is "General" rather
        than whichever category happens to be listed first.
    """
    # Keywords must be lowercase because the text is lowercased before
    # matching; an uppercase entry such as "AI" could never match.
    categories = {
        "legal": {"court", "law", "agreement", "contract", "policy"},
        "technical": {"ai", "algorithm", "data", "software", "engineering"},
        "academic": {"research", "study", "university", "experiment", "paper"},
        "general": {"news", "blog", "report", "story", "review"},
    }
    doc = nlp(text.lower())
    word_counts = {
        category: sum(1 for token in doc if token.text in words)
        for category, words in categories.items()
    }
    best_category = max(word_counts, key=word_counts.get)
    if word_counts[best_category] == 0:
        # No evidence for any category: fall back to the catch-all label.
        return "General"
    return best_category.capitalize()

# Abstractive summary via a seq2seq model
def abstractive_summary(text, model, tokenizer, max_length=150):
    """Generate a summary of *text* with the given model/tokenizer pair.

    The text is prefixed with ``"summarize: "``, encoded with truncation
    enabled, and decoded from the first sequence returned by a 5-beam
    ``model.generate`` call capped at *max_length*.
    """
    encoded_prompt = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        truncation=True,
    )
    generation_options = {
        "max_length": max_length,
        "num_beams": 5,
        "early_stopping": True,
    }
    generated = model.generate(encoded_prompt, **generation_options)
    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)

# Function for single-sentence explanation using Bart
def single_sentence_explanation(text):
    """Return a short BART-generated explanation of *text*.

    Uses the module-level ``bart_tokenizer`` / ``bart_model`` with 5-beam
    search and at most 30 generated tokens.
    """
    # The "summarize: " task prefix is a T5 convention; BART is not trained
    # with task prefixes, so the raw text is encoded as-is instead of making
    # the prefix part of the content to be summarized.
    input_ids = bart_tokenizer.encode(text, return_tensors="pt", truncation=True)
    # min_length is set explicitly: the facebook/bart-large-cnn checkpoint
    # ships a default min_length above 30, which would suppress the
    # end-of-sequence token until max_length cuts the output mid-sentence.
    output_ids = bart_model.generate(
        input_ids,
        min_length=5,
        max_length=30,
        num_beams=5,
        early_stopping=True,
    )
    return bart_tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Full summarization pipeline
def generate_summary(text, num_extractive=5):
    """Run every analysis step on *text* and return the results as a tuple.

    Returns ``(key_points, final_summary, explanation, keywords,
    document_type)``. The T5 summary and the one-sentence explanation are both
    generated from the joined key points; keywords and document type are
    computed from the full text.
    """
    key_points = extractive_summary(text, num_extractive)
    condensed = " ".join(key_points)

    # Tuple elements are evaluated left to right, so the steps run in the
    # order they are listed.
    return (
        key_points,
        abstractive_summary(condensed, t5_model, t5_tokenizer),
        single_sentence_explanation(condensed),
        extract_keywords(text, num_keywords=5),
        classify_document(text),
    )

# API endpoint for document processing
@app.route("/process", methods=["POST"])
def process_document():
    """Summarize an uploaded document or a block of raw text.

    Input is taken from the multipart ``file`` field (PDF or DOCX) when one is
    present, otherwise from the ``text`` form field.

    Returns:
        A JSON object with ``document_type``, ``keywords``, ``key_points``,
        ``summary`` and ``explanation``; or a JSON ``{"error": ...}`` body with
        status 400 when no input is given, the file type is unsupported, the
        file cannot be parsed, or no text could be extracted.
    """
    uploaded_file = request.files.get("file")
    input_text = request.form.get("text")

    if uploaded_file:
        # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
        file_ext = uploaded_file.filename.rsplit(".", 1)[-1].lower()
        if file_ext == "pdf":
            extractor = extract_text_from_pdf
        elif file_ext in ("docx", "doc"):
            extractor = extract_text_from_docx
        else:
            return jsonify({"error": "Unsupported file type"}), 400
        try:
            extracted_text = extractor(uploaded_file)
        except Exception:
            # Request boundary: a corrupt upload (or a legacy binary .doc,
            # which a .docx parser is not expected to open) becomes a client
            # error instead of an unhandled 500. The traceback is still logged.
            app.logger.exception("Failed to extract text from %r", uploaded_file.filename)
            return jsonify({"error": "Could not read the uploaded file"}), 400
    elif input_text:
        extracted_text = input_text
    else:
        return jsonify({"error": "No input provided"}), 400

    # Image-only PDFs or whitespace-only input leave nothing to summarize;
    # stop here rather than running the models on an empty prompt.
    if not extracted_text or not extracted_text.strip():
        return jsonify({"error": "No text could be extracted"}), 400

    key_points, summary, explanation, keywords, document_type = generate_summary(extracted_text)

    return jsonify({
        "document_type": document_type,
        "keywords": keywords,
        "key_points": key_points,
        "summary": summary,
        "explanation": explanation
    })

# Health probe endpoint
@app.route("/health", methods=["GET"])
def health_check():
    """Answer with ``{"status": "healthy"}`` and HTTP status 200."""
    payload = {"status": "healthy"}
    return jsonify(payload), 200

# Start the Flask server with its default settings, but only when this code
# runs as the main module (not when it is imported).
if __name__ == "__main__":
    app.run()
