# In [None]:
import os
import tempfile

import streamlit as st
import torch
from deep_translator import GoogleTranslator
from fpdf import FPDF
from keybert import KeyBERT
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline

# Initialize models
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"


@st.cache_resource
def _load_models(checkpoint_name):
    """Load the tokenizer and seq2seq model for *checkpoint_name*.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects as a resource keeps the weights from being
    reloaded on each rerun.

    Args:
        checkpoint_name: Hugging Face model identifier or local path.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = T5Tokenizer.from_pretrained(checkpoint_name, legacy=False)
    loaded_model = T5ForConditionalGeneration.from_pretrained(
        checkpoint_name,
        device_map="auto",
        torch_dtype=torch.float32,
        offload_folder="./model_offload",
    )
    return loaded_tokenizer, loaded_model


# Module-level names used by generate_summary().
tokenizer, base_model = _load_models(checkpoint)

# UI Translations
# Maps a language code to the labels shown in the interface. Every locale
# defines the same set of keys, including "processing" (the spinner text
# looked up while a document is being handled).
translations = {
    "en": {
        "title": "Document Analyzer",
        "upload": "Upload PDF/TXT file",
        "select_ui": "Interface Language",
        "select_summary": "Summary Language",
        "process": "Process Document",
        "processing": "Processing...",
        "summary": "Summary",
        "keywords": "Key Terms",
        "download": "Download PDF"
    },
    "hi": {
        "title": "दस्तावेज़ विश्लेषक",
        "upload": "PDF/TXT फ़ाइल अपलोड करें",
        "select_ui": "इंटरफ़ेस भाषा",
        "select_summary": "सारांश भाषा",
        "process": "प्रक्रिया दस्तावेज़",
        "processing": "प्रोसेस हो रहा है...",
        "summary": "सारांश",
        "keywords": "मुख्य शब्द",
        "download": "PDF डाउनलोड करें"
    },
    "kn": {
        "title": "ದಾಖಲೆ ವಿಶ್ಲೇಷಕ",
        "upload": "PDF/TXT ಫೈಲ್ ಅಪ್ಲೋಡ್ ಮಾಡಿ",
        "select_ui": "ಇಂಟರ್ಫೇಸ್ ಭಾಷೆ",
        "select_summary": "ಸಾರಾಂಶ ಭಾಷೆ",
        "process": "ದಾಖಲೆ ಪ್ರಕ್ರಿಯೆ",
        "processing": "ಪ್ರಕ್ರಿಯೆಗೊಳಿಸಲಾಗುತ್ತಿದೆ...",
        "summary": "ಸಾರಾಂಶ",
        "keywords": "ಪ್ರಮುಖ ಪದಗಳು",
        "download": "PDF ಡೌನ್ಲೋಡ್ ಮಾಡಿ"
    }
}

def process_file(file):
    """Load an uploaded PDF/TXT file and split it into text chunks.

    The upload is validated first, then written to a private temporary file
    (the loaders need a path on disk) which is removed once loading is done,
    so nothing is left behind in the working directory and the
    client-supplied file name is never used as a path.

    Args:
        file: Uploaded file object exposing ``name`` and ``getbuffer()``.

    Returns:
        ``(chunks, None)`` on success, where ``chunks`` is the result of
        ``RecursiveCharacterTextSplitter.split_documents``; or
        ``(None, "Unsupported format")`` when the extension is neither
        ``pdf`` nor ``txt``.
    """
    file_extension = file.name.split('.')[-1].lower()
    if file_extension not in ('pdf', 'txt'):
        # Reject before touching the disk.
        return None, "Unsupported format"

    with tempfile.NamedTemporaryFile(suffix=f".{file_extension}", delete=False) as tmp:
        tmp.write(file.getbuffer())
        file_path = tmp.name

    try:
        if file_extension == 'pdf':
            loader = PyPDFLoader(file_path)
        else:
            loader = TextLoader(file_path)
        # load() rather than load_and_split(): the documents are split exactly
        # once, by the splitter configured below.
        documents = loader.load()
    finally:
        os.remove(file_path)

    # Report the uploaded name, not the temporary path, as the source.
    for document in documents:
        document.metadata["source"] = file.name

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return text_splitter.split_documents(documents), None

# Upper bound on characters sent per translation request; kept below the
# translator's per-request length limit.
_MAX_TRANSLATE_CHARS = 4500


def _split_for_translation(text, limit):
    """Split *text* into pieces of at most *limit* characters, breaking on spaces when possible."""
    pieces = []
    remaining = text
    while len(remaining) > limit:
        cut = remaining.rfind(" ", 0, limit + 1)
        if cut <= 0:
            cut = limit  # no space to break on: hard cut
        head = remaining[:cut].strip()
        if head:
            pieces.append(head)
        remaining = remaining[cut:].lstrip()
    if remaining:
        pieces.append(remaining)
    return pieces


def translate_text(text, target_lang="en"):
    """Translate *text* into *target_lang* with automatic source detection.

    Long input is translated in several requests of bounded size and the
    results are joined with single spaces, so a long summary no longer fails
    as a whole because of the translator's length limit.

    Args:
        text: Text to translate. Empty or non-string values are returned
            unchanged.
        target_lang: Target language code.

    Returns:
        The translated text, or the original *text* if translation fails
        (the error is shown in the UI).
    """
    if not text or not isinstance(text, str):
        return text
    try:
        translator = GoogleTranslator(source='auto', target=target_lang)
        pieces = _split_for_translation(text, _MAX_TRANSLATE_CHARS)
        # Keep the source piece if the service returns nothing for it.
        return " ".join(translator.translate(piece) or piece for piece in pieces)
    except Exception as e:
        st.error(f"Translation error: {str(e)}")
        return text

def generate_summary(text_chunks):
    """Summarize each chunk and join the partial summaries with spaces.

    Chunks whose ``page_content`` is empty or whitespace-only are skipped:
    with ``min_length=50`` the model would otherwise be forced to generate
    text from nothing. When no usable chunk remains, the summarization
    pipeline is not built at all.

    Args:
        text_chunks: Iterable of objects exposing ``page_content``.

    Returns:
        The combined summary, or ``""`` when there is nothing to summarize
        or summarization fails (the error is shown in the UI).
    """
    try:
        texts = [
            chunk.page_content
            for chunk in text_chunks
            if chunk.page_content and chunk.page_content.strip()
        ]
        if not texts:
            return ""

        summarizer = pipeline(
            'summarization',
            model=base_model,
            tokenizer=tokenizer,
            max_length=500,
            min_length=50
        )
        return " ".join(summarizer(text)[0]['summary_text'] for text in texts)
    except Exception as e:
        st.error(f"Summarization failed: {str(e)}")
        return ""

def save_as_pdf(summary, filename, font_path='NotoSansKannada-Regular.ttf'):
    """Write *summary* to a PDF, falling back to a UTF-8 text file on failure.

    Args:
        summary: Text to write.
        filename: Destination path of the PDF.
        font_path: TrueType font file used to render the text. The default
            is the font the app has always used; NOTE(review): presumably it
            only covers Kannada (and Latin) glyphs, so pass a matching font
            for other scripts - confirm against the bundled font files.

    Returns:
        The path actually written: *filename* on success, otherwise the same
        path with its extension replaced by ``.txt``.
    """
    try:
        pdf = FPDF()
        pdf.add_page()
        pdf.add_font('Kannada', '', font_path, uni=True)
        pdf.set_font('Kannada', size=14)
        pdf.multi_cell(0, 10, summary)
        pdf.output(filename)
        return filename
    except Exception as e:
        st.error(f"PDF error: {str(e)}")
        # Swap only the extension; str.replace would also rewrite any other
        # ".pdf" occurring earlier in the path.
        fallback_path = os.path.splitext(filename)[0] + ".txt"
        with open(fallback_path, "w", encoding="utf-8") as f:
            f.write(summary)
        return fallback_path

def main():
    """Render the app: pick languages, upload a document, show its summary.

    Flow: choose the interface language, upload a PDF/TXT file, choose the
    summary language, then on button press summarize the file, translate the
    summary, extract key terms and offer the result for download.
    """
    # Display name -> language code, shared by both language selectors.
    # (Slicing the display name only works for "English"; the Hindi and
    # Kannada names do not start with their language codes.)
    lang_map = {"English": "en", "हिंदी": "hi", "ಕನ್ನಡ": "kn"}

    # UI Language Selection
    ui_choice = st.sidebar.selectbox("🌐 Interface Language", ["English", "हिंदी", "ಕನ್ನಡ"])
    ui_lang = lang_map[ui_choice]
    labels = translations[ui_lang]

    # Main UI
    st.title(labels["title"])
    uploaded_file = st.file_uploader(labels["upload"], type=['pdf', 'txt'])
    language = st.selectbox(labels["select_summary"], ["ಕನ್ನಡ", "हिंदी", "English"])

    # The button is only rendered once a file has been uploaded.
    if not (uploaded_file and st.button(labels["process"])):
        return

    with st.spinner(labels.get("processing", "Processing...")):
        try:
            text_chunks, error = process_file(uploaded_file)
            if error:
                st.error(error)
                return

            summary = generate_summary(text_chunks)
            if not summary:
                # Nothing to translate, analyze or export.
                st.warning(labels.get("no_summary", "No summary could be generated."))
                return

            translated = translate_text(summary, lang_map[language])

            st.subheader(labels["summary"])
            st.write(translated)

            keywords = KeyBERT().extract_keywords(translated, top_n=5)
            st.subheader(labels["keywords"])
            st.write(", ".join(kw[0] for kw in keywords))

            base_name = os.path.splitext(uploaded_file.name)[0]
            pdf_name = f"{base_name}_summary.pdf"
            txt_name = f"{base_name}_summary.txt"

            # Drop any PDF left by an earlier run so that its presence below
            # reflects this run's outcome.
            if os.path.exists(pdf_name):
                os.remove(pdf_name)
            save_as_pdf(translated, pdf_name)

            # save_as_pdf writes a .txt file instead when PDF generation
            # fails; serve whichever file was actually produced.
            if os.path.exists(pdf_name):
                out_name, mime = pdf_name, "application/pdf"
            else:
                out_name, mime = txt_name, "text/plain"

            with open(out_name, "rb") as f:
                payload = f.read()
            st.download_button(
                labels["download"],
                payload,
                file_name=out_name,
                mime=mime
            )

        except Exception as e:
            st.error(f"Error: {str(e)}")

# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()

# In [2]:
# pwd
#
# '/Users/shivuku'