In [2]:
# installing dependencies
!pip install gradio transformers python-docx python-pptx googletrans==4.0.0-rc1 PyMuPDF reportlab sentence-transformers faiss-gpu fpdf

Collecting gradio
  Downloading gradio-4.42.0-py3-none-any.whl.metadata (15 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting PyMuPDF
  Downloading PyMuPDF-1.24.9-cp310-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting reportlab
  Downloading reportlab-4.2.2-py3-none-any.whl.metadata (1.4 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading 

In [6]:
import gradio as gr
import fitz  # PyMuPDF for PDF
from pptx import Presentation  # for PPTX
from docx import Document  # for DOCX
from transformers import pipeline

# Build the text-generation pipeline once at module level so that every call to
# generate_tender() reuses the same object.
# NOTE(review): 'gpt2' is the base GPT-2 checkpoint, not an improved one - swap
# the model name here if higher-quality output is needed.
generator = pipeline('text-generation', model='gpt2')

# Function to extract text based on file type
def extract_text_from_file(file):
    """Extract plain text from an uploaded PDF, PPTX, DOCX or TXT file.

    Args:
        file: Either a filesystem path (``str`` or path-like object, e.g. what
            ``gr.File`` hands over in filepath mode) or a file-like object
            exposing ``.name`` and ``.read()``. ``None`` (nothing uploaded)
            is accepted as well.

    Returns:
        str: The extracted text; ``''`` when ``file`` is ``None``; the literal
        ``'Unsupported file format'`` for any other file extension.
    """
    if file is None:
        return ''

    is_stream = hasattr(file, 'read')
    if isinstance(file, str) or hasattr(file, '__fspath__'):
        # Plain path (also covers str subclasses that carry the path as value).
        path = str(file)
    else:
        # File-like upload: the path / original file name lives in `.name`.
        path = str(getattr(file, 'name', ''))

    # Compare extensions case-insensitively so "REPORT.PDF" is accepted too.
    lowered = path.lower()
    # python-pptx / python-docx accept both a path string and a binary stream.
    source = file if is_stream else path

    def read_bytes():
        """Return the raw content, from the stream if there is one, else from disk."""
        if is_stream:
            return file.read()
        with open(path, 'rb') as handle:
            return handle.read()

    if lowered.endswith('.pdf'):
        # The context manager closes the PyMuPDF document once the text is read.
        with fitz.open(stream=read_bytes(), filetype="pdf") as pdf:
            return ''.join(page.get_text() for page in pdf)

    if lowered.endswith('.pptx'):
        deck = Presentation(source)
        # One shape per line; joining with '' would glue adjacent words together.
        return '\n'.join(
            shape.text
            for slide in deck.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )

    if lowered.endswith('.docx'):
        return '\n'.join(para.text for para in Document(source).paragraphs)

    if lowered.endswith('.txt'):
        data = read_bytes()
        if isinstance(data, str):
            # Stream was opened in text mode; nothing left to decode.
            return data
        # Replace undecodable bytes instead of failing the whole request.
        return data.decode("utf-8", errors="replace")

    return 'Unsupported file format'

# Function to generate the tender document
def generate_tender(prompt_file, tender_files, country_code, max_new_tokens=256):
    """Generate a tender draft from a prompt file and earlier tender documents.

    Args:
        prompt_file: Uploaded file holding the prompt (anything accepted by
            ``extract_text_from_file``); ``None`` is treated as an empty prompt.
        tender_files: Uploaded previous tenders. May be a list/tuple, a single
            file, or ``None``; ``None`` entries are skipped.
        country_code: Value inserted verbatim into the model input.
        max_new_tokens: Number of tokens the model may generate on top of the
            input text.

    Returns:
        str: The ``'generated_text'`` field of the first pipeline result.
    """
    if tender_files is None:
        tender_files = []
    elif not isinstance(tender_files, (list, tuple)):
        tender_files = [tender_files]

    prompt_text = extract_text_from_file(prompt_file) if prompt_file is not None else ''
    tender_texts = [extract_text_from_file(f) for f in tender_files if f is not None]
    previous_tenders = '\n'.join(tender_texts)

    # Combine prompt and previous tenders for generation
    combined_text = f"Prompt: {prompt_text}\nPrevious Tenders: {previous_tenders}\nCountry Code: {country_code}"

    # `max_length` counts the input tokens as well, so long uploads left no room
    # for generation (or exceeded the model's context window). Budget only the
    # NEW tokens and let the pipeline trim over-long input to make room for them.
    # NOTE(review): "hole" trims from the LEFT of the input, i.e. the start of
    # `combined_text` is dropped first when the uploads are very long.
    generated_tender = generator(
        combined_text,
        max_new_tokens=max_new_tokens,
        handle_long_generation="hole",
    )
    return generated_tender[0]['generated_text']

# Gradio interface
def interface():
    """Build the "Tender Document Generator" Gradio UI and launch it.

    The UI consists of a prompt-file upload, a multi-file upload for previous
    tenders, a country-code dropdown, a button and an output textbox. Pressing
    the button runs ``generate_tender`` on the current inputs.
    """
    with gr.Blocks() as iface:
        gr.Markdown("# Tender Document Generator")

        with gr.Row():
            prompt_file = gr.File(label="Upload Prompt File")
            tender_files = gr.File(label="Upload Previous Tender Files", file_count="multiple")
            # Preselect a value so the literal "None" never ends up in the model input.
            country_code = gr.Dropdown(
                label="Country Code",
                choices=["EN", "FR", "ES", "DE", "CN"],
                value="EN",
            )

        generate_btn = gr.Button("Generate Tender")
        output = gr.Textbox(label="Generated Tender", lines=10)

        def generate_tender_wrapper(prompt_file, tender_files, country_code):
            """Validate and normalise the raw component values, then generate."""
            if prompt_file is None:
                # Shown to the user as an error message instead of an opaque traceback.
                raise gr.Error("Please upload a prompt file before generating a tender.")
            if tender_files is None:
                tender_files = []  # Nothing uploaded: no previous tenders
            elif not isinstance(tender_files, list):
                tender_files = [tender_files]  # Ensure it's a list
            return generate_tender(prompt_file, tender_files, country_code)

        generate_btn.click(fn=generate_tender_wrapper, inputs=[prompt_file, tender_files, country_code], outputs=output)

    iface.launch()

# Entry point: build and launch the UI when this code is executed directly
# (i.e. not when it is imported as a module).
if __name__ == "__main__":
    interface()

IMPORTANT: You are using gradio version 4.19.1, however version 4.29.0 is available, please upgrade.
--------
Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://1aa5aba074a0dac8d6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
