In [1]:
!pip install gradio
!pip install faiss-cpu
!pip install PyMuPDF
!pip install sentence-transformers
!pip install torch transformers

Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 k

In [2]:
!pip install pdfplumber

Collecting pdfplumber
  Downloading pdfplumber-0.11.5-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.5/42.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20231228 (from pdfplumber)
  Downloading pdfminer.six-20231228-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.5-py3-none-any.whl (59 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.5/59.5 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m57.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading 

# TEXT BOX Interface

In [5]:
import gradio as gr
import numpy as np
from typing import List, Tuple
import faiss
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import re
from dataclasses import dataclass
import pickle
import os
from pathlib import Path
from io import BytesIO
import pdfplumber
import tempfile

# Configuration
CACHE_DIR = Path("cache")
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
EMBED_DIMENSION = 384  # Dimension for MiniLM embeddings
TOP_K_MATCHES = 5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

@dataclass
class Document:
    text: str
    embedding: np.ndarray = None

def load_models():
    # Load embedding model
    embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=DEVICE)

    # Load LLM and tokenizer
    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        low_cpu_mem_usage=True
    ).to(DEVICE)

    return embedding_model, model, tokenizer

def create_cache_dir():
    """Create cache directory if it doesn't exist"""
    CACHE_DIR.mkdir(exist_ok=True)

def get_cache_path(filename: str) -> Path:
    """Get path for cached embeddings"""
    return CACHE_DIR / f"{filename}.pkl"

def extract_text_from_pdf(file_obj) -> str:
    """Extract text from uploaded PDF file using pdfplumber"""
    try:
        # Create a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            # If it's a Gradio file object
            if hasattr(file_obj, 'name'):
                with open(file_obj.name, 'rb') as f:
                    tmp_file.write(f.read())
            else:
                tmp_file.write(file_obj.read())
            tmp_path = tmp_file.name

        text = ""
        # Use pdfplumber to extract text
        with pdfplumber.open(tmp_path) as pdf:
            for page in pdf.pages:
                try:
                    text += page.extract_text() or ""
                except Exception as e:
                    print(f"Error on page: {str(e)}")
                    continue

        # Clean up temporary file
        os.unlink(tmp_path)

        if not text.strip():
            alternative_text = extract_text_with_backup_method(tmp_path)
            if alternative_text:
                return alternative_text
            return "Error: No readable text found in the PDF."

        return text

    except Exception as e:
        if 'tmp_path' in locals():
            os.unlink(tmp_path)
        return f"Error extracting text from PDF: {str(e)}"

def extract_text_with_backup_method(pdf_path):
    """Backup method using pdf2text if available"""
    try:
        from pdfminer.high_level import extract_text
        return extract_text(pdf_path)
    except:
        try:
            import textract
            return textract.process(pdf_path).decode('utf-8')
        except:
            return None

def preprocess_text(text: str) -> str:
    """Clean and preprocess extracted text"""
    # Remove excessive whitespace
    text = re.sub(r'\s+', ' ', text)
    # Remove special characters but keep basic punctuation
    text = re.sub(r'[^\w\s.,!?;:-]', '', text)
    # Remove multiple periods
    text = re.sub(r'\.{2,}', '.', text)
    # Fix spaces around punctuation
    text = re.sub(r'\s+([.,!?;:])', r'\1', text)
    return text.strip()

def create_chunks(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[str]:
    """Split text into overlapping chunks"""
    chunks = []
    start = 0
    text_length = len(text)

    # Handle very short texts
    if text_length < chunk_size:
        return [text] if text else []

    while start < text_length:
        end = start + chunk_size

        if end >= text_length:
            chunks.append(text[start:])
            break

        # Find the last period or appropriate breaking point
        last_period = text.rfind('.', start, end)
        if last_period != -1 and last_period > start + chunk_size/2:
            end = last_period + 1
        else:
            # If no period found, try to break at a space
            while end > start and text[end] != ' ':
                end -= 1
            if end == start:  # If no space found, force break at chunk_size
                end = start + chunk_size

        chunk = text[start:end].strip()
        if chunk:  # Only add non-empty chunks
            chunks.append(chunk)
        start = max(start + chunk_size - overlap, end - overlap)

    return chunks

def get_embeddings(texts: List[str], embedding_model: SentenceTransformer) -> List[np.ndarray]:
    """Generate embeddings using sentence-transformers"""
    try:
        embeddings = embedding_model.encode(texts, show_progress_bar=False)
        return [np.array(embedding) for embedding in embeddings]
    except Exception as e:
        print(f"Error generating embeddings: {str(e)}")
        return None

def create_faiss_index(embeddings: List[np.ndarray]) -> faiss.IndexFlatL2:
    """Create and populate FAISS index"""
    embeddings_array = np.array(embeddings).astype('float32')
    index = faiss.IndexFlatL2(EMBED_DIMENSION)
    index.add(embeddings_array)
    return index

def get_relevant_chunks(query: str, chunks: List[str], faiss_index: faiss.IndexFlatL2, embedding_model: SentenceTransformer) -> List[str]:
    """Retrieve most relevant chunks for the query"""
    query_embedding = embedding_model.encode([query])[0]
    D, I = faiss_index.search(
        np.array([query_embedding]).astype('float32'),
        min(TOP_K_MATCHES, len(chunks))
    )
    return [chunks[i] for i in I[0]]

def generate_answer(query: str, context: List[str], model, tokenizer) -> str:
    """Generate answer using TinyLlama"""
    try:
        # Prepare prompt
        context_text = "\n".join(context)
        prompt = f"""<|system|>
You are a helpful assistant. Answer the question based only on the provided context. If the answer cannot be found in the context, say so.

Context:
{context_text}

<|user|>
{query}

<|assistant|>"""

        # Generate response
        inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)

        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            num_return_sequences=1,
            temperature=0.6,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        try:
            response = response.split("<|assistant|>")[-1].strip()
        except:
            response = response.strip()

        return response
    except Exception as e:
        return f"Error generating answer: {str(e)}"

class PDFQuestionAnswering:
    def __init__(self):
        create_cache_dir()
        print(f"Using device: {DEVICE}")
        self.embedding_model, self.llm_model, self.tokenizer = load_models()
        self.current_chunks = None
        self.current_faiss_index = None

    def process_pdf(self, pdf_file):
        if pdf_file is None:
            return "Please upload a PDF file."

        try:
            # Check cache for processed embeddings
            cache_path = get_cache_path(pdf_file.name)

            if cache_path.exists():
                print("Loading from cache...")
                with cache_path.open('rb') as f:
                    cached_data = pickle.load(f)
                    self.current_chunks = cached_data['chunks']
                    self.current_faiss_index = cached_data['faiss_index']
                return "PDF loaded from cache! You can now ask questions."

            print("Processing new PDF...")
            # Process PDF
            text = extract_text_from_pdf(pdf_file)
            if isinstance(text, str) and not text.startswith("Error"):
                print("Text extracted successfully, preprocessing...")
                processed_text = preprocess_text(text)
                if not processed_text:
                    return "No readable text found in the PDF."

                print("Creating chunks...")
                self.current_chunks = create_chunks(processed_text)

                if not self.current_chunks:
                    return "No valid text chunks could be created from the PDF."

                # Generate embeddings
                print("Generating embeddings...")
                embeddings = get_embeddings(self.current_chunks, self.embedding_model)
                if embeddings:
                    # Create FAISS index
                    print("Creating FAISS index...")
                    self.current_faiss_index = create_faiss_index(embeddings)

                    # Cache the processed data
                    print("Caching results...")
                    with cache_path.open('wb') as f:
                        pickle.dump({
                            'chunks': self.current_chunks,
                            'faiss_index': self.current_faiss_index
                        }, f)

                    return f"PDF processed successfully! Extracted {len(self.current_chunks)} chunks of text. You can now ask questions."
                else:
                    return "Failed to generate embeddings"
            else:
                return text  # Return error message
        except Exception as e:
            return f"Error processing PDF: {str(e)}"

    def answer_question(self, question):
        if self.current_chunks is None or self.current_faiss_index is None:
            return "Please upload and process a PDF first."

        if not question.strip():
            return "Please enter a question."

        try:
            # Get relevant chunks
            print("Finding relevant chunks...")
            relevant_chunks = get_relevant_chunks(
                question,
                self.current_chunks,
                self.current_faiss_index,
                self.embedding_model
            )

            if not relevant_chunks:
                return "Could not find relevant context in the document."

            # Generate answer
            print("Generating answer...")
            answer = generate_answer(
                question,
                relevant_chunks,
                self.llm_model,
                self.tokenizer
            )

            # Format response with relevant chunks
            response = f"Answer: {answer}\n\nRelevant Context:\n"
            for i, chunk in enumerate(relevant_chunks, 1):
                response += f"\nChunk {i}:\n{chunk}\n---"

            return response
        except Exception as e:
            return f"Error answering question: {str(e)}"

def create_gradio_interface():
    qa_system = PDFQuestionAnswering()

    with gr.Blocks(title="PDF Question-Answering with RAG") as interface:
        gr.Markdown("# 📚 PDF Question-Answering with RAG")
        gr.Markdown("Upload a PDF document and ask questions about its content!")

        with gr.Row():
            with gr.Column():
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                process_button = gr.Button("Process PDF")
                status_output = gr.Textbox(label="Status", lines=2)

            with gr.Column():
                question_input = gr.Textbox(label="Ask a question about the document")
                answer_button = gr.Button("Get Answer")
                answer_output = gr.Textbox(label="Response", lines=10)

        process_button.click(
            fn=qa_system.process_pdf,
            inputs=[pdf_input],
            outputs=[status_output]
        )

        answer_button.click(
            fn=qa_system.answer_question,
            inputs=[question_input],
            outputs=[answer_output]
        )

    return interface

# For Google Colab, add these installation commands at the top of your notebook:
"""
!pip install -q gradio faiss-cpu pdfplumber sentence-transformers torch transformers pdfminer.six textract
"""

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(share=True)

Using device: cuda




* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://b614201686ecf23ea8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Finding relevant chunks...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generating answer...
Finding relevant chunks...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generating answer...
Finding relevant chunks...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generating answer...
Finding relevant chunks...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generating answer...
Finding relevant chunks...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generating answer...
Processing new PDF...
Text extracted successfully, preprocessing...
Creating chunks...
Generating embeddings...
Creating FAISS index...
Caching results...


# CHATBOT Interface

In [9]:
import gradio as gr
import numpy as np
from typing import List, Tuple
import faiss
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import re
from dataclasses import dataclass
import pickle
import os
from pathlib import Path
from io import BytesIO
import pdfplumber
import tempfile

# Configuration
CACHE_DIR = Path("cache")
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
EMBED_DIMENSION = 384
TOP_K_MATCHES = 5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

@dataclass
class Document:
    text: str
    embedding: np.ndarray = None

def load_models():
    embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=DEVICE)
    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        low_cpu_mem_usage=True
    ).to(DEVICE)
    return embedding_model, model, tokenizer

def create_cache_dir():
    CACHE_DIR.mkdir(exist_ok=True)

def get_cache_path(filename: str) -> Path:
    return CACHE_DIR / f"{filename}.pkl"

def extract_text_from_pdf(file_obj) -> str:
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            if hasattr(file_obj, 'name'):
                with open(file_obj.name, 'rb') as f:
                    tmp_file.write(f.read())
            else:
                tmp_file.write(file_obj.read())
            tmp_path = tmp_file.name

        text = ""
        with pdfplumber.open(tmp_path) as pdf:
            for page in pdf.pages:
                try:
                    text += page.extract_text() or ""
                except Exception as e:
                    print(f"Error on page: {str(e)}")
                    continue

        os.unlink(tmp_path)

        if not text.strip():
            return "Error: No readable text found in the PDF."

        return text

    except Exception as e:
        if 'tmp_path' in locals():
            os.unlink(tmp_path)
        return f"Error extracting text from PDF: {str(e)}"

def preprocess_text(text: str) -> str:
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^\w\s.,!?;:-]', '', text)
    text = re.sub(r'\.{2,}', '.', text)
    text = re.sub(r'\s+([.,!?;:])', r'\1', text)
    return text.strip()

def create_chunks(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[str]:
    chunks = []
    start = 0
    text_length = len(text)

    if text_length < chunk_size:
        return [text] if text else []

    while start < text_length:
        end = start + chunk_size

        if end >= text_length:
            chunks.append(text[start:])
            break

        last_period = text.rfind('.', start, end)
        if last_period != -1 and last_period > start + chunk_size/2:
            end = last_period + 1
        else:
            while end > start and text[end] != ' ':
                end -= 1
            if end == start:
                end = start + chunk_size

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        start = max(start + chunk_size - overlap, end - overlap)

    return chunks

def get_embeddings(texts: List[str], embedding_model: SentenceTransformer) -> List[np.ndarray]:
    try:
        embeddings = embedding_model.encode(texts, show_progress_bar=False)
        return [np.array(embedding) for embedding in embeddings]
    except Exception as e:
        print(f"Error generating embeddings: {str(e)}")
        return None

def create_faiss_index(embeddings: List[np.ndarray]) -> faiss.IndexFlatL2:
    embeddings_array = np.array(embeddings).astype('float32')
    index = faiss.IndexFlatL2(EMBED_DIMENSION)
    index.add(embeddings_array)
    return index

def get_relevant_chunks(query: str, chunks: List[str], faiss_index: faiss.IndexFlatL2, embedding_model: SentenceTransformer) -> List[str]:
    query_embedding = embedding_model.encode([query])[0]
    D, I = faiss_index.search(
        np.array([query_embedding]).astype('float32'),
        min(TOP_K_MATCHES, len(chunks))
    )
    return [chunks[i] for i in I[0]]

def generate_answer(query: str, context: List[str], model, tokenizer) -> str:
    try:
        context_text = "\n".join(context)
        prompt = f"""<|system|>
You are a helpful assistant. Answer the question based only on the provided context. If the answer cannot be found in the context, say so.

Context:
{context_text}

<|user|>
{query}

<|assistant|>"""

        inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            num_return_sequences=1,
            temperature=0.6,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        try:
            response = response.split("<|assistant|>")[-1].strip()
        except:
            response = response.strip()

        return response
    except Exception as e:
        return f"Error generating answer: {str(e)}"

class PDFQuestionAnswering:
    def __init__(self):
        create_cache_dir()
        print(f"Using device: {DEVICE}")
        self.embedding_model, self.llm_model, self.tokenizer = load_models()
        self.current_chunks = None
        self.current_faiss_index = None
        self.chat_history = []

    def process_pdf(self, pdf_file):
        if pdf_file is None:
            return [], "Please upload a PDF file."

        try:
            cache_path = get_cache_path(pdf_file.name)

            if cache_path.exists():
                print("Loading from cache...")
                with cache_path.open('rb') as f:
                    cached_data = pickle.load(f)
                    self.current_chunks = cached_data['chunks']
                    self.current_faiss_index = cached_data['faiss_index']
                return [], "PDF loaded from cache! You can now ask questions about the document."

            print("Processing new PDF...")
            text = extract_text_from_pdf(pdf_file)
            if isinstance(text, str) and not text.startswith("Error"):
                processed_text = preprocess_text(text)
                if not processed_text:
                    return [], "No readable text found in the PDF."

                self.current_chunks = create_chunks(processed_text)
                if not self.current_chunks:
                    return [], "No valid text chunks could be created from the PDF."

                embeddings = get_embeddings(self.current_chunks, self.embedding_model)
                if embeddings:
                    self.current_faiss_index = create_faiss_index(embeddings)

                    with cache_path.open('wb') as f:
                        pickle.dump({
                            'chunks': self.current_chunks,
                            'faiss_index': self.current_faiss_index
                        }, f)

                    return [], f"PDF processed successfully! You can now ask questions about the document."
                else:
                    return [], "Failed to generate embeddings"
            else:
                return [], text
        except Exception as e:
            return [], f"Error processing PDF: {str(e)}"

    def chat(self, message, history):
        if self.current_chunks is None or self.current_faiss_index is None:
            return "Please upload and process a PDF first."

        if not message.strip():
            return "Please enter a question."

        try:
            relevant_chunks = get_relevant_chunks(
                message,
                self.current_chunks,
                self.current_faiss_index,
                self.embedding_model
            )

            if not relevant_chunks:
                return "I couldn't find relevant information in the document to answer your question."

            answer = generate_answer(
                message,
                relevant_chunks,
                self.llm_model,
                self.tokenizer
            )

            source_context = "\n\n🔍 *Source Context*:\n"
            for i, chunk in enumerate(relevant_chunks, 1):
                source_context += f"\n{chunk}\n---"

            return answer + source_context
        except Exception as e:
            return f"Error answering question: {str(e)}"

def create_gradio_interface():
    qa_system = PDFQuestionAnswering()

    with gr.Blocks(title="PDF Chat with RAG") as interface:
        gr.Markdown("# 📚 Chat with your PDF")
        gr.Markdown("Upload a PDF document and start a conversation about its content!")

        with gr.Row():
            with gr.Column(scale=1):
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                process_button = gr.Button("Process PDF")
            
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(height=450)
                message = gr.Textbox(
                    label="Ask a question about the document",
                    placeholder="Type your question here...",
                    lines=2
                )
                with gr.Row():
                    submit = gr.Button("Send")
                    clear = gr.Button("Clear Chat")

        def respond(message, history):
            bot_message = qa_system.chat(message, history)
            history.append((message, bot_message))
            return "", history

        def clear_chat():
            return None, None

        process_button.click(
            fn=qa_system.process_pdf,
            inputs=[pdf_input],
            outputs=[chatbot, message]
        )

        submit.click(
            fn=respond,
            inputs=[message, chatbot],
            outputs=[message, chatbot]
        )
        message.submit(
            fn=respond,
            inputs=[message, chatbot],
            outputs=[message, chatbot]
        )
        clear.click(
            fn=clear_chat,
            outputs=[message, chatbot],
        )

    return interface

if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(share=True)

Using device: cuda




* Running on local URL:  http://127.0.0.1:7863
* Running on public URL: https://0e7338b92c6a615e0c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Loading from cache...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]