In [1]:
!pip install -q transformers datasets sentence-transformers accelerate evaluate gradio pypdf2 python-docx

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForTokenClassification,
    AutoModelForQuestionAnswering,
    pipeline,
)
from sentence_transformers import SentenceTransformer
import numpy as np
import evaluate
import gradio as gr
import re
import pandas as pd
import os
import PyPDF2
import docx

device = 0 if torch.cuda.is_available() else -1
print("Using device:", "GPU" if device == 0 else "CPU")

Using device: CPU


In [3]:
# PubMed summarization dataset: medical articles + abstracts
raw_datasets = load_dataset("ccdv/pubmed-summarization")

print(raw_datasets)
print("Train size:", len(raw_datasets["train"]))
print("Validation size:", len(raw_datasets["validation"]))
print("Test size:", len(raw_datasets["test"]))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

section/train-00000-of-00005.parquet:   0%|          | 0.00/210M [00:00<?, ?B/s]

section/train-00001-of-00005.parquet:   0%|          | 0.00/208M [00:00<?, ?B/s]

section/train-00002-of-00005.parquet:   0%|          | 0.00/207M [00:00<?, ?B/s]

section/train-00003-of-00005.parquet:   0%|          | 0.00/211M [00:00<?, ?B/s]

section/train-00004-of-00005.parquet:   0%|          | 0.00/210M [00:00<?, ?B/s]

section/validation-00000-of-00001.parque(…):   0%|          | 0.00/59.0M [00:00<?, ?B/s]

section/test-00000-of-00001.parquet:   0%|          | 0.00/58.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/119924 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/6633 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6658 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['article', 'abstract'],
        num_rows: 119924
    })
    validation: Dataset({
        features: ['article', 'abstract'],
        num_rows: 6633
    })
    test: Dataset({
        features: ['article', 'abstract'],
        num_rows: 6658
    })
})
Train size: 119924
Validation size: 6633
Test size: 6658


In [4]:
def clean_text(text: str) -> str:
    if text is None:
        return ""
    # Normalize whitespace and lowercase
    text = text.replace("\n", " ")
    text = re.sub(r"\s+", " ", text)
    text = text.lower().strip()
    return text

def preprocess_example(example):
    article = clean_text(example["article"])
    abstract = clean_text(example["abstract"])
    return {
        "article": article,
        "abstract": abstract,
        # this dataset may not have section_names so we just keep empty placeholder
        "section_names": ""
    }

cleaned_datasets = raw_datasets.map(
    preprocess_example,
    batched=False
)

# Use a subset of train for speed, but still >= 50,000
train_cleaned = cleaned_datasets["train"].select(range(60000))
val_cleaned   = cleaned_datasets["validation"]
test_cleaned  = cleaned_datasets["test"]

print("Train (used) size:", len(train_cleaned))
print("Example keys:", train_cleaned[0].keys())
print("Sample article:\n", train_cleaned[0]["article"][:400])

Map:   0%|          | 0/119924 [00:00<?, ? examples/s]

Map:   0%|          | 0/6633 [00:00<?, ? examples/s]

Map:   0%|          | 0/6658 [00:00<?, ? examples/s]

Train (used) size: 60000
Example keys: dict_keys(['article', 'abstract', 'section_names'])
Sample article:
 a recent systematic analysis showed that in 2011 , 314 ( 296 - 331 ) million children younger than 5 years were mildly , moderately or severely stunted and 258 ( 240 - 274 ) million were mildly , moderately or severely underweight in the developing countries . in iran a study among 752 high school girls in sistan and baluchestan showed prevalence of 16.2% , 8.6% and 1.5% , for underweight , overwe


In [5]:
# Take a small sample of cleaned train data
train_sample = train_cleaned.select(range(5))
df_train_sample = train_sample.to_pandas()

# Create preview columns to avoid huge text in one cell
df_train_sample["article_preview"] = df_train_sample["article"].str.slice(0, 300)
df_train_sample["abstract_preview"] = df_train_sample["abstract"].str.slice(0, 200)

# Only show previews (section_names is placeholder)
df_train_sample_display = df_train_sample[["article_preview", "abstract_preview"]]

print("Sample of cleaned training dataset (preview):")
df_train_sample_display

Sample of cleaned training dataset (preview):


Unnamed: 0,article_preview,abstract_preview
0,a recent systematic analysis showed that in 20...,background : the present study was carried out...
1,it occurs in more than 50% of patients and may...,backgroundanemia in patients with cancer who a...
2,"tardive dystonia ( td ) , a rarer side effect ...",tardive dystonia ( td ) is a serious side effe...
3,"lepidoptera include agricultural pests that , ...",many lepidopteran insects are agricultural pes...
4,syncope is caused by transient diffuse cerebra...,we present an unusual case of recurrent cough ...


In [6]:
# Number of examples to use for evaluation (can increase later)
EVAL_SAMPLES = 50

eval_dataset = val_cleaned.select(range(min(EVAL_SAMPLES, len(val_cleaned))))
print("Eval subset size:", len(eval_dataset))


Eval subset size: 50


In [7]:
!pip install rouge_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import evaluate
rouge_metric = evaluate.load("rouge")
bleu_metric = evaluate.load("bleu")

def evaluate_summarizer(model_name, max_summary_len=128, batch_size=2):
    print(f"\n=== Evaluating summarization model: {model_name} ===")

    summarizer = pipeline(
        "summarization",
        model=model_name,
        tokenizer=model_name,
        device=device
    )

    references = []
    predictions = []

    for i in range(0, len(eval_dataset), batch_size):
        batch = eval_dataset[i:i+batch_size]
        inputs = batch["article"]
        refs = batch["abstract"]

        # Generate summaries
        summaries = summarizer(
            inputs,
            truncation=True,
            max_length=max_summary_len,
            min_length=30,
            do_sample=False
        )
        preds = [s["summary_text"] for s in summaries]

        references.extend(refs)
        predictions.extend(preds)

        print(f"Processed {min(i+batch_size, len(eval_dataset))}/{len(eval_dataset)}", end="\r")

    # ROUGE expects untokenized strings
    rouge_scores = rouge_metric.compute(
        predictions=predictions,
        references=references,
        use_stemmer=True
    )

    # For BLEU, pass untokenized predictions and format references as list of lists of untokenized strings
    bleu_scores = bleu_metric.compute(
        predictions=predictions,  # Predictions are already a list of untokenized strings
        references=[[r] for r in references] # Each reference is a single string, wrap it in a list to match expected format: List[List[str]]
    )

    print("\nROUGE scores:", rouge_scores)
    print("BLEU score:", bleu_scores)

    # Return flat dict for easy comparison
    result = {
        "rouge1": rouge_scores.get("rouge1", 0.0),
        "rouge2": rouge_scores.get("rouge2", 0.0),
        "rougeL": rouge_scores.get("rougeL", 0.0),
        "rougeLsum": rouge_scores.get("rougeLsum", 0.0),
        "bleu": bleu_scores.get("bleu", 0.0),
    }

    # Convert reference & predicted summaries into binary labels
    # 1 = important keywords present, 0 = absent
    def to_binary_labels(refs, preds):
        labels_true = []
        labels_pred = []

        for ref, pred in zip(refs, preds):
            # Pick important keywords from reference summary
            keywords = ref.split()[:10]  # first 10 keywords

            # Check if keywords appear in prediction
            true_label = 1
            pred_label = 1 if any(k in pred for k in keywords) else 0

            labels_true.append(true_label)
            labels_pred.append(pred_label)

        return labels_true, labels_pred

    # Generate binary labels for metrics
    y_true, y_pred = to_binary_labels(references, predictions)

    accuracy  = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall    = recall_score(y_true, y_pred, zero_division=0)
    f1        = f1_score(y_true, y_pred, zero_division=0)

    # Print results
    print("\nAdditional Metrics:")
    print("Accuracy :", accuracy)
    print("Precision:", precision)
    print("Recall   :", recall)
    print("F1 Score :", f1)

    # Add these metrics to result dictionary
    result.update({
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1
    })

    return result

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=737ae53a328f9fe1f81ebc2b2ba4351debb784f3ec12cf991c61640a7a82403a
  Stored in directory: /root/.cache/pip/wheels/85/9d/af/01feefbe7d55ef5468796f0c68225b6788e85d9d0a281e7a70
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [8]:
model_1_name = "t5-small"
model_2_name = "google/flan-t5-base"

scores_model_1 = evaluate_summarizer(model_1_name, max_summary_len=128)
scores_model_2 = evaluate_summarizer(model_2_name, max_summary_len=128)

# Build comparison DataFrame
comparison_df = pd.DataFrame(
    [scores_model_1, scores_model_2],
    index=[model_1_name, model_2_name]
)

print("\n=== Model Comparison (ROUGE + BLEU) ===")
comparison_df



=== Evaluating summarization model: t5-small ===


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 2/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 4/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 6/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 8/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 10/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 12/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 14/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 16/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 18/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 20/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 22/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 24/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 26/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 28/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 30/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 32/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 34/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 36/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 38/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 40/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 42/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 44/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 46/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 48/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 50/50
ROUGE scores: {'rouge1': np.float64(0.19427040007908594), 'rouge2': np.float64(0.045057903273320724), 'rougeL': np.float64(0.12285881969827919), 'rougeLsum': np.float64(0.1230692354392432)}
BLEU score: {'bleu': 0.005999044065047178, 'precisions': [0.46179775280898877, 0.11145038167938931, 0.04708171206225681, 0.027777777777777776], 'brevity_penalty': 0.06623099378279179, 'length_ratio': 0.2692075015124017, 'translation_length': 2670, 'reference_length': 9918}

Additional Metrics:
Accuracy : 1.0
Precision: 1.0
Recall   : 1.0
F1 Score : 1.0

=== Evaluating summarization model: google/flan-t5-base ===


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cpu
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 2/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 4/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 6/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 8/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 10/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 12/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 14/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 16/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 18/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 20/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 22/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 24/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 26/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 28/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 30/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 32/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 34/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 36/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 38/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 40/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 42/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 44/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 46/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 48/50

Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=256) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Processed 50/50
ROUGE scores: {'rouge1': np.float64(0.14192627851959627), 'rouge2': np.float64(0.05444609752552962), 'rougeL': np.float64(0.10981366223798306), 'rougeLsum': np.float64(0.10963578296448057)}
BLEU score: {'bleu': 9.96912036149321e-05, 'precisions': [0.5463494667760459, 0.17621899059024806, 0.07596067917783736, 0.03367633302151544], 'brevity_penalty': 0.0007957884335685777, 'length_ratio': 0.12290784432345231, 'translation_length': 1219, 'reference_length': 9918}

Additional Metrics:
Accuracy : 0.98
Precision: 1.0
Recall   : 0.98
F1 Score : 0.98989898989899

=== Model Comparison (ROUGE + BLEU) ===


Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,bleu,accuracy,precision,recall,f1_score
t5-small,0.19427,0.045058,0.122859,0.123069,0.005999,1.0,1.0,1.0,1.0
google/flan-t5-base,0.141926,0.054446,0.109814,0.109636,0.0001,0.98,1.0,0.98,0.989899


In [9]:
ner_model_name = "d4data/biomedical-ner-all"

ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_name)
ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name)

ner_pipeline = pipeline(
    "ner",
    model=ner_model,
    tokenizer=ner_tokenizer,
    aggregation_strategy="simple",
    device=device
)

# Quick test on one abstract
sample_text = eval_dataset[0]["abstract"]
print("Sample abstract:\n", sample_text[:400])

ner_entities = ner_pipeline(sample_text[:500])
ner_entities[:10]


tokenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/266M [00:00<?, ?B/s]

Device set to use cpu


Sample abstract:
 background and aim : there is lack of substantial indian data on venous thromboembolism ( vte ) . the aim of this study was to provide real - world information on patient characteristics , management strategies , clinical outcomes , and temporal trends in vte.subjects and methods : multicentre retrospective registry involving 549 medical records of patients with confirmed diagnosis of vte ( deep v


[{'entity_group': 'Disease_disorder',
  'score': np.float32(0.9748868),
  'word': 'th',
  'start': 72,
  'end': 74},
 {'entity_group': 'Disease_disorder',
  'score': np.float32(0.9045048),
  'word': '##romboembolism',
  'start': 74,
  'end': 87},
 {'entity_group': 'Disease_disorder',
  'score': np.float32(0.7546227),
  'word': 'vte',
  'start': 90,
  'end': 93},
 {'entity_group': 'Disease_disorder',
  'score': np.float32(0.9998235),
  'word': 'vt',
  'start': 256,
  'end': 258},
 {'entity_group': 'Lab_value',
  'score': np.float32(0.8327091),
  'word': '549',
  'start': 328,
  'end': 331},
 {'entity_group': 'Disease_disorder',
  'score': np.float32(0.99753267),
  'word': 'vte',
  'start': 388,
  'end': 391},
 {'entity_group': 'Disease_disorder',
  'score': np.float32(0.82992494),
  'word': 'th',
  'start': 404,
  'end': 406},
 {'entity_group': 'Disease_disorder',
  'score': np.float32(0.9628831),
  'word': '##rom',
  'start': 406,
  'end': 409},
 {'entity_group': 'Disease_disorder',
  

In [10]:
SIM_DOCS = 2000  # number of docs to index for similarity

embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embed_model = SentenceTransformer(embed_model_name, device="cuda" if device == 0 else "cpu")

corpus = [train_cleaned[i]["abstract"] for i in range(SIM_DOCS)]
corpus_ids = list(range(SIM_DOCS))

print("Building embeddings for similarity search on", len(corpus), "documents...")
corpus_embeddings = embed_model.encode(corpus, convert_to_numpy=True, show_progress_bar=True)
corpus_embeddings = corpus_embeddings / np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)

def find_similar_abstracts(query_text, top_k=5):
    query_emb = embed_model.encode([query_text], convert_to_numpy=True)
    query_emb = query_emb / np.linalg.norm(query_emb, axis=1, keepdims=True)
    scores = np.dot(corpus_embeddings, query_emb[0])
    top_idx = np.argsort(-scores)[:top_k]
    results = []
    for idx in top_idx:
        results.append({
            "score": float(scores[idx]),
            "abstract": corpus[idx][:500]  # preview
        })
    return results

# Test similarity on one abstract
similar = find_similar_abstracts(eval_dataset[0]["abstract"])
similar[:3]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Building embeddings for similarity search on 2000 documents...


Batches:   0%|          | 0/63 [00:00<?, ?it/s]

[{'score': 0.6135754585266113,
  'abstract': 'pulmonary thromboembolism is a very rare event in children , but the mortality rate is reported to be approximately 10% . the majority of children with thromboemboli have multiple risk factors , such as a catheter - related thrombosis , an infection , and a congenital prothrombotic disorder . hypereosinophilia is very rarely associated with pulmonary emboli in adults ; however , this condition has not been reported in children . we present a 12-year - old boy who had a pulmonary thromboembolism '},
 {'score': 0.6068921089172363,
  'abstract': 'background : agenesis of the inferior vena cava ( ivc ) as a cause of recurrent deep vein thrombosis ( dvt ) is uncommon.case:a 33-year - old male with no family history of thrombophilia , who had experienced multiple recurrent episodes of dvt over a 15-year period of unknown cause , was admitted into our hospital because of cellulitis in the right leg . computer tomography with contrast of the abdome

In [12]:
# Use the better model from your evaluation, e.g., FLAN-T5-base
app_summarizer_name = "google/flan-t5-base"  # or "t5-small" if memory is low

app_summarizer = pipeline(
    "summarization",
    model=app_summarizer_name,
    tokenizer=app_summarizer_name,
    device=device
)

Device set to use cpu


In [13]:
def read_report_file(file):
    if file is None:
        return ""

    file_path = getattr(file, "name", None)
    if file_path is None:
        return ""

    ext = os.path.splitext(file_path)[1].lower()

    try:
        if ext == ".txt":
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()

        elif ext == ".pdf":
            text = ""
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    text += page.extract_text() or ""

        elif ext == ".docx":
            doc = docx.Document(file_path)
            text = "\n".join([p.text for p in doc.paragraphs])

        else:
            # Try reading as plain text for unknown extensions
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
    except Exception as e:
        print("Error reading file:", e)
        text = ""

    return text

In [14]:
def health_insight_from_file(file, user_question):
    try:
        print("\n===== DEBUG START =====")

        # 1) Read file
        text = read_report_file(file)
        print("Raw extracted text length:", len(text))
        if len(text.strip()) == 0:
            return "❌ Could not extract any text from the document.", "", "", ""

        # 2) Clean text
        text_clean = clean_text(text)
        print("Cleaned text length:", len(text_clean))

        # 3) Summarization
        print("Running summarizer...")
        summary_out = app_summarizer(
            text_clean,
            truncation=True,
            max_length=160,
            min_length=60,
            do_sample=False
        )
        # summarizer pipeline returns a list of dicts
        summary = summary_out[0]["summary_text"]

        # 4) NER → table
        print("Running NER...")
        ner_res = ner_pipeline(text_clean[:1500])
        ner_df = ner_to_table(ner_res)
        ner_str = ner_df.to_markdown(index=False)

        # 5) Similar cases (semantic similarity)
        print("Finding similar cases...")
        similar = find_similar_abstracts(summary, top_k=3)
        similar_lines = []
        for i, r in enumerate(similar, start=1):
            similar_lines.append(
                f"#{i} (score={r['score']:.3f}):\n{r['abstract']}\n"
            )
        similar_str = "\n".join(similar_lines)

        # 6) Q&A
        print("Running QA...")
        context_for_qa = text_clean[:3000]
        question = (user_question or "").strip()

        if question:
            # Use the user's question
            ans = answer_question(question, context_for_qa)
            qa_answer_str = f"Q: {question}\n\nA: {ans}"
        else:
            # Auto-questions if user did not ask anything
            auto_questions = [
                "What is the main diagnosis or issue in this report?",
                "What important findings or symptoms are described?",
                "What tests or investigations are mentioned?",
                "What treatment or recommendations are given?",
                "What follow-up or next steps are suggested?"
            ]
            qa_pairs = []
            for q in auto_questions:
                ans = answer_question(q, context_for_qa)
                qa_pairs.append(f"Q: {q}\nA: {ans}")
            qa_answer_str = "\n\n".join(qa_pairs)

        print("===== DEBUG END =====")

        # 🔴 VERY IMPORTANT: return ALL FOUR in this order
        summary_md = f"## 📝 Summary\n\n{summary}"
        ner_md = f"## 🧬 Named Entities\n\n{ner_str}"
        similar_md = f"## 🔍 Similar Cases\n\n{similar_str}"
        qa_md = f"## ❓ Q&A Section\n\n{qa_answer_str}"

        return summary_md, ner_md, similar_md, qa_md


    except Exception as e:
        print("Error in health_insight_from_file:", e)
        return f"An error occurred: {e}", "", "", ""


In [15]:
qa_model_name = "deepset/roberta-base-squad2"
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
qa_pipeline = pipeline(
    "question-answering",
    model=qa_model,
    tokenizer=qa_tokenizer,
    device=device
)

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

Device set to use cpu


In [16]:
def ner_to_table(ner_res):
    """
    Convert NER list of dicts into a clean table.
    """
    rows = []
    for e in ner_res:
        rows.append({
            "Entity": e["word"],
            "Type": e["entity_group"],
            "Score": float(e["score"]),
            "Start": e["start"],
            "End": e["end"],
        })
    df = pd.DataFrame(rows)
    df = df.drop_duplicates(subset=["Entity", "Type"]).reset_index(drop=True)
    return df


def answer_question(question, context):
    """
    Run QA on one question and return just the answer text.
    """
    qa_out = qa_pipeline({"question": question, "context": context})
    return qa_out.get("answer", "").strip()


In [17]:
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 HealthInsight – Upload Medical Report & Get NLP Insights")

    file_input = gr.File(
        label="Upload medical report (TXT / PDF / DOCX)",
        file_types=[".txt", ".pdf", ".docx"]
    )

    question_input = gr.Textbox(
        lines=2,
        label="Your question about this report (optional)",
        placeholder="e.g. What is the medicine?"
    )

    run_button = gr.Button("Analyze Report")

    summary_output = gr.Markdown(label="Summary")
    ner_output = gr.Markdown(label="Named Entities (NER)")
    similar_output = gr.Markdown(label="Similar Cases (from dataset)")
    answer_output = gr.Markdown(label="Q&A Answer")

    run_button.click(
        fn=health_insight_from_file,
        inputs=[file_input, question_input],
        outputs=[summary_output, ner_output, similar_output, answer_output]
    )


demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3e2ee9d446fb1bf545.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


