# In [1]:
# Loading necessary libraries
from datasets import load_dataset   # loading pubmed dataset
import re   # for using regex functions
from transformers import T5Tokenizer, T5ForConditionalGeneration  # for model training 
import ipywidgets as widgets   # for displaying widgets
from IPython.display import display, HTML, clear_output
from ipywidgets import Output, Button

# Load the PubMed Summarization dataset ("document" configuration) via the
# `datasets` library.
# NOTE(review): neither `dataset` nor `train_dataset` is read anywhere else in
# the visible cell; presumably kept for later cells -- confirm before relying
# on (or removing) this load.
dataset = load_dataset("ccdv/pubmed-summarization", "document")

# Access the 'train' split of the dataset
train_dataset = dataset['train']

# Preprocessing function
def preprocess_text(text):
    """Return *text* with punctuation removed and whitespace normalized.

    Every character that is neither a word character (``\\w``: letters,
    digits, underscore) nor whitespace is dropped. Runs of whitespace
    (spaces, tabs, newlines) are then collapsed to a single space and the
    result is trimmed at both ends.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned, single-line string.
    """
    # Strip punctuation *before* collapsing whitespace: removing a symbol
    # that sits between two spaces ("a - b") would otherwise leave a double
    # space behind in the result.
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Load T5 model and tokenizer
# Both objects are created from the same "t5-small" checkpoint name and are
# used as module-level globals by generate_summary() below.
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Function to generate summary
def generate_summary(article_text, *, max_input_length=512, max_length=150,
                     min_length=40, length_penalty=2.0, num_beams=4):
    """Summarize *article_text* with the module-level T5 model.

    The text is prefixed with ``"summarize: "``, encoded with the
    module-level ``tokenizer`` and truncated to ``max_input_length`` tokens,
    so anything past that limit does not reach the model. The summary is
    produced by ``model.generate`` and decoded with special tokens skipped.

    Args:
        article_text: The article text to summarize.
        max_input_length: Token limit applied when encoding the input.
        max_length: ``max_length`` passed to ``model.generate``.
        min_length: ``min_length`` passed to ``model.generate``.
        length_penalty: ``length_penalty`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The decoded summary string.
    """
    inputs = tokenizer.encode(
        "summarize: " + article_text,
        return_tensors="pt",
        max_length=max_input_length,
        truncation=True,
    )
    summary_ids = model.generate(
        inputs,
        max_length=max_length,
        min_length=min_length,
        length_penalty=length_penalty,
        num_beams=num_beams,
        early_stopping=True,
    )
    # generate() returns a batch; a single input was encoded, so take row 0.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Function to handle file upload and summarization
def on_upload(btn):
    """Summarize the uploaded text file and render the result in ``output``.

    Click handler registered on the upload button. It takes the first file
    held by ``upload_widget``, decodes it as UTF-8, cleans it with
    ``preprocess_text``, summarizes the cleaned text with
    ``generate_summary``, and displays both the original text and the
    summary inside the ``output`` widget (replacing earlier results).

    Args:
        btn: The widget passed by ``on_click``; not used.
    """
    import html  # stdlib; local import keeps this block self-contained

    uploads = upload_widget.value
    if not uploads:
        return  # No file selected

    # ipywidgets 7 exposes ``value`` as a dict keyed by file name, while
    # ipywidgets 8 exposes a tuple of per-file dicts; accept either shape.
    if isinstance(uploads, dict):
        first_upload = next(iter(uploads.values()))
    else:
        first_upload = uploads[0]

    # ``content`` may be bytes or a memoryview depending on the ipywidgets
    # version; bytes() normalizes both. Undecodable bytes are replaced
    # rather than aborting the whole callback.
    file_content = bytes(first_upload['content']).decode('utf-8', errors='replace')
    preprocessed_article = preprocess_text(file_content)
    summary = generate_summary(preprocessed_article)

    # The file is untrusted input: escape it (and the text derived from it)
    # so that markup in the upload is shown literally instead of being
    # interpreted by the notebook front end.
    original_html = html.escape(file_content).replace('\n', '<br>')
    summary_html = html.escape(summary).replace('\n', '<br>')

    with output:
        clear_output(wait=True)
        display(HTML("<h3>Original Article</h3>"))
        display(HTML("<p>{}</p>".format(original_html)))

        display(HTML("<h3>Summarized Article</h3>"))
        display(HTML("<p>{}</p>".format(summary_html)))

# --- UI wiring ---------------------------------------------------------
# Single-file picker restricted to .txt uploads.
upload_widget = widgets.FileUpload(accept='.txt', multiple=False)

# Button that runs on_upload() against whatever file is currently selected.
upload_button = Button(description='Upload and Summarize')
upload_button.on_click(on_upload)

# Area that on_upload() renders the article and its summary into.
output = Output()

# Show the controls stacked vertically, with the results area below them.
controls = widgets.VBox([upload_widget, upload_button])
display(controls)
display(output)


# Captured notebook output (likely from an earlier run of this cell):
# SyntaxError: '(' was never closed (3756976782.py, line 52)