In [1]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Define constants
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_TARGET_LENGTH = 512  # Maximum length for the output summary

# Load the pre-trained FLAN-T5 model and tokenizer
model_name = "kconstable/summation-flant5"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def get_prompt(doc):
    """Format prompts for text summarization using FLAN-T5 models."""
    prompt = "Summarize the following document:\n\n"
    prompt += f"{doc}"
    prompt += "\n\n Summary:"
    return prompt

def get_response(prompt, model, tokenizer):
    """Generate a text summary from the prompt."""
    # Tokenize the prompt
    encoded_input = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
        padding='max_length',
        truncation=True,
        max_length=1024  # Adjust if the input text is large
    )

    # Move the inputs to the same device as the model (GPU or CPU)
    model_inputs = encoded_input.to(DEVICE)

    # Generate the response
    generated_ids = model.generate(
        **model_inputs,
        max_length=MAX_TARGET_LENGTH,
        num_beams=4,  # You can adjust the number of beams for beam search
        early_stopping=True
    )

    # Decode the response back to text
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded_output

def print_example(summary, document):
    """Print the original document, the human summary, and the generated summary."""
    print(f"Document: {summary['index']}")
    print("=" * 100)
    print(document)
    print("\nHuman Summary", '-' * 88)
    print(summary['human'], '\n')
    print("GenAI Summary", '-' * 88)
    print(summary['genai'])
    print("=" * 100, '\n')

# Example usage:
# Example document for summarization
document = """
Machine learning is a branch of artificial intelligence that focuses on building systems that learn from data and improve over time.
It involves the use of statistical methods and algorithms to model and analyze patterns in data, with applications ranging from
healthcare to finance and beyond.
"""

# Prepare the prompt
prompt = get_prompt(document)

# Get the response (summary)
summary = get_response(prompt, model, tokenizer)

# Print the result
print("Generated Summary:")
print(summary)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/637 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/7.10M [00:00<?, ?B/s]

OSError: Can't load tokenizer for 'kconstable/summation-flant5'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'kconstable/summation-flant5' is the correct path to a directory containing all relevant files for a T5TokenizerFast tokenizer.

In [4]:
import pandas as pd
import numpy as np
import json
import random
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import date
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    GenerationConfig,
    TrainingArguments,
    Trainer,
    pipeline,
    BitsAndBytesConfig,
    DataCollatorForSeq2Seq
)
import torch
import evaluate
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType,
    PeftModel,
    PeftConfig,
)
from huggingface_hub import login

In [2]:
from datasets import load_dataset

# summarization dataset
data_xsum = load_dataset("xsum")
data_xsum

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Generating train split:   0%|          | 0/204045 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/11332 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11334 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['document', 'summary', 'id'],
        num_rows: 204045
    })
    validation: Dataset({
        features: ['document', 'summary', 'id'],
        num_rows: 11332
    })
    test: Dataset({
        features: ['document', 'summary', 'id'],
        num_rows: 11334
    })
})

In [23]:
from datasets import load_dataset

# Load the XSum summarization dataset
data_xsum = load_dataset("xsum")

# Display the first 5 entries of the dataset
print(data_xsum["train"][:5])



In [5]:
# FLANT5 Base Model
base_model_name = 'google/flan-t5-base'
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# base model for training
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name, torch_dtype=torch.bfloat16, device_map='auto')

# original model for evaluation
original_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name, torch_dtype=torch.bfloat16, device_map='auto')

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [6]:
def get_prompt(doc):
    """Format prompts for text summarization using FLAN-T5 models"""

    # T5 models require the 'summarize' keyword generate text summaries
    prompt = "Summarize the following document:\n\n"
    prompt += f"{doc}"
    prompt += "\n\n Summary:"
    return prompt

In [7]:


def get_response(prompt, model, tokenizer):
  """ Generate a text summary from the prompt """

  # tokenize the prompt
  # truncation and padding is required to ensure each document
  # is the same length
  encoded_input = tokenizer(
      prompt,
      return_tensors="pt",
      add_special_tokens=False,
      padding='max_length',
      truncation=True
      )
  # move the inputs to the same DEVICE
  model_inputs = encoded_input.to(DEVICE)

  # generate the response
  generated_ids = model.generate(
      **model_inputs,
      max_new_tokens=MAX_TARGET_LENGTH,
      )

  # decode the response back to text
  decoded_output = tokenizer.batch_decode(generated_ids,skip_special_tokens=True)[0]

  return decoded_output

In [8]:

def print_example(summary,document ):
  """Print the original document, the human summary and
  the generated summary
   """
  # print the summaries
  print(f"Document:{summary['index']}")
  print("="*100)
  print(document)
  print("\nHuman Summary",'-'*88)
  print(summary['human'],'\n')
  print("GenAI Summary",'-'*88)
  print(summary['genai'])
  print("="*100,'\n')


In [9]:
def generate_responses(data, model, indexes, verbose=True):
    """Generate summaries for each example in the dataset"""

    # accumulator
    summaries=[]

    # for each example in the indexes
    for idx in indexes:
        # extract the document text and human summary
        document = data['test'][idx]['document']
        human_summary = data['test'][idx]['summary']

        # prepare the prompt
        prompt = get_prompt(document)

        # get the response
        genai_summary = get_response(prompt, model, tokenizer)

        # consolidate the example inputs, human summary and generated summary
        summary = {'index':idx,'human':human_summary,'genai':genai_summary}

        # accumulate summaries
        summaries.append(summary)

        # optional print dialogue & summary
        if verbose:
          print_example(summary,document)

    return summaries

In [11]:

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda')

In [13]:
PREFIX = 'Summarize: '
MAX_INPUT_LENGTH = 1024
MAX_TARGET_LENGTH = 128

In [14]:
# sample generations from the base model
summaries = generate_responses(
    data_xsum,
    base_model,
    [100,200],
    True
    )
summaries

Document:100
The British Transport Police said the move was a "proportionate response" in the face of a mounting terrorism threat.
Specially trained officers will begin carrying the stun weapons over the next few weeks.
It brings the Scottish force into line with their counterpart in England, where Tasers have been used since 2011.
The weapons are used to incapacitate suspects through the use of an electric current.
Temporary Assistant Chief Constable Alun Thomas said: "This decision is not based on specific intelligence of any criminal behaviour or imminent threat, but will allow us the option to deploy Taser devices where, in the course of their duty, an officer needs to protect the public or themselves by using force.
"The current threat to the UK from international terrorism remains 'severe', meaning an attack is highly likely.
"Recent terrorist attacks across the world are a stark reminder that the threat from terrorism is a genuine risk, and it is important that we keep our secur

[{'index': 100,
  'human': "Officers who police Scotland's railways are to be armed with Tasers in a bid to increase security on the network.",
  'genai': "Tasers are to be used by Scotland's police force to incapacitate suspects."},
 {'index': 200,
  'human': 'Harry Potter spin-off Fantastic Beasts and Where To Find Them has had the biggest UK box office opening weekend of the year so far.',
  'genai': 'Harry Potter: The Twilight Saga has topped the UK and US box office, taking an estimated $18.9m (£71.5m) in its opening weekend.'}]

In [15]:
def preprocess_for_summarization(examples):
  """Prepare the dataset for instrucion fine-tuning """

  # document inputs
  docs = [PREFIX + doc for doc in examples['document']]

  # tokenize the documents
  model_inputs = tokenizer(
      docs,
      max_length=MAX_INPUT_LENGTH,
      truncation=True
      )

  # tokenize human-generated summaries
  labels = examples['summary']
  label_ids = tokenizer(
      text_target=['summary'],
      max_length=MAX_TAexamplesRGET_LENGTH,
      padding='max_length',
      truncation=True
  )

  model_inputs['labels'] = label_ids['input_ids']

  return model_inputs

In [16]:
# tokenize all examples in each dataset, remove unnecessary features
tokenized_datasets = data_xsum.map(preprocess_for_summarization,batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['document','summary','id'])
tokenized_datasets

Map:   0%|          | 0/204045 [00:00<?, ? examples/s]

Map:   0%|          | 0/11332 [00:00<?, ? examples/s]

Map:   0%|          | 0/11334 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 204045
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 11332
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 11334
    })
})

In [17]:
# LoRA configuation
lora_config = LoraConfig(
    r = 32, # dimension of adaptors, rank
    lora_alpha=64, # alpha scaling
    target_modules = ["q","v"], # add LoRA adaptors to these layers in the base model
    lora_dropout=0.10, # regularization
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM # text summarization
)

# create the PEFT model
peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()

trainable params: 3,538,944 || all params: 251,116,800 || trainable%: 1.4093


In [24]:
# Data Collator: This function dynamically sets the padding during training
# ensures prompts of are equal length
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=base_model)



project = "FlanT5-finetune-Text-Summarizer"
base_model_name = "biomistral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name



# Training Config
config_training = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=5e-4,
    logging_steps=1,
    max_steps=100
)

# Trainer
trainer = Trainer(
    model=peft_model,
    args=config_training,
    data_collator = data_collator,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation']
)

# train
trainer.train()


# Save the fine-tuned model and tokenizer
peft_model.save_pretrained(output_dir)  # Save the fine-tuned model
tokenizer.save_pretrained(output_dir)  # Save the tokenizer

NameError: name 'DataCollatorForSeq2Seq' is not defined

In [6]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

# Define the device for inference (GPU or CPU)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_TARGET_LENGTH = 300  # Adjust as needed

# Load the fine-tuned model and tokenizer from the output directory
output_dir = "./biomistral-FlanT5-finetune-Text-Summarizer"
model = AutoModelForSeq2SeqLM.from_pretrained(output_dir).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# Define the prompt for summarization
def get_prompt(doc):
    """Format prompts for text summarization using FLAN-T5 models."""
    prompt = "Summarize the following document:\n\n"
    prompt += f"{doc}"
    prompt += "\n\n Summary:"
    return prompt

# Generate response (summary) from the model
def get_response(prompt, model, tokenizer):
    """Generate a text summary from the prompt."""
    # Tokenize the prompt
    encoded_input = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=True,
        padding='max_length',
        truncation=True,
        max_length=3024  # Adjust if the input text is large
    )

    # Move the inputs to the same device as the model (GPU or CPU)
    model_inputs = encoded_input.to(DEVICE)

    # Generate the response
    generated_ids = model.generate(
        **model_inputs,
        max_length=MAX_TARGET_LENGTH,
        num_beams=4,  # You can adjust the number of beams for beam search
        early_stopping=True
    )

    # Decode the response back to text
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded_output

# Example document for summarization
document = """
Education is a fundamental pillar in shaping the future of individuals and societies. It provides the knowledge and skills necessary to navigate the world, fosters critical thinking, and promotes social and cultural awareness. A strong educational system empowers people to reach their full potential, contributes to economic development, and reduces inequality. As technology continues to transform various industries, education must evolve to prepare students for the demands of the modern workforce. Investing in education is essential for building a more equitable and prosperous future for all.
"""

# Prepare the prompt
prompt = get_prompt(document)

# Get the response (summary)
summary = get_response(prompt, model, tokenizer)

# Print the result
print("Generated Summary:")
print(summary)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Generated Summary:
Understand the importance of education.


In [3]:
import pdfplumber
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import os

# Load fine-tuned model and tokenizer
output_dir = "./biomistral-FlanT5-finetune-Text-Summarizer"  # Your output directory where the model was saved
tokenizer = AutoTokenizer.from_pretrained(output_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(output_dir)

# Ensure the model is on the correct device (GPU or CPU)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model.to(DEVICE)

# Maximum target length (800 words max, adjust for tokens)
MAX_TARGET_LENGTH = 300  # Adjusted for reasonable summary length

# Function to extract text from a PDF file using pdfplumber
def extract_text_from_pdf(pdf_file_path):
    if not os.path.exists(pdf_file_path):
        raise FileNotFoundError(f"The file at {pdf_file_path} does not exist.")

    with pdfplumber.open(pdf_file_path) as pdf:
        text = ""
        for page in pdf.pages:
            text += page.extract_text()
    return text

# Function to trim text to a reasonable length
def trim_text(text, max_words=800):
    words = text.split()
    return " ".join(words[:max_words])

# Function to format the prompt for summarization
def get_prompt(doc):
    prompt = "Summarize the following document:\n\n"
    prompt += f"{doc}"
    prompt += "\n\n Summary:"
    return prompt

# Function to get the summary of the document
def get_response(prompt, model, tokenizer):
    # Tokenize the prompt
    encoded_input = tokenizer(
        prompt,
        return_tensors="pt",
        padding="longest",  # Make sure padding works as expected
        truncation=True,
        max_length=1024
    )

    # Move the inputs to the same device as the model
    model_inputs = encoded_input.to(DEVICE)

    # Generate the summary
    generated_ids = model.generate(
        **model_inputs,
        max_length=MAX_TARGET_LENGTH,
        num_beams=3,  # Lower number of beams
        early_stopping=True,
        no_repeat_ngram_size=3  # Prevent repetition
    )

    # Decode the response
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded_output

# Function to upload and summarize the PDF
def summarize_pdf(pdf_file_path):
    try:
        # Extract text from the PDF
        text = extract_text_from_pdf(pdf_file_path)

        # Trim the extracted text to ensure it doesn't exceed 800 words
        text = trim_text(text, max_words=1100)

        # Prepare the prompt
        prompt = get_prompt(text)

        # Get the summary
        summary = get_response(prompt, model, tokenizer)

        return summary
    except FileNotFoundError as e:
        return str(e)
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Example Usage
pdf_file_path = "1000-Word-Essay-on-discipline.pdf"  # Replace with the actual path
summary = summarize_pdf(pdf_file_path)
print("Generated Summary:")
print(summary)


Generated Summary:
Decide if you want to discipline yourself.


In [4]:
pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Downloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.1


In [5]:
!ngrok config add-authtoken 2pnVmWy04kYVi1iqeua6vEuSP5o_2VWdescXdEfSWGp8npCXN

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [16]:
!streamlit run summarizer.py --server.port 8501 --server.headless true &> /dev/null &

In [19]:
from pyngrok import ngrok

# Start the Streamlit app in the background
!streamlit run summarizer.py &>/dev/null &

# Expose the app on port 8502 (the correct port where Streamlit is running)
public_url = ngrok.connect(8501)  # Ensure the port matches the Streamlit app port
print(f"Streamlit app is live at: {public_url}")

Streamlit app is live at: NgrokTunnel: "https://8b82-34-142-181-141.ngrok-free.app" -> "http://localhost:8501"


In [21]:
from pyngrok import ngrok

# Close the ngrok tunnel
ngrok.kill()

In [20]:
!curl http://localhost:8501

<!--
 Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2024)

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1, shrink-to-fit=no"
    />
    <link rel="shortcut icon" href="./favicon.png" />
    <link
      rel="preload"
      href="./static/media/SourceSansPro-Regular.DZLUzqI4.woff2"
      as="font"
      type="font/woff2"
      crossorigin


In [1]:
!streamlit run summarizer.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.142.181.141:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m


In [10]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.41.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.0-py2.py3-none-any.whl (23.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.4/23.4 MB[0m [31m37.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m

In [7]:
pip install huggingface_hub



In [8]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: fineGrained).
The token `logintoken` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-a

In [9]:
from huggingface_hub import HfApi

repo_name = "ShahzaibDev/flant5-finetuned-summarizer"
local_model_path = "./biomistral-FlanT5-finetune-Text-Summarizer"

api = HfApi()
api.create_repo(repo_name, private=False)

# Upload the entire directory
from huggingface_hub import upload_folder
upload_folder(
    repo_id=repo_name,
    folder_path=local_model_path,
    commit_message="Upload fine-tuned BioMistral model"
)


adapter_model.safetensors:   0%|          | 0.00/14.2M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/14.2M [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/28.4M [00:00<?, ?B/s]

Upload 8 LFS files:   0%|          | 0/8 [00:00<?, ?it/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

events.out.tfevents.1734005487.9b83ea8da157.5872.2:   0%|          | 0.00/33.1k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ShahzaibDev/flant5-finetuned-summarizer/commit/c22a98fdf83a483e8d55e6133171dcf53d45179a', commit_message='Upload fine-tuned BioMistral model', commit_description='', oid='c22a98fdf83a483e8d55e6133171dcf53d45179a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ShahzaibDev/flant5-finetuned-summarizer', endpoint='https://huggingface.co', repo_type='model', repo_id='ShahzaibDev/flant5-finetuned-summarizer'), pr_revision=None, pr_num=None)

In [14]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

# Define the device for inference (GPU or CPU)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_TARGET_LENGTH = 1000  # Increase this for longer summaries

# Load the fine-tuned model and tokenizer from the output directory
output_dir = "./biomistral-FlanT5-finetune-Text-Summarizer"
fine_tuned_model = AutoModelForSeq2SeqLM.from_pretrained(output_dir).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# Load the base FLAN-T5 model
base_model_name = "google/flan-t5-base"
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name).to(DEVICE)

# Combine the base model with your fine-tuned model (use the fine-tuned model for generation)
def get_prompt(doc):
    """Format prompts for text summarization using FLAN-T5 models."""
    prompt = "Summarize the following document:\n\n"
    prompt += f"{doc}"
    prompt += "\n\n Summary:"
    return prompt

# Generate response (summary) from the model
def get_response(prompt, model, tokenizer):
    """Generate a text summary from the prompt."""
    # Tokenize the prompt
    encoded_input = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=True,
        padding='max_length',
        truncation=True,
        max_length=3024  # Adjust if the input text is large
    )

    # Move the inputs to the same device as the model (GPU or CPU)
    model_inputs = encoded_input.to(DEVICE)

    # Generate the response using the fine-tuned model on top of the base model
    generated_ids = model.generate(
        **model_inputs,
        max_length=MAX_TARGET_LENGTH,
        num_beams=8,  # Increased for more exploration of output space
        early_stopping=True,
        no_repeat_ngram_size=3,  # Avoid repetition
        temperature=0.7,  # Introduces more diversity in the output
        top_k=50  # Controls randomness of the generation
    )

    # Decode the response back to text
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded_output

# Example document for summarization
document = """
Education is a fundamental pillar in shaping the future of individuals and societies. It provides the knowledge and skills necessary to navigate the world, fosters critical thinking, and promotes social and cultural awareness. A strong educational system empowers people to reach their full potential, contributes to economic development, and reduces inequality. As technology continues to transform various industries, education must evolve to prepare students for the demands of the modern workforce. Investing in education is essential for building a more equitable and prosperous future for all.
"""

# Prepare the prompt
prompt = get_prompt(document)

# Get the response (summary) from the fine-tuned model
summary = get_response(prompt, fine_tuned_model, tokenizer)

# Print the result
print("Generated Summary:")
print(summary)




Generated Summary:
Understand the importance of education.
