In [1]:
import pandas as pd
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
 
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def summarize(text):
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(inputs, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
    return tokenizer.decode(outputs[0])

# Example usage:
text = "Binary numbers are composed of only two digits: 0 and 1. They are fundamental in computer science because digital electronic circuits are built upon this system."
summary = summarize(text)
print(summary)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


<pad> binary numbers are composed of only two digits: 0 and 1. They are fundamental in computer science because digital electronic circuits are built upon this system. the binary numbers are composed of only two digits: 0 and 1.</s>


In [9]:
# Importing evaluate library
import evaluate

# Load the BLEU and ROUGE metrics
bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")

# Example sentences (non-tokenized)
reference = ["Binary numbers are composed of only two digits: 0 and 1. They are fundamental in computer science because digital electronic circuits are built upon this system."]
candidate = ["Binary numbers are composed of only two digits: 0 and 1. They are fundamental in computer science because digital electronic circuits are built upon this system."]

# BLEU expects plain text inputs
bleu_results = bleu_metric.compute(predictions=candidate, references=reference)
print(f"BLEU Score: {bleu_results['bleu'] * 100:.2f}")

# ROUGE expects plain text inputs
rouge_results = rouge_metric.compute(predictions=candidate, references=reference)

# Access ROUGE scores (no need for indexing into the result)
print(f"ROUGE-1 F1 Score: {rouge_results['rouge1']:.2f}")
print(f"ROUGE-L F1 Score: {rouge_results['rougeL']:.2f}")

BLEU Score: 100.00
ROUGE-1 F1 Score: 1.00
ROUGE-L F1 Score: 1.00


In [None]:
# Calculating sentiment analysis scores
from textblob import TextBlob

# Example text
text_1 =  "Solving quadratic equations often involves factoring, completing the square, or using the quadratic formula. The formula, $x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}$, is particularly useful for complex cases."
text_2 = "Newton's laws of motion describe the relationship between a body and the forces acting upon it, and its motion in response to those forces. The first law is often called the law of inertia."

# Analyze sentiment for text_1
blob_1 = TextBlob(text_1)
polarity_1 = blob_1.sentiment.polarity
subjectivity_1 = blob_1.sentiment.subjectivity

print(f"Text 1 - Polarity: {polarity_1}, Subjectivity: {subjectivity_1}")

# Analyze sentiment for text_2
blob_2 = TextBlob(text_2)
polarity_2 = blob_2.sentiment.polarity
subjectivity_2 = blob_2.sentiment.subjectivity

print(f"Text 2 - Polarity: {polarity_2}, Subjectivity: {subjectivity_2}")

Text 1 - Polarity: 0.0, Subjectivity: 0.2
Text 2 - Polarity: 0.125, Subjectivity: 0.16666666666666666
