In [1]:
# https://huggingface.co/transformers/v3.2.0/custom_datasets.html

In [2]:
!pip install transformers -U

Collecting transformers
  Downloading transformers-4.46.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.46.2-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m77.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.45.1
    Uninstalling transformers-4.45.1:
      Successfully uninstalled transformers-4.45.1
Successfully installed transformers-4.46.2


In [3]:
import pandas as pd

In [4]:
# https://www.kaggle.com/competitions/jigsaw-toxic-comment-classification-challenge/data

data = pd.read_excel("/kaggle/input/bart-dataset/PROJECT_DATA.xlsx")

data.head()

Unnamed: 0,SNO.,MCV,MCHC,HB,RBC,WBC,PLT,RDWCV,NEUTRO,LYMPHO,SUMMARY
0,Report1,87.7,30.1,7.3,2.77,10.0,189.0,11.4,50.1,43.2,Your report shows a hemoglobin level of 7.3 g/...
1,Report2,88.2,20.2,7.3,2.84,10.0,180.0,11.4,52.3,42.4,"In your CBC report, your hemoglobin level is 7..."
2,Report3,77.0,29.5,9.0,3.97,7.2,148.0,13.7,60.7,30.7,"In your CBC report, your hemoglobin level is 9..."
3,Report4,77.9,29.8,3.8,4.22,6.0,143.0,17.0,63.5,30.2,"Your CBC shows a hemoglobin level of 3.8 g/dL,..."
4,Report5,80.6,29.7,0.4,3.93,4.2,236.0,15.1,53.7,39.1,"In your CBC, the hemoglobin level is criticall..."


In [5]:
# data.to_csv("/content/drive/MyDrive/Youtube Tutorials/datasets/toxic_commnets.csv",index=False)

In [6]:
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

import torch

from transformers import TrainingArguments, Trainer

from transformers import BertTokenizer, BertForSequenceClassification

In [7]:
pip install datasets


  pid, fd = os.forkpty()


Note: you may need to restart the kernel to use updated packages.


In [8]:
!rm -rf /root/.cache/huggingface


In [24]:
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import Dataset
from transformers import Trainer, TrainingArguments
import torch
import evaluate

# Load ROUGE metric
rouge = evaluate.load("rouge")

# Step 1: Load the pre-trained smaller BART model for summarization
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')

# Load the tokenizer for the smaller BART model
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

# Step 2: Preprocessing function (same as before)
def preprocess_function(examples):
    input_texts = [
        " ".join([
            str(examples['MCV '][i]),
            str(examples['MCHC '][i]),
            str(examples['HB '][i]),
            str(examples['RBC '][i]),
            str(examples['WBC '][i]),
            str(examples['PLT'][i]),
            str(examples['RDWCV'][i]),
            str(examples['NEUTRO'][i]),
            str(examples['LYMPHO'][i])
        ]) for i in range(len(examples['MCV ']))  # Iterate over the batch
    ]
    
    model_inputs = tokenizer(input_texts, max_length=1024, padding="max_length", truncation=True)
    
    labels = tokenizer(examples['SUMMARY '], max_length=200, padding="max_length", truncation=True)
    
    model_inputs["labels"] = labels["input_ids"]
    
    return model_inputs

# Step 3: Create Dataset object from your pandas DataFrame (assuming 'data' is your DataFrame)
dataset = Dataset.from_pandas(data)

# Step 4: Apply the preprocessing function to the dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True)

# Step 5: Split the dataset into train/test (80/20 split)
train_test_split_datasets = tokenized_datasets.train_test_split(test_size=0.2)
train_dataset = train_test_split_datasets['train']
test_dataset = train_test_split_datasets['test']

# Step 6: Define the TrainingArguments (adjust these according to your setup)
training_args = TrainingArguments(
    output_dir='./results1_NEW',  # Output directory for checkpoints
    num_train_epochs=70,  # Number of training epochs
    per_device_train_batch_size=2,  # Batch size for training
    per_device_eval_batch_size=2,  # Batch size for evaluation
    warmup_steps=500,  # Number of warmup steps for learning rate scheduler
    weight_decay=0.01,  # Strength of weight decay
    logging_dir='./logs',  # Directory for storing logs
    logging_steps=10,  # Log every X steps
    evaluation_strategy="epoch",  # Evaluate the model at the end of each epoch
    save_strategy="epoch",  # Save model at the end of each epoch
    load_best_model_at_end=True,  # Load the best model when finished training
    metric_for_best_model="loss",  # Metric for evaluating the best model
    push_to_hub=False,  # Set to True if you want to push to HuggingFace Hub
)

# Step 7: Define the compute_metrics function
def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    # Decode predictions and labels
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Compute ROUGE scores
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # Format ROUGE results
    rouge_result = {key: value.mid.fmeasure for key, value in rouge_result.items()}

    return rouge_result

# Step 8: Define the Trainer with model, training arguments, datasets, and compute_metrics
trainer = Trainer(
    model=model,  # Pre-trained BART model
    args=training_args,  # TrainingArguments
    train_dataset=train_dataset,  # Tokenized training dataset
    eval_dataset=test_dataset,  # Tokenized test dataset
    compute_metrics=compute_metrics,  # Function for evaluation metrics
)

# Step 9: Train the model
trainer.train()

# Step 10: Evaluate the model on the test set
results = trainer.evaluate()

# Print the evaluation results (including ROUGE scores)
print("Test Results:", results)


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Map:   0%|          | 0/282 [00:00<?, ? examples/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss


TypeError: int() argument must be a string, a bytes-like object or a real number, not 'list'

In [23]:
import torch
from transformers import BartTokenizer, BartForConditionalGeneration

# Load the saved model and tokenizer
model = BartForConditionalGeneration.from_pretrained("./model_NEW")
tokenizer = BartTokenizer.from_pretrained("./model_NEW")

# Step 10: Save the model and tokenizer (optional)
model.save_pretrained("./model_NEW")
tokenizer.save_pretrained("./model_NEW")

# Step 11: Make predictions (optional, for inference)
def generate_summary(input_text):
    # Move the model to the GPU (if available)
    model.to('cuda' if torch.cuda.is_available() else 'cpu')

    # Tokenize the input text
    inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, padding="max_length", truncation=True)

    # Ensure the inputs are on the same device as the model (GPU or CPU)
    input_ids = inputs['input_ids'].to(model.device)
    attention_mask = inputs['attention_mask'].to(model.device)

    # Generate the summary (with beam search for better results)
    summary_ids = model.generate(input_ids, attention_mask=attention_mask, max_length=700, num_beams=4, early_stopping=True)

    # Decode the generated summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Example input text (lab values)
input_text = "MCV: 85 MCHC: 33 HB: 5.5 RBC: 1.5 WBC: 3.0 PLT: 250 RDWCV: 15 NEUTRO: 60 LYMPHO: 30"

# Generate the summary for the input text
summary = generate_summary(input_text)
print("Generated Summary:", summary)

Generated Summary: MCV: 85 MCHC: 33 RBC count, WBC: 5.5 RBC: 1.5 WBC, RDW-CV of 4.5% indicates mild anemia. WBC count is normal at 3.5 thousand cells/µL, with neutrophils at 4.0%. WBCs are normal at 6.5%, with RBCs at 5.0% and RBC levels at 5% respectively. The platelet count is adequate for clotting and clotting. RBC counts are low at 12.5%. RBC values are normal, but WBC levels are elevated at 12% and WBC values range from 10% to 20%. WMCs are low, indicating a need for further investigation.


In [15]:
# Step 10: Save the model and tokenizer (optional)
model.save_pretrained("./model")
tokenizer.save_pretrained("./model")


('./model/tokenizer_config.json',
 './model/special_tokens_map.json',
 './model/vocab.json',
 './model/merges.txt',
 './model/added_tokens.json')

In [17]:
from nltk.translate.bleu_score import corpus_bleu
from rouge_score import rouge_scorer
import torch
from tqdm import tqdm

# Function to evaluate the model
def evaluate_model(model, test_dataloader, device):
    model.eval()  # Set model to evaluation mode
    total_loss = 0
    predictions, references = [], []
    
    for batch in tqdm(test_dataloader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass
        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

        # Decode the predicted token IDs to text
        pred_ids = outputs.logits.argmax(dim=-1)
        decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

        # Store the predictions and references for BLEU/ROUGE
        predictions.extend(decoded_preds)
        references.extend(decoded_labels)

    # Compute average loss
    avg_loss = total_loss / len(test_dataloader)

    # Calculate BLEU score (using NLTK's corpus_bleu)
    bleu_score = corpus_bleu([[ref.split()] for ref in references], [pred.split() for pred in predictions])

    # Calculate ROUGE score (using rouge-score package)
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}
    for ref, pred in zip(references, predictions):
        scores = scorer.score(ref, pred)
        for key in rouge_scores:
            rouge_scores[key].append(scores[key].fmeasure)
    
    # Calculate average ROUGE scores
    avg_rouge_scores = {key: sum(value) / len(value) for key, value in rouge_scores.items()}

    return avg_loss, bleu_score, avg_rouge_scores

# Evaluate the model on the test dataset
avg_loss, bleu_score, avg_rouge_scores = evaluate_model(model, test_dataloader, device)

# Print the evaluation metrics
print(f"Evaluation Results:")
print(f"Average Loss: {avg_loss:.4f}")
print(f"BLEU Score: {bleu_score:.4f}")
print(f"ROUGE Scores: {avg_rouge_scores}")


NameError: name 'test_dataloader' is not defined

In [16]:
def compute_metrics(p):

    print(type(p))

    pred, labels = p

    pred = np.argmax(pred, axis=1)



    accuracy = accuracy_score(y_true=labels, y_pred=pred)

    recall = recall_score(y_true=labels, y_pred=pred)

    precision = precision_score(y_true=labels, y_pred=pred)

    f1 = f1_score(y_true=labels, y_pred=pred)



    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

In [12]:
trainer.evaluate()

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


{'eval_loss': 1.5616127252578735,
 'eval_runtime': 2.4965,
 'eval_samples_per_second': 22.832,
 'eval_steps_per_second': 6.008,
 'epoch': 3.0}

In [13]:
pip install evaluate


  pid, fd = os.forkpty()


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m501.7 kB/s[0m eta [36m0:00:00[0m:01[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3
Note: you may need to restart the kernel to use updated packages.


In [14]:
pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=2f9850a25cb125fbd8981d15a3f320364bca492b4f09c8b4349a0f79f1067130
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Note: you may need to restart the kernel to use updated packages.
