In [1]:
import os
import gc
import torch
import pandas as pd
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    pipeline
)
from tqdm import tqdm
import evaluate
import json
from datetime import datetime

# ---------------- CONFIG ----------------
MODEL_DICT = {
    "t5":         "t5-base",
}

OUTPUT_ROOT = "./finetuned_cnn_dm_sentiment"
RESULTS_ROOT = "./results_csv"
os.makedirs(OUTPUT_ROOT, exist_ok=True)
os.makedirs(RESULTS_ROOT, exist_ok=True)

MAX_INPUT_LEN = 512
MAX_TARGET_LEN = 128
NUM_EPOCHS = 2
BATCH_SIZE = 2
LEARNING_RATE = 5e-5
TRAIN_SIZE = 20000  # Made configurable

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"✅ Training on: {device}")

# ---------------- LOAD DATASET ----------------
def load_and_prepare_dataset():
    """Load and prepare the CNN/DailyMail dataset"""
    print("Loading dataset...")
    dataset = load_dataset("abisee/cnn_dailymail", "3.0.0")
    dataset = dataset["train"].select(range(TRAIN_SIZE))
    return dataset

# ---------------- METRIC ----------------
rouge = evaluate.load("rouge")

def compute_metrics(eval_pred, tokenizer):
    """Compute ROUGE metrics for evaluation"""
    preds, labels = eval_pred
    if isinstance(preds, tuple):
        preds = preds[0]
    
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    # Clean up predictions and labels
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [label.strip() for label in decoded_labels]
    
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    return {k: round(v * 100, 2) for k, v in result.items()}

# ---------------- SENTIMENT ANALYSIS SETUP ----------------
def setup_sentiment_pipelines():
    """Initialize all sentiment analysis pipelines"""
    print("Setting up sentiment analysis pipelines...")
    
    pipelines = {}
    try:
        pipelines['polarity'] = pipeline("sentiment-analysis", 
                                        model="cardiffnlp/twitter-roberta-base-sentiment-latest")
    except:
        pipelines['polarity'] = pipeline("sentiment-analysis")
    
    try:
        pipelines['finegrained'] = pipeline("sentiment-analysis", 
                                           model="nlptown/bert-base-multilingual-uncased-sentiment")
    except:
        print("Warning: Fine-grained sentiment model not available, skipping...")
        pipelines['finegrained'] = None
    
    try:
        pipelines['emotion'] = pipeline("text-classification", 
                                       model="j-hartmann/emotion-english-distilroberta-base", 
                                       return_all_scores=True)
    except:
        print("Warning: Emotion model not available, skipping...")
        pipelines['emotion'] = None
    
    try:
        pipelines['zero_shot'] = pipeline("zero-shot-classification", 
                                         model="facebook/bart-large-mnli")
    except:
        print("Warning: Zero-shot model not available, skipping...")
        pipelines['zero_shot'] = None
    
    return pipelines

# Labels for zero-shot classification
INTENT_LABELS = ["informative", "warning", "opinion", "breaking news", "analysis"]
ASPECT_LABELS = ["economy", "politics", "health", "sports", "technology", "entertainment"]

# ---------------- TRAINING FUNCTION ----------------
def train_and_save(model_key, model_name, dataset, sentiment_pipelines):
    """Train a model and return inference function"""
    print(f"\n🚀 Fine-tuning {model_key}...")
    
    # Clear GPU memory
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()
    
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        # Add pad token if missing
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
        
        def preprocess(batch):
            inputs = batch["article"]
            if "t5" in model_key:
                inputs = ["summarize: " + text for text in inputs]
            
            model_inputs = tokenizer(
                inputs, 
                max_length=MAX_INPUT_LEN, 
                truncation=True, 
                padding="max_length"
            )
            
            labels = tokenizer(
                batch["highlights"], 
                max_length=MAX_TARGET_LEN, 
                truncation=True, 
                padding="max_length"
            )
            
            model_inputs["labels"] = labels["input_ids"]
            return model_inputs
        
        # Split dataset
        split_dataset = dataset.train_test_split(test_size=0.1, seed=42)
        ds_train = split_dataset["train"]
        ds_val = split_dataset["test"]
        
        print(f"Training samples: {len(ds_train)}")
        print(f"Validation samples: {len(ds_val)}")
        
        # Tokenize datasets
        tokenized_train = ds_train.map(preprocess, batched=True, remove_columns=ds_train.column_names)
        tokenized_val = ds_val.map(preprocess, batched=True, remove_columns=ds_val.column_names)
        
        collator = DataCollatorForSeq2Seq(tokenizer, model)
        
        args = Seq2SeqTrainingArguments(
            output_dir=f"{OUTPUT_ROOT}/{model_key}",
            eval_strategy="epoch",
            learning_rate=LEARNING_RATE,
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            num_train_epochs=NUM_EPOCHS,
            weight_decay=0.01,
            save_strategy="epoch",
            save_total_limit=2,
            predict_with_generate=True,
            fp16=torch.cuda.is_available(),
            logging_dir=f"{OUTPUT_ROOT}/{model_key}/logs",
            report_to="none",
            load_best_model_at_end=True,
            metric_for_best_model="eval_rouge1",
            greater_is_better=True,
        )
        
        trainer = Seq2SeqTrainer(
            model=model,
            args=args,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_val,
            tokenizer=tokenizer,
            data_collator=collator,
            compute_metrics=lambda x: compute_metrics(x, tokenizer),
        )
        
        # Train the model
        trainer.train()
        
        # Save model and tokenizer
        trainer.save_model(f"{OUTPUT_ROOT}/{model_key}")
        tokenizer.save_pretrained(f"{OUTPUT_ROOT}/{model_key}")
        
        # Save training metrics
        training_history = trainer.state.log_history
        with open(f"{OUTPUT_ROOT}/{model_key}/training_history.json", "w") as f:
            json.dump(training_history, f, indent=2)
        
        print(f"✅ {model_key} training completed and saved!")
        
        # Create inference function
        def summarize_with_sentiments(article_text):
            try:
                prefix = "summarize: " if "t5" in model_key else ""
                inputs = tokenizer(
                    prefix + article_text, 
                    return_tensors="pt", 
                    truncation=True, 
                    max_length=MAX_INPUT_LEN
                )
                inputs = {k: v.to(model.device) for k, v in inputs.items()}
                
                with torch.no_grad():
                    ids = model.generate(
                        **inputs, 
                        max_length=MAX_TARGET_LEN, 
                        num_beams=4, 
                        early_stopping=True,
                        no_repeat_ngram_size=2
                    )
                
                summary = tokenizer.decode(ids[0], skip_special_tokens=True)
                trainer.save_model(f"{OUTPUT_ROOT}/{model_key}")
                tokenizer.save_pretrained(f"{OUTPUT_ROOT}/{model_key}")
                # Perform sentiment analysis
                result = {"summary": summary, "model": model_key}
                
                # Polarity analysis
                if sentiment_pipelines['polarity']:
                    try:
                        polarity = sentiment_pipelines['polarity'](summary)[0]
                        result["polarity"] = polarity
                    except Exception as e:
                        print(f"Polarity analysis failed: {e}")
                        result["polarity"] = {"label": "UNKNOWN", "score": 0.0}
                
                # Fine-grained sentiment
                if sentiment_pipelines['finegrained']:
                    try:
                        finegrained = sentiment_pipelines['finegrained'](summary)[0]
                        result["finegrained"] = finegrained
                    except Exception as e:
                        print(f"Fine-grained analysis failed: {e}")
                        result["finegrained"] = {"label": "UNKNOWN", "score": 0.0}
                
                # Emotion analysis
                if sentiment_pipelines['emotion']:
                    try:
                        emotions = sentiment_pipelines['emotion'](summary)[0]
                        result["emotions"] = emotions
                    except Exception as e:
                        print(f"Emotion analysis failed: {e}")
                        result["emotions"] = []
                
                # Intent classification
                if sentiment_pipelines['zero_shot']:
                    try:
                        intent = sentiment_pipelines['zero_shot'](summary, candidate_labels=INTENT_LABELS)
                        result["intent"] = intent
                    except Exception as e:
                        print(f"Intent classification failed: {e}")
                        result["intent"] = {"labels": [], "scores": []}
                
                # Aspect classification
                if sentiment_pipelines['zero_shot']:
                    try:
                        aspects = sentiment_pipelines['zero_shot'](summary, candidate_labels=ASPECT_LABELS)
                        result["aspects"] = aspects
                    except Exception as e:
                        print(f"Aspect classification failed: {e}")
                        result["aspects"] = {"labels": [], "scores": []}
                
                return result
                
            except Exception as e:
                print(f"Error in summarization: {e}")
                return {
                    "summary": "Error generating summary",
                    "model": model_key,
                    "error": str(e)
                }
        
        return summarize_with_sentiments
    
    except Exception as e:
        print(f"❌ Error training {model_key}: {e}")
        return None

# ---------------- MAIN EXECUTION ----------------
def main():
    """Main execution function"""
    print("🚀 Starting CNN/DailyMail Fine-tuning Pipeline")
    print(f"Timestamp: {datetime.now()}")
    
    # Load dataset
    dataset = load_and_prepare_dataset()
    
    # Setup sentiment pipelines
    sentiment_pipelines = setup_sentiment_pipelines()
    
    # Train models and collect inference functions
    model_functions = {}
    for key, name in MODEL_DICT.items():
        print(f"\n{'='*50}")
        summarize_fn = train_and_save(key, name, dataset, sentiment_pipelines)
        if summarize_fn:
            model_functions[key] = summarize_fn
            # Clear memory after each model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
    
    # Test on sample articles
    sample_articles = [dataset[i]["article"] for i in range(10)]  # Reduced for faster testing
    
    for key, fn in model_functions.items():
        results = []
        print(f"\n📊 Processing test articles with {key}...")
        
        for i, article in enumerate(tqdm(sample_articles, desc=f"{key} inference")):
            try:
                res = fn(article)
                res["article_id"] = i
                res["article_snippet"] = article[:200] + "..." if len(article) > 200 else article
                results.append(res)
            except Exception as e:
                print(f"Error processing article {i} with {key}: {e}")
                continue

        # Save results
        if results:
            df = pd.DataFrame(results)
            output_file = f"{RESULTS_ROOT}/{key}_results.csv"
            df.to_csv(output_file, index=False)
            print(f"✅ Saved {len(results)} results to {output_file}")
    
    print(f"\n🎉 Pipeline completed! Check results in {RESULTS_ROOT}/")

if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


✅ Training on: cuda
🚀 Starting CNN/DailyMail Fine-tuning Pipeline
Timestamp: 2025-08-23 21:37:03.155128
Loading dataset...
Setting up sentiment analysis pipelines...


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0
Device set to use cuda:0




Device set to use cuda:0




🚀 Fine-tuning t5...
Training samples: 18000
Validation samples: 2000


Map: 100%|██████████| 2000/2000 [00:03<00:00, 562.56 examples/s]
  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,0.8083,0.691227,26.34,11.87,21.46,21.47
2,0.9258,0.816013,24.98,10.34,20.23,20.25


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


✅ t5 training completed and saved!

📊 Processing test articles with t5...


t5 inference:  50%|█████     | 5/10 [00:38<00:36,  7.34s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
t5 inference: 100%|██████████| 10/10 [01:13<00:00,  7.34s/it]

✅ Saved 10 results to ./results_csv/t5_results.csv

🎉 Pipeline completed! Check results in ./results_csv/





In [5]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# ---------------- PATHS ----------------
MODEL_PATH = "./finetuned_cnn_dm_sentiment/t5"

# ---------------- LOAD MODEL ----------------
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH).to(device)

# ---------------- SENTIMENT / CLASSIFICATION PIPELINES ----------------
pipelines = {}

pipelines['finegrained'] = pipeline("sentiment-analysis",
                                    model="nlptown/bert-base-multilingual-uncased-sentiment")
pipelines['emotion'] = pipeline("text-classification",
                                model="j-hartmann/emotion-english-distilroberta-base",
                                return_all_scores=True)
pipelines['zero_shot'] = pipeline("zero-shot-classification",
                                  model="facebook/bart-large-mnli")

INTENT_LABELS = ["informative", "warning", "opinion", "breaking news", "analysis"]
ASPECT_LABELS = ["economy", "politics", "health", "sports", "technology", "entertainment"]

# ---------------- SUMMARIZE + SENTIMENT ----------------
def summarize_with_sentiments(text, max_input_len=512, max_output_len=128):
    """Generate a summary and run sentiment/emotion/zero-shot analysis"""

    # Add prefix for T5
    if "t5" in MODEL_PATH.lower():
        text = "summarize: " + text

    # Encode input
    inputs = tokenizer(text, return_tensors="pt",
                       truncation=True, max_length=max_input_len).to(device)

    # Generate summary
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=max_output_len,
            num_beams=4,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
    summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Build result dictionary
    result = {"summary": summary, "model": "t5"}

    # Run sentiment / classification
    try:
        result["polarity"] = pipelines['polarity'](summary)[0]
    except:
        result["polarity"] = {"label": "UNKNOWN", "score": 0.0}

    try:
        result["finegrained"] = pipelines['finegrained'](summary)[0]
    except:
        result["finegrained"] = {"label": "UNKNOWN", "score": 0.0}

    try:
        result["emotions"] = pipelines['emotion'](summary)[0]
    except:
        result["emotions"] = []

    try:
        result["intent"] = pipelines['zero_shot'](summary, candidate_labels=INTENT_LABELS)
    except:
        result["intent"] = {"labels": [], "scores": []}

    try:
        result["aspects"] = pipelines['zero_shot'](summary, candidate_labels=ASPECT_LABELS)
    except:
        result["aspects"] = {"labels": [], "scores": []}

    return result


Device set to use cuda:0


ValueError: Could not load model j-hartmann/emotion-english-distilroberta-base with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForSequenceClassification'>, <class 'transformers.models.auto.modeling_tf_auto.TFAutoModelForSequenceClassification'>, <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'>, <class 'transformers.models.roberta.modeling_tf_roberta.TFRobertaForSequenceClassification'>). See the original errors:

while loading with AutoModelForSequenceClassification, an error is thrown:
Traceback (most recent call last):
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\pipelines\base.py", line 292, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\models\auto\auto_factory.py", line 600, in from_pretrained
    return model_class.from_pretrained(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 317, in _wrapper
    return func(*args, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 4990, in from_pretrained
    config, torch_dtype, dtype_orig = _get_torch_dtype(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 1387, in _get_torch_dtype
    state_dict = load_state_dict(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 562, in load_state_dict
    check_torch_load_is_safe()
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\utils\import_utils.py", line 1622, in check_torch_load_is_safe
    raise ValueError(
ValueError: Due to a serious vulnerability issue in `torch.load`, even with `weights_only=True`, we now require users to upgrade torch to at least v2.6 in order to use the function. This version restriction does not apply when loading files with safetensors.
See the vulnerability report here https://nvd.nist.gov/vuln/detail/CVE-2025-32434

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\pipelines\base.py", line 310, in infer_framework_load_model
    model = model_class.from_pretrained(model, **fp32_kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\models\auto\auto_factory.py", line 600, in from_pretrained
    return model_class.from_pretrained(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 317, in _wrapper
    return func(*args, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 5074, in from_pretrained
    ) = cls._load_pretrained_model(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 5340, in _load_pretrained_model
    load_state_dict(checkpoint_files[0], map_location="meta", weights_only=weights_only).keys()
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 562, in load_state_dict
    check_torch_load_is_safe()
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\utils\import_utils.py", line 1622, in check_torch_load_is_safe
    raise ValueError(
ValueError: Due to a serious vulnerability issue in `torch.load`, even with `weights_only=True`, we now require users to upgrade torch to at least v2.6 in order to use the function. This version restriction does not apply when loading files with safetensors.
See the vulnerability report here https://nvd.nist.gov/vuln/detail/CVE-2025-32434

while loading with TFAutoModelForSequenceClassification, an error is thrown:
Traceback (most recent call last):
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\pipelines\base.py", line 310, in infer_framework_load_model
    model = model_class.from_pretrained(model, **fp32_kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\models\auto\auto_factory.py", line 600, in from_pretrained
    return model_class.from_pretrained(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_tf_utils.py", line 2929, in from_pretrained
    model = cls(config, *model_args, **model_kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 1396, in __init__
    super().__init__(config, *inputs, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_tf_utils.py", line 1190, in __init__
    super().__init__(*inputs, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\tensorflow\python\trackable\base.py", line 205, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\keras\utils\generic_utils.py", line 1269, in validate_kwargs
    raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'torch_dtype')

while loading with RobertaForSequenceClassification, an error is thrown:
Traceback (most recent call last):
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\pipelines\base.py", line 292, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 317, in _wrapper
    return func(*args, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 4990, in from_pretrained
    config, torch_dtype, dtype_orig = _get_torch_dtype(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 1387, in _get_torch_dtype
    state_dict = load_state_dict(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 562, in load_state_dict
    check_torch_load_is_safe()
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\utils\import_utils.py", line 1622, in check_torch_load_is_safe
    raise ValueError(
ValueError: Due to a serious vulnerability issue in `torch.load`, even with `weights_only=True`, we now require users to upgrade torch to at least v2.6 in order to use the function. This version restriction does not apply when loading files with safetensors.
See the vulnerability report here https://nvd.nist.gov/vuln/detail/CVE-2025-32434

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\pipelines\base.py", line 310, in infer_framework_load_model
    model = model_class.from_pretrained(model, **fp32_kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 317, in _wrapper
    return func(*args, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 5074, in from_pretrained
    ) = cls._load_pretrained_model(
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 5340, in _load_pretrained_model
    load_state_dict(checkpoint_files[0], map_location="meta", weights_only=weights_only).keys()
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_utils.py", line 562, in load_state_dict
    check_torch_load_is_safe()
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\utils\import_utils.py", line 1622, in check_torch_load_is_safe
    raise ValueError(
ValueError: Due to a serious vulnerability issue in `torch.load`, even with `weights_only=True`, we now require users to upgrade torch to at least v2.6 in order to use the function. This version restriction does not apply when loading files with safetensors.
See the vulnerability report here https://nvd.nist.gov/vuln/detail/CVE-2025-32434

while loading with TFRobertaForSequenceClassification, an error is thrown:
Traceback (most recent call last):
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\pipelines\base.py", line 310, in infer_framework_load_model
    model = model_class.from_pretrained(model, **fp32_kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_tf_utils.py", line 2929, in from_pretrained
    model = cls(config, *model_args, **model_kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\models\roberta\modeling_tf_roberta.py", line 1396, in __init__
    super().__init__(config, *inputs, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\transformers\modeling_tf_utils.py", line 1190, in __init__
    super().__init__(*inputs, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\tensorflow\python\trackable\base.py", line 205, in _method_wrapper
    result = method(self, *args, **kwargs)
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "c:\Users\sapan\anaconda3\envs\py10-gpu-tf\lib\site-packages\keras\utils\generic_utils.py", line 1269, in validate_kwargs
    raise TypeError(error_message, kwarg)
TypeError: ('Keyword argument not understood:', 'torch_dtype')




In [3]:
sample_text = """
The U.S. economy showed stronger-than-expected growth last quarter, 
with consumer spending remaining resilient despite high inflation. 
Experts believe the Federal Reserve may reconsider its stance on interest rate hikes 
as signs of cooling inflation begin to emerge.
"""

summary = generate_summary(sample_text)
print("Input:", sample_text[:200] + "...")
print("Generated Summary:", summary)


Input: 
The U.S. economy showed stronger-than-expected growth last quarter, 
with consumer spending remaining resilient despite high inflation. 
Experts believe the Federal Reserve may reconsider its stance ...
Generated Summary: The U.S. economy showed stronger-than-expected growth last quarter . Consumer spending remained resilient despite high inflation.


In [6]:
import torch
import os
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    pipeline
)
import json

# Configuration
MODEL_PATH = "./finetuned_cnn_dm_sentiment/t5"  # Path where your model is saved
MAX_INPUT_LEN = 512
MAX_TARGET_LEN = 128

# Check if CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

class FineTunedSummarizer:
    def __init__(self, model_path):
        """Initialize the fine-tuned summarizer"""
        self.model_path = model_path
        self.load_model()
        self.setup_sentiment_pipelines()
    
    def load_model(self):
        """Load the fine-tuned model and tokenizer"""
        print(f"Loading model from {self.model_path}...")
        
        # Load tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=True)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_path).to(device)
        
        # Set model to evaluation mode
        self.model.eval()
        
        print("✅ Model loaded successfully!")
    
    def setup_sentiment_pipelines(self):
        """Initialize sentiment analysis pipelines"""
        print("Setting up sentiment analysis pipelines...")
        
        self.sentiment_pipelines = {}
        
        try:
            self.sentiment_pipelines['polarity'] = pipeline(
                "sentiment-analysis", 
                model="cardiffnlp/twitter-roberta-base-sentiment-latest"
            )
        except:
            self.sentiment_pipelines['polarity'] = pipeline("sentiment-analysis")
        
        try:
            self.sentiment_pipelines['finegrained'] = pipeline(
                "sentiment-analysis", 
                model="nlptown/bert-base-multilingual-uncased-sentiment"
            )
        except:
            self.sentiment_pipelines['finegrained'] = None
        
        try:
            self.sentiment_pipelines['emotion'] = pipeline(
                "text-classification", 
                model="j-hartmann/emotion-english-distilroberta-base", 
                return_all_scores=True
            )
        except:
            self.sentiment_pipelines['emotion'] = None
        
        try:
            self.sentiment_pipelines['zero_shot'] = pipeline(
                "zero-shot-classification", 
                model="facebook/bart-large-mnli"
            )
        except:
            self.sentiment_pipelines['zero_shot'] = None
        
        print("✅ Sentiment pipelines loaded!")
    
    def summarize_with_sentiments(self, article_text):
        """Generate summary with sentiment analysis"""
        try:
            # Prepare input (T5 needs "summarize:" prefix)
            prefix = "summarize: "
            inputs = self.tokenizer(
                prefix + article_text, 
                return_tensors="pt", 
                truncation=True, 
                max_length=MAX_INPUT_LEN,
                padding=True
            )
            
            # Move inputs to the same device as model
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            # Generate summary
            with torch.no_grad():
                ids = self.model.generate(
                    **inputs, 
                    max_length=MAX_TARGET_LEN, 
                    num_beams=4, 
                    early_stopping=True,
                    no_repeat_ngram_size=2,
                    pad_token_id=self.tokenizer.pad_token_id
                )
            
            # Decode the summary
            summary = self.tokenizer.decode(ids[0], skip_special_tokens=True)
            
            # Prepare result dictionary
            result = {
                "summary": summary,
                "model": "t5-finetuned",
                "article_length": len(article_text.split())
            }
            
            # Perform sentiment analysis on the summary
            self._analyze_sentiments(summary, result)
            
            return result
            
        except Exception as e:
            print(f"Error in summarization: {e}")
            return {
                "summary": "Error generating summary",
                "model": "t5-finetuned",
                "error": str(e)
            }
    
    def _analyze_sentiments(self, summary, result):
        """Perform various sentiment analyses on the summary"""
        # Intent and aspect labels
        INTENT_LABELS = ["informative", "warning", "opinion", "breaking news", "analysis"]
        ASPECT_LABELS = ["economy", "politics", "health", "sports", "technology", "entertainment"]
        
        # Polarity analysis
        if self.sentiment_pipelines['polarity']:
            try:
                polarity = self.sentiment_pipelines['polarity'](summary)[0]
                result["polarity"] = polarity
            except Exception as e:
                result["polarity"] = {"label": "UNKNOWN", "score": 0.0}
        
        # Fine-grained sentiment
        if self.sentiment_pipelines['finegrained']:
            try:
                finegrained = self.sentiment_pipelines['finegrained'](summary)[0]
                result["finegrained"] = finegrained
            except Exception as e:
                result["finegrained"] = {"label": "UNKNOWN", "score": 0.0}
        
        # Emotion analysis
        if self.sentiment_pipelines['emotion']:
            try:
                emotions = self.sentiment_pipelines['emotion'](summary)[0]
                result["emotions"] = emotions
            except Exception as e:
                result["emotions"] = []
        
        # Intent classification
        if self.sentiment_pipelines['zero_shot']:
            try:
                intent = self.sentiment_pipelines['zero_shot'](summary, candidate_labels=INTENT_LABELS)
                result["intent"] = intent
            except Exception as e:
                result["intent"] = {"labels": [], "scores": []}
        
        # Aspect classification
        if self.sentiment_pipelines['zero_shot']:
            try:
                aspects = self.sentiment_pipelines['zero_shot'](summary, candidate_labels=ASPECT_LABELS)
                result["aspects"] = aspects
            except Exception as e:
                result["aspects"] = {"labels": [], "scores": []}

def main():
    """Example usage of the fine-tuned model"""
    
    # Initialize the summarizer
    summarizer = FineTunedSummarizer(MODEL_PATH)
    
    # Sample news article (you can replace this with your own text)
    sample_article = """
    Scientists at MIT have developed a revolutionary new battery technology that could dramatically extend the range of electric vehicles. The new lithium-metal battery can store up to 10 times more energy than traditional lithium-ion batteries while charging in just 10 minutes. The research team, led by Dr. Sarah Johnson, has spent five years developing this breakthrough technology. Initial tests show that electric vehicles equipped with these batteries could travel over 1,000 miles on a single charge. The technology uses a novel polymer coating that prevents the formation of dendrites, which have been a major obstacle in lithium-metal battery development. Major automakers including Tesla, Ford, and General Motors have already expressed interest in licensing the technology. Commercial production is expected to begin within two years, potentially revolutionizing the electric vehicle industry and accelerating the transition away from fossil fuels. The research was funded by the Department of Energy and published in the journal Nature Energy.
    """
    
    # Generate summary with sentiment analysis
    print("Processing article...")
    result = summarizer.summarize_with_sentiments(sample_article)
    
    # Display results
    print("\n" + "="*60)
    print("FINE-TUNED MODEL RESULTS")
    print("="*60)
    
    print(f"\nOriginal Article Length: {len(sample_article.split())} words")
    print(f"Summary Length: {len(result['summary'].split())} words")
    
    print(f"\nSUMMARY:")
    print(f"'{result['summary']}'")
    
    if 'polarity' in result:
        print(f"\nPOLARITY SENTIMENT:")
        print(f"Label: {result['polarity']['label']}")
        print(f"Confidence: {result['polarity']['score']:.3f}")
    
    if 'finegrained' in result and result['finegrained']:
        print(f"\nFINE-GRAINED SENTIMENT:")
        print(f"Rating: {result['finegrained']['label']}")
        print(f"Confidence: {result['finegrained']['score']:.3f}")
    
    if 'emotions' in result and result['emotions']:
        print(f"\nTOP 3 EMOTIONS:")
        sorted_emotions = sorted(result['emotions'], key=lambda x: x['score'], reverse=True)[:3]
        for emotion in sorted_emotions:
            print(f"- {emotion['label']}: {emotion['score']:.3f}")
    
    if 'intent' in result and result['intent'].get('labels'):
        print(f"\nINTENT CLASSIFICATION:")
        top_intent = result['intent']['labels'][0]
        top_score = result['intent']['scores'][0]
        print(f"Primary Intent: {top_intent} (confidence: {top_score:.3f})")
    
    if 'aspects' in result and result['aspects'].get('labels'):
        print(f"\nASPECT CLASSIFICATION:")
        top_aspect = result['aspects']['labels'][0]
        top_score = result['aspects']['scores'][0]
        print(f"Primary Aspect: {top_aspect} (confidence: {top_score:.3f})")
    
    print("\n" + "="*60)
    
    # Save result to JSON file
    output_file = "sample_inference_result.json"
    with open(output_file, 'w') as f:
        json.dump(result, f, indent=2)
    print(f"Result saved to: {output_file}")

if __name__ == "__main__":
    main()

Using device: cuda
Loading model from ./finetuned_cnn_dm_sentiment/t5...
✅ Model loaded successfully!
Setting up sentiment analysis pipelines...


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0


✅ Sentiment pipelines loaded!
Processing article...

FINE-TUNED MODEL RESULTS

Original Article Length: 154 words
Summary Length: 47 words

SUMMARY:
'The new lithium-metal battery can store up to 10 times more energy than traditional batteries . Commercial production is expected to begin within two years, potentially revolutionizing the electric vehicle industry. The research was funded by the Department of Energy and published in the journal Nature Energy.'

POLARITY SENTIMENT:
Label: POSITIVE
Confidence: 0.971

FINE-GRAINED SENTIMENT:
Rating: 5 stars
Confidence: 0.534

INTENT CLASSIFICATION:
Primary Intent: informative (confidence: 0.493)

ASPECT CLASSIFICATION:
Primary Aspect: technology (confidence: 0.909)

Result saved to: sample_inference_result.json
