In [None]:
# ============================================
# 📌 Full ABSA Pipeline for Laptop Dataset (GPU enabled)
# ============================================

# Requirements:
# pip install transformers torch huggingface_hub pandas scikit-learn

import os
import pandas as pd
import torch
from transformers import pipeline
from huggingface_hub import InferenceClient
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# ---------------------------
# Configuration
# ---------------------------
# Inference runs on a local pipeline unless use_api is switched to True, in
# which case the hosted Inference API client is used instead.
use_api = False
model_name = "yangheng/deberta-v3-base-absa-v1.1"
# Name of the environment variable read for the API token (API mode only).
hf_token_env = "HF_TOKEN"

# ---------------------------
# Device setup
# ---------------------------
# Ask torch once and derive both the device index and the log lines from it.
_cuda_ready = torch.cuda.is_available()
print("CUDA available:", _cuda_ready)
if not _cuda_ready:
    device = -1
    print("Using CPU")
else:
    device = 0
    print("Using GPU:", torch.cuda.get_device_name(0))

# ---------------------------
# Initialize model/client
# ---------------------------
# Both branches define `classify_text(text)`, which returns the raw label of
# the first prediction, so the rest of the notebook does not care which
# backend is active.
if use_api:
    if os.environ.get(hf_token_env) is None:
        raise EnvironmentError(f"Environment variable {hf_token_env} must be set for API mode")
    client = InferenceClient(api_key=os.environ[hf_token_env])

    def classify_text(text):
        """Classify `text` through the Inference API and return the first label.

        Raises:
            RuntimeError: if the response is not a non-empty list whose first
                element carries a "label" entry.
        """
        # The text is the first positional argument of `text_classification`;
        # the method has no `inputs=` keyword.
        resp = client.text_classification(text, model=model_name)
        if isinstance(resp, list) and len(resp) > 0 and "label" in resp[0]:
            return resp[0]["label"]
        raise RuntimeError("Unexpected response from InferenceClient")
else:
    pipe = pipeline("text-classification", model=model_name, tokenizer=model_name, device=device)

    def classify_text(text):
        """Classify `text` with the local pipeline and return the first label.

        Raises:
            RuntimeError: if the pipeline output is not a non-empty list whose
                first element carries a "label" entry.
        """
        # An explicit max_length is required for truncation to take effect:
        # without it the tokenizer warns that the model has no predefined
        # maximum length and falls back to no truncation.
        pred = pipe(text, truncation=True, max_length=512)
        if isinstance(pred, list) and len(pred) > 0 and "label" in pred[0]:
            return pred[0]["label"]
        raise RuntimeError("Unexpected response from local pipeline")

# ---------------------------
# Label Normalization
# ---------------------------
def normalize_label(label):
    """Map a raw classifier label onto "positive", "negative" or "neutral".

    Resolution order: exact spellings, then numeric `label_<n>` ids
    (0 = negative, 1 = neutral, 2 = positive), then substring hints.
    Non-string input and anything unrecognised resolve to "neutral".
    """
    if not isinstance(label, str):
        return "neutral"

    token = label.strip().lower()

    # Exact spellings, including the common "neural" misspelling of neutral.
    exact = {
        "positive": "positive", "pos": "positive", "label_positive": "positive",
        "negative": "negative", "neg": "negative", "label_negative": "negative",
        "neutral": "neutral", "neural": "neutral", "label_neutral": "neutral",
    }
    if token in exact:
        return exact[token]

    # Numeric class ids such as "label_2".
    suffix = token[6:]
    if token.startswith("label_") and suffix.isdigit():
        by_index = {0: "negative", 1: "neutral", 2: "positive"}
        return by_index.get(int(suffix), "neutral")

    # Last resort: look for a telltale fragment anywhere in the label.
    for fragment, polarity in (("pos", "positive"), ("neg", "negative"), ("neu", "neutral")):
        if fragment in token:
            return polarity
    return "neutral"

# ---------------------------
# ABSA for single review
# ---------------------------
def analyze_aspects(review, aspects_list):
    """Return a dict mapping each aspect in `aspects_list` to a sentiment.

    A review that is not a string, or is blank after stripping, yields
    "neutral" for every aspect without calling the classifier. Otherwise each
    aspect is classified from the prompt "<review> [SEP] <aspect>" and the raw
    label is passed through `normalize_label`.
    """
    has_text = isinstance(review, str) and review.strip() != ""
    if not has_text:
        return dict.fromkeys(aspects_list, "neutral")

    return {
        aspect: normalize_label(classify_text(f"{review} [SEP] {aspect}"))
        for aspect in aspects_list
    }

# ---------------------------
# Evaluate dataset
# ---------------------------
def evaluate_dataset(df, text_col="Sentence", aspect_col="Aspect Term", label_col="polarity"):
    """
    Score the classifier against gold polarities in `df`.

    Each row is classified with `analyze_aspects(review, [aspect])` and the
    prediction is compared with the lower-cased gold label. A classification
    report is printed as a side effect.

    Args:
        df: DataFrame holding one (sentence, aspect, polarity) triple per row.
        text_col: column containing the review text.
        aspect_col: column containing the aspect term.
        label_col: column containing the gold polarity.

    Returns:
        The accuracy as a float, or None when a required column is missing or
        no row carries a usable (string) gold label.
    """
    # Validate all three columns up front, including the text column, so a
    # wrong column name is reported instead of surfacing as a KeyError mid-loop.
    missing = [col for col in (text_col, aspect_col, label_col) if col not in df.columns]
    if missing:
        print(f"Warning: column(s) {missing} not found in the DataFrame. Skipping evaluation for this dataset.")
        return None

    y_true, y_pred = [], []
    for review, asp, true in zip(df[text_col], df[aspect_col], df[label_col]):
        # Rows without a string label (e.g. NaN) have nothing to compare against.
        if not isinstance(true, str):
            continue
        pred = analyze_aspects(review, [asp])[asp]
        y_true.append(true.strip().lower())
        y_pred.append(pred)

    # Handle the case where nothing could be evaluated
    if not y_true:
        print("Warning: No data to evaluate in the provided DataFrame.")
        return None

    print("\nClassification Report:")
    # zero_division=0 keeps the reported 0.000 for classes that are never
    # predicted while silencing the per-metric undefined-metric warnings.
    print(classification_report(y_true, y_pred, digits=3, zero_division=0))
    return accuracy_score(y_true, y_pred)

# ---------------------------
# Main pipeline
# ---------------------------
if __name__ == "__main__":
    # Load datasets
    train_df = pd.read_csv("/content/Laptop_Train_v2.csv")
    test_df = pd.read_csv("/content/Laptops_Test_Data_PhaseA.csv")

    print("\nTrain Data Shape:", train_df.shape)
    print("Test Data Shape:", test_df.shape)

    # Split into train/val. No training happens here: both splits are only
    # used to score the pretrained classifier.
    train_split, val_split = train_test_split(train_df, test_size=0.2, random_state=42)

    print("\n🔹 Evaluating Train Set...")
    train_acc = evaluate_dataset(train_split)
    print("Train Accuracy:", train_acc)

    print("\n🔹 Evaluating Validation Set...")
    val_acc = evaluate_dataset(val_split)
    print("Validation Accuracy:", val_acc)

    # Only score the test set when it carries real annotations. Filling in a
    # placeholder aspect and a constant "neutral" label would merely measure
    # how often the model answers "neutral" for a made-up aspect, and the
    # resulting number would be reported as if it were an accuracy.
    if {'Aspect Term', 'polarity'}.issubset(test_df.columns):
        print("\n🔹 Evaluating Test Set...")
        test_acc = evaluate_dataset(test_df)
        print("Test Accuracy:", test_acc)
    else:
        test_acc = None
        print("\n⚠️ Test set has no 'Aspect Term'/'polarity' columns - skipping evaluation.")

CUDA available: True
Using GPU: Tesla T4


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



Train Data Shape: (2358, 6)
Test Data Shape: (800, 2)

🔹 Evaluating Train Set...

Classification Report:
              precision    recall  f1-score   support

    conflict      0.000     0.000     0.000        33
    negative      0.944     0.978     0.961       687
     neutral      0.848     0.924     0.884       368
    positive      0.969     0.939     0.954       798

    accuracy                          0.934      1886
   macro avg      0.690     0.710     0.700      1886
weighted avg      0.919     0.934     0.926      1886

Train Accuracy: 0.9337221633085896

🔹 Evaluating Validation Set...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:
              precision    recall  f1-score   support

    conflict      0.000     0.000     0.000        12
    negative      0.941     0.978     0.959       179
     neutral      0.887     0.935     0.910        92
    positive      0.958     0.958     0.958       189

    accuracy                          0.936       472
   macro avg      0.696     0.718     0.707       472
weighted avg      0.913     0.936     0.925       472

Validation Accuracy: 0.9364406779661016

🔹 Evaluating Test Set...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:
              precision    recall  f1-score   support

    negative      0.000     0.000     0.000         0
     neutral      1.000     0.691     0.817       800
    positive      0.000     0.000     0.000         0

    accuracy                          0.691       800
   macro avg      0.333     0.230     0.272       800
weighted avg      1.000     0.691     0.817       800

Test Accuracy: 0.69125


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Score every test sentence against a fixed list of laptop aspects.
# The list is a hand-picked demonstration set, not derived from the data.
aspects_to_analyze = [
    "battery", "performance", "display", "build", "value",
    "keyboard", "sound", "weight", "design", "durability",
    "cooling", "camera", "charging", "warranty",
]

print("\n🔹 Predicting aspects and sentiment for Test Set...")
# One {aspect: sentiment} dict per row, in row order.
test_predictions = [
    analyze_aspects(sentence, aspects_to_analyze)
    for sentence in test_df['Sentence']
]

# Keep each row's dict next to its sentence in a new column.
test_df['Predicted_Aspect_Sentiments'] = test_predictions

# Show the first rows with the new column.
print("\nTest DataFrame with Predictions:")
display(test_df.head())

# You can further process the 'Predicted_Aspect_Sentiments' column to extract specific aspect sentiments
# For example, to see the predicted sentiment for 'battery' for each review:
# test_df['Predicted_Battery_Sentiment'] = test_df['Predicted_Aspect_Sentiments'].apply(lambda x: x.get('battery', 'neutral'))
# display(test_df[['Sentence', 'Predicted_Battery_Sentiment']].head())


🔹 Predicting aspects and sentiment for Test Set...

Test DataFrame with Predictions:


Unnamed: 0,id,Sentence,Aspect Term,polarity,Predicted_Aspect_Sentiments
0,892:1,"Boot time is super fast, around anywhere from ...",placeholder,neutral,"{'battery': 'neutral', 'performance': 'positiv..."
1,1144:1,tech support would not fix the problem unless ...,placeholder,neutral,"{'battery': 'neutral', 'performance': 'negativ..."
2,805:2,but in resume this computer rocks!,placeholder,neutral,"{'battery': 'neutral', 'performance': 'positiv..."
3,359:1,Set up was easy.,placeholder,neutral,"{'battery': 'positive', 'performance': 'positi..."
4,562:1,Did not enjoy the new Windows 8 and touchscree...,placeholder,neutral,"{'battery': 'neutral', 'performance': 'negativ..."


In [None]:
# Make sure the columns evaluate_dataset() reads exist, filling any absent one
# with a constant so this cell also runs when the earlier cell was skipped.
# NOTE(review): when these fillers are used, the score below compares
# predictions against a constant 'neutral' label for a made-up aspect, so it
# is not a real accuracy.
for column, filler in (('Aspect Term', 'placeholder'), ('polarity', 'neutral')):
    if column not in test_df.columns:
        test_df[column] = filler

print("\n🔹 Evaluating Test Set...")
# Score the test set with evaluate_dataset()
test_acc = evaluate_dataset(test_df)
print("Test Accuracy:", test_acc)


🔹 Evaluating Test Set...

Classification Report:
              precision    recall  f1-score   support

    negative      0.000     0.000     0.000         0
     neutral      1.000     0.691     0.817       800
    positive      0.000     0.000     0.000         0

    accuracy                          0.691       800
   macro avg      0.333     0.230     0.272       800
weighted avg      1.000     0.691     0.817       800

Test Accuracy: 0.69125


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# ---------------------------
# Improved Evaluate dataset
# ---------------------------
def evaluate_dataset(df, text_col="Sentence", aspect_col="Aspect Term", label_col="polarity"):
    """Score the local pipeline on `df` using one batched inference call.

    When the aspect or label column is missing, no score can be computed;
    instead each sentence is classified for the generic aspect "general" and
    the result is written to a new `predicted_polarity` column on `df`
    (the DataFrame is modified in place).

    Otherwise every row with a non-missing text, aspect and label is turned
    into the prompt "<text> [SEP] <aspect>", classified in batches of 32, and
    compared with the gold label. Gold "conflict" labels are counted as
    "neutral". A classification report is printed as a side effect.

    Returns:
        The accuracy as a float, or None when the columns are missing or no
        row is usable.
    """
    if aspect_col not in df.columns or label_col not in df.columns:
        print(f"⚠️ Skipping evaluation: missing {aspect_col}/{label_col}. Running inference only...")
        df["predicted_polarity"] = df[text_col].apply(lambda r: analyze_aspects(r, ["general"])["general"])
        return None

    # Build the prompts and their gold labels in lockstep, dropping any row
    # that lacks text, aspect or label (a NaN label would otherwise crash on
    # .lower()).
    texts, labels = [], []
    for review, asp, true in zip(df[text_col], df[aspect_col], df[label_col]):
        if pd.isna(review) or pd.isna(asp) or pd.isna(true):
            continue
        texts.append(f"{review} [SEP] {asp}")
        gold = str(true).strip().lower()
        # Map conflict → neutral
        labels.append("neutral" if gold == "conflict" else gold)

    # Nothing to score: avoid calling the pipeline and the metrics on empty input.
    if not texts:
        print("⚠️ No rows with text, aspect and label to evaluate.")
        return None

    # Batch predictions. max_length makes truncation=True effective.
    preds_raw = pipe(texts, truncation=True, max_length=512, batch_size=32)
    preds = [normalize_label(p["label"]) for p in preds_raw]

    print("\nClassification Report:")
    print(classification_report(labels, preds, digits=3, zero_division=0))
    return accuracy_score(labels, preds)


In [None]:
def _predict_row_polarity(row):
    """Return the predicted polarity for one test-set row.

    The row's "Aspect Term" is used as the aspect. When the column is absent,
    or the value is missing or blank, the generic aspect "general" is used
    instead, so a NaN never ends up in the prompt as the text "nan".
    """
    aspect = row.get("Aspect Term", "general")
    if pd.isna(aspect) or not str(aspect).strip():
        aspect = "general"
    return analyze_aspects(row["Sentence"], [aspect]).get(aspect, "neutral")


test_df["predicted_polarity"] = test_df.apply(_predict_row_polarity, axis=1)
test_df.to_csv("Laptop_Test_With_Predictions.csv", index=False)


In [None]:
# ============================================
# 📌 ABSA Query-based Sentiment Analyzer
# ============================================

# Requirements:
# pip install transformers torch huggingface_hub pandas

from transformers import pipeline

# ---------------------------
# Model & Pipeline
# ---------------------------
# Checkpoint used for aspect-based sentiment classification in this cell.
model_name = "yangheng/deberta-v3-base-absa-v1.1"
# No `device` argument is passed, so device placement is left to the
# pipeline's default (the recorded run logged "Device set to use cuda:0").
pipe = pipeline("text-classification", model=model_name, tokenizer=model_name)

def normalize_label(label):
    """Map a raw classifier label onto "positive", "negative" or "neutral".

    Substring hints ("pos", "neg", "neu") are checked first; labels of the
    form `label_<n>` are then resolved as 0 = negative, 1 = neutral,
    2 = positive. Non-string input, a non-numeric `label_` suffix and anything
    else unrecognised resolve to "neutral" rather than raising.
    """
    if not isinstance(label, str):
        return "neutral"

    token = label.strip().lower()
    if "pos" in token:
        return "positive"
    if "neg" in token:
        return "negative"
    if "neu" in token:
        return "neutral"

    if token.startswith("label_"):
        try:
            idx = int(token.rsplit("_", 1)[-1])
        except ValueError:
            # e.g. "label_foo": not a class id, so there is nothing to map.
            return "neutral"
        mapping = {0: "negative", 1: "neutral", 2: "positive"}
        return mapping.get(idx, "neutral")

    return "neutral"

# ---------------------------
# Aspects to check
# ---------------------------
aspects = [
    "battery", "performance", "display", "build", "value",
    "keyboard", "sound", "weight", "design", "durability",
    "cooling", "camera", "charging", "warranty"
]

# ---------------------------
# ABSA for a single query
# ---------------------------
def analyze_query(query, aspects_list=aspects):
    """Return a dict mapping each aspect in `aspects_list` to a sentiment.

    Every aspect is classified from the prompt "<query> [SEP] <aspect>"; all
    prompts are sent to the pipeline in a single call. A query that is not a
    string, or is blank, yields "neutral" for every aspect without running the
    model, and an empty `aspects_list` yields an empty dict.

    NOTE(review): every aspect in the list is scored whether or not the query
    mentions it. In the recorded run, aspects absent from the review (e.g.
    "build", "value", "sound") still came back non-neutral, so callers may
    want to restrict `aspects_list` to aspects actually present in the text.
    """
    if not isinstance(query, str) or not query.strip():
        return {asp: "neutral" for asp in aspects_list}

    prompts = [f"{query} [SEP] {asp}" for asp in aspects_list]
    if not prompts:
        return {}

    # max_length is needed for truncation=True to take effect; without it the
    # tokenizer warns and applies no truncation.
    predictions = pipe(prompts, truncation=True, max_length=512)
    return {
        asp: normalize_label(pred["label"])
        for asp, pred in zip(aspects_list, predictions)
    }

# ---------------------------
# Example usage
# ---------------------------
if __name__ == "__main__":
    # Two sample reviews, each mixing a positive and a negative opinion.
    sample_reviews = (
        "The battery lasts long but the display is too dim.",
        "Performance is smooth, but the keyboard feels cheap.",
    )

    for review in sample_reviews:
        print(f"\n🔹 Review: {review}")
        # Neutral verdicts are dropped so only opinionated aspects are listed.
        opinionated = {
            asp: sent
            for asp, sent in analyze_query(review).items()
            if sent != "neutral"
        }
        for asp, sent in opinionated.items():
            print(f"  Aspect: {asp:<12} → Sentiment: {sent}")


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



🔹 Review: The battery lasts long but the display is too dim.
  Aspect: battery      → Sentiment: positive
  Aspect: performance  → Sentiment: positive
  Aspect: display      → Sentiment: negative
  Aspect: build        → Sentiment: positive
  Aspect: value        → Sentiment: positive
  Aspect: sound        → Sentiment: positive
  Aspect: weight       → Sentiment: positive
  Aspect: design       → Sentiment: negative
  Aspect: durability   → Sentiment: positive
  Aspect: camera       → Sentiment: negative

🔹 Review: Performance is smooth, but the keyboard feels cheap.
  Aspect: battery      → Sentiment: positive
  Aspect: performance  → Sentiment: positive
  Aspect: display      → Sentiment: positive
  Aspect: build        → Sentiment: positive
  Aspect: value        → Sentiment: positive
  Aspect: keyboard     → Sentiment: negative
  Aspect: sound        → Sentiment: positive
  Aspect: weight       → Sentiment: positive
  Aspect: design       → Sentiment: positive
  Aspect: durabilit

In [None]:
# ============================================
# 📌 Advanced ABSA Pipeline with Multiple Improvement Techniques
# ============================================

import os
import pandas as pd
import torch
import numpy as np
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from collections import Counter
import re

# ---------------------------
# Configuration
# ---------------------------
# Checkpoints considered for the ensemble; AdvancedABSA loads every entry
# other than the primary model when ensembling is enabled.
MODELS = [
    "yangheng/deberta-v3-base-absa-v1.1",
    "cardiffnlp/twitter-roberta-base-sentiment-latest",
    "nlptown/bert-base-multilingual-uncased-sentiment",
]
PRIMARY_MODEL = "yangheng/deberta-v3-base-absa-v1.1"

# ---------------------------
# Device setup
# ---------------------------
# Single CUDA query; the device index and the log lines both follow from it.
_has_cuda = torch.cuda.is_available()
device = 0 if _has_cuda else -1
print("CUDA available:", _has_cuda)
if _has_cuda:
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

# ---------------------------
# Enhanced Label Normalization
# ---------------------------
def normalize_label(label):
    """Map a raw label onto "positive", "negative", "neutral" or "conflict".

    Handles sentiment words, `label_<n>` class ids (0 = negative,
    1 = neutral, 2 = positive) and star ratings (4-5 positive, 1-2 negative,
    3 neutral). Non-string input and anything unrecognised resolve to
    "neutral".
    """
    if not isinstance(label, str):
        return "neutral"

    token = label.strip().lower()

    # 1) Exact spellings.
    exact_groups = (
        ("positive", ("positive", "pos", "label_positive", "label_2", "5 stars", "4 stars")),
        ("negative", ("negative", "neg", "label_negative", "label_0", "1 star", "2 stars")),
        ("neutral", ("neutral", "neural", "label_neutral", "label_1", "3 stars")),
        ("conflict", ("conflict", "label_conflict")),
    )
    for verdict, spellings in exact_groups:
        if token in spellings:
            return verdict

    # 2) Remaining numeric class ids, e.g. "label_7".
    suffix = token[6:]
    if token.startswith("label_") and suffix.isdigit():
        by_index = {0: "negative", 1: "neutral", 2: "positive"}
        return by_index.get(int(suffix), "neutral")

    # 3) Free-form star ratings: the first digit group that matches wins,
    #    checked in the order high, low, middle.
    if "star" in token:
        for digits, verdict in (("54", "positive"), ("12", "negative"), ("3", "neutral")):
            if any(digit in token for digit in digits):
                return verdict

    # 4) Telltale fragments anywhere in the label.
    for fragment, verdict in (("pos", "positive"), ("neg", "negative"), ("neu", "neutral")):
        if fragment in token:
            return verdict

    return "neutral"

# ---------------------------
# Text Preprocessing and Augmentation
# ---------------------------
def clean_text(text):
    """Clean and normalize text.

    Drops every character that is not a word character, whitespace or one of
    `. , ! ? -`, then collapses whitespace runs to a single space and trims
    the ends. Non-string input yields an empty string.

    NOTE: apostrophes are among the removed characters, so "don't" becomes
    "dont".
    """
    if not isinstance(text, str):
        return ""
    # Remove special characters but keep sentence structure
    text = re.sub(r'[^\w\s.,!?-]', '', text)
    # Collapse whitespace only after the removal above: deleting a symbol that
    # stood between two spaces ("a & b") would otherwise leave a double space.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

def augment_text(text, aspect):
    """Return four alternative prompt phrasings for one text/aspect pair.

    The list order is: plain "[SEP]" form, labelled "[SEP]" form, question
    appended after the text, question placed before the text.
    """
    sep_form = f"{text} [SEP] {aspect}"
    labelled_form = f"Review: {text} [SEP] Aspect: {aspect}"
    trailing_question = f"{text}. Opinion about {aspect}?"
    leading_question = f"What is the sentiment about {aspect} in: {text}"
    return [sep_form, labelled_form, trailing_question, leading_question]

# ---------------------------
# Multi-Model Ensemble Classifier
# ---------------------------
class EnsembleABSA:
    """Wrapper around one text-classification pipeline.

    Despite the name, an instance holds a single model; `AdvancedABSA`
    combines several instances. Loading and prediction failures are reported
    with a printed message and degrade to a neutral fallback instead of
    raising.
    """

    # Returned whenever no usable prediction is available.
    _FALLBACK_LABEL = "neutral"
    _FALLBACK_SCORE = 0.33

    def __init__(self, model_name, device):
        self.model_name = model_name
        self.device = device
        # Stays None when the model cannot be loaded.
        self.pipe = None
        self._initialize_pipeline()

    def _initialize_pipeline(self):
        """Initialize pipeline with error handling"""
        try:
            self.pipe = pipeline(
                "text-classification",
                model=self.model_name,
                tokenizer=self.model_name,
                device=self.device,
                truncation=True,
                max_length=512
            )
            print(f"✅ Loaded model: {self.model_name}")
        except Exception as e:
            # Best effort: a model that fails to load leaves self.pipe as None
            # so callers can detect it and carry on without this model.
            print(f"❌ Failed to load {self.model_name}: {e}")
            self.pipe = None

    def _fallback(self, return_scores):
        """Return the neutral fallback in the shape the caller asked for."""
        if return_scores:
            return self._FALLBACK_LABEL, self._FALLBACK_SCORE
        return self._FALLBACK_LABEL

    def predict(self, text, return_scores=False):
        """Classify `text` and return its normalized label.

        Args:
            text: the prompt to classify.
            return_scores: when True, return a `(label, score)` tuple instead
                of the bare label.

        Returns:
            The normalized label, or `(label, score)` when `return_scores` is
            True. If the model is not loaded, the pipeline returns nothing
            usable, or prediction raises, the result is "neutral" (with score
            0.33) in the same shape.
        """
        if self.pipe is None:
            return self._fallback(return_scores)

        try:
            result = self.pipe(text, truncation=True, max_length=512)
            if isinstance(result, list) and len(result) > 0:
                label = normalize_label(result[0]["label"])
                score = result[0].get("score", 0.0)

                if return_scores:
                    return label, score
                return label
        except Exception as e:
            print(f"Error in prediction: {e}")
            return self._fallback(return_scores)

        return self._fallback(return_scores)

# ---------------------------
# Advanced ABSA with Multiple Strategies
# ---------------------------
class AdvancedABSA:
    """Aspect-sentiment predictor offering several prediction strategies.

    A primary `EnsembleABSA` classifier is always created. With
    `use_ensemble=True`, every other entry of `MODELS` that loads
    successfully is added as a secondary voter.
    """

    def __init__(self, primary_model, device, use_ensemble=True):
        self.primary_classifier = EnsembleABSA(primary_model, device)
        self.use_ensemble = use_ensemble
        self.classifiers = []

        if use_ensemble:
            print("\n🔄 Initializing ensemble models...")
            for model in MODELS:
                if model == primary_model:
                    continue
                try:
                    classifier = EnsembleABSA(model, device)
                except Exception as exc:
                    # Best effort: one unusable secondary model must not stop
                    # the others, but say so instead of failing silently.
                    print(f"⚠️  Skipping ensemble model {model}: {exc}")
                    continue
                if classifier.pipe is not None:
                    self.classifiers.append(classifier)

    def predict_with_voting(self, text, aspect):
        """Ensemble prediction with confidence-weighted voting.

        `text` is the already-assembled prompt. `aspect` is accepted for
        signature symmetry with the other strategies and is not used here.
        With a single voter its label is returned directly; otherwise each
        label accumulates the scores of the models that chose it and the
        label with the highest total wins.
        """
        predictions = []
        scores = []

        # Primary model prediction
        label, score = self.primary_classifier.predict(text, return_scores=True)
        predictions.append(label)
        scores.append(score)

        # Ensemble predictions
        if self.use_ensemble:
            for classifier in self.classifiers:
                label, score = classifier.predict(text, return_scores=True)
                predictions.append(label)
                scores.append(score)

        # Nothing to weigh with a single voter
        if len(predictions) == 1:
            return predictions[0]

        # Sum the confidence behind each label
        weighted_votes = {}
        for pred, score in zip(predictions, scores):
            weighted_votes[pred] = weighted_votes.get(pred, 0) + score

        return max(weighted_votes, key=weighted_votes.get)

    def predict_with_augmentation(self, text, aspect):
        """Classify several phrasings of the same pair and take the majority.

        The text and aspect are cleaned, expanded with `augment_text`, and
        each variation is classified by the primary model only.
        """
        text_clean = clean_text(text)
        aspect_clean = clean_text(aspect)

        # Generate variations
        variations = augment_text(text_clean, aspect_clean)

        predictions = [
            self.primary_classifier.predict(variation)
            for variation in variations
        ]

        # Majority vote
        if predictions:
            return Counter(predictions).most_common(1)[0][0]
        return "neutral"

    def predict_with_context_window(self, text, aspect):
        """Classify using only the text surrounding the aspect mention.

        When the cleaned aspect occurs in the cleaned text
        (case-insensitively), the prompt is built from up to 100 characters
        on either side of its first occurrence; otherwise the whole cleaned
        text is used.
        """
        text_clean = clean_text(text)
        aspect_clean = clean_text(aspect)

        # Try to find aspect in text
        text_lower = text_clean.lower()
        aspect_lower = aspect_clean.lower()

        if aspect_lower in text_lower:
            # Extract context window around aspect
            idx = text_lower.find(aspect_lower)
            start = max(0, idx - 100)
            end = min(len(text_clean), idx + len(aspect_clean) + 100)
            context = text_clean[start:end]

            input_text = f"{context} [SEP] {aspect_clean}"
        else:
            input_text = f"{text_clean} [SEP] {aspect_clean}"

        return self.primary_classifier.predict(input_text)

    def predict_combined(self, text, aspect, strategy="voting"):
        """Predict with one strategy, or majority-vote across all of them.

        `strategy` is one of "voting", "augmentation", "context" or "all".
        Any other value runs no strategy and returns "neutral".
        """
        predictions = []

        if strategy in ("all", "voting"):
            pred = self.predict_with_voting(
                f"{clean_text(text)} [SEP] {clean_text(aspect)}",
                aspect
            )
            predictions.append(pred)

        if strategy in ("all", "augmentation"):
            pred = self.predict_with_augmentation(text, aspect)
            predictions.append(pred)

        if strategy in ("all", "context"):
            pred = self.predict_with_context_window(text, aspect)
            predictions.append(pred)

        # Final majority vote
        if predictions:
            return Counter(predictions).most_common(1)[0][0]
        return "neutral"

# ---------------------------
# Evaluation with Advanced Techniques
# ---------------------------
def evaluate_with_strategy(df, absa_model, strategy="voting",
                          text_col="Sentence", aspect_col="Aspect Term",
                          label_col="polarity", dataset_name="Dataset"):
    """Evaluate `absa_model` on `df` using the given prediction strategy.

    Rows with a missing or blank text or aspect, or a missing label, are
    skipped. Rows whose prediction raises are skipped too and counted, and the
    count is printed. A classification report, confusion matrix and summary
    scores are printed as a side effect.

    Args:
        df: DataFrame with one (text, aspect, gold polarity) triple per row.
        absa_model: object exposing `predict_combined(text, aspect, strategy=...)`.
        strategy: strategy name forwarded to `predict_combined`.
        text_col, aspect_col, label_col: column names to read.
        dataset_name: label used in the printed headings.

    Returns:
        A dict with keys 'accuracy', 'f1_macro', 'f1_weighted' and
        'strategy', or None when a required column is missing or no row could
        be evaluated.
    """
    required = (text_col, aspect_col, label_col)
    if any(col not in df.columns for col in required):
        print(f"⚠️  Required columns missing in {dataset_name}")
        return None

    y_true, y_pred = [], []
    failed = 0
    last_error = None

    print(f"\n{'='*60}")
    print(f"📊 {dataset_name} - Strategy: {strategy.upper()}")
    print(f"{'='*60}")

    total = len(df)
    rows = zip(df[text_col], df[aspect_col], df[label_col])
    # Count rows by position: the DataFrame index keeps its original labels
    # after a split, so it can exceed `total` and need not be numeric.
    for position, (raw_text, raw_aspect, raw_label) in enumerate(rows, start=1):
        if position % 100 == 0:
            print(f"Processing: {position}/{total}", end='\r')

        # Test for missing values before str(): str(NaN) is the non-empty
        # text "nan", which would otherwise be scored like real data.
        if pd.isna(raw_text) or pd.isna(raw_aspect) or pd.isna(raw_label):
            continue

        text = str(raw_text)
        aspect = str(raw_aspect)
        if not text.strip() or not aspect.strip():
            continue

        try:
            true_label = normalize_label(str(raw_label))
            # Get prediction using specified strategy
            pred = absa_model.predict_combined(text, aspect, strategy=strategy)
        except Exception as e:
            # Best effort: keep evaluating, but remember that rows were lost.
            failed += 1
            last_error = e
            continue

        y_true.append(true_label)
        y_pred.append(pred)

    if failed:
        print(f"\n⚠️  {failed} row(s) skipped after prediction errors (last: {last_error})")

    if not y_true:
        print(f"\n⚠️  No valid data in {dataset_name}")
        return None

    print(f"\nProcessed: {len(y_true)}/{total} samples")

    # Report only on classes that occur in the gold labels.
    labels = sorted(set(y_true))

    print(f"\n📈 Classification Report:")
    print(classification_report(y_true, y_pred, labels=labels, digits=4, zero_division=0))

    print(f"\n🎯 Confusion Matrix:")
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    print(f"Labels: {labels}")
    print(cm)

    accuracy = accuracy_score(y_true, y_pred)
    f1_macro = f1_score(y_true, y_pred, labels=labels, average='macro', zero_division=0)
    f1_weighted = f1_score(y_true, y_pred, labels=labels, average='weighted', zero_division=0)

    print(f"\n✅ Accuracy: {accuracy:.4f}")
    print(f"📊 F1-Score (Macro): {f1_macro:.4f}")
    print(f"📊 F1-Score (Weighted): {f1_weighted:.4f}")

    return {
        'accuracy': accuracy,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'strategy': strategy
    }

# ---------------------------
# Cross-Validation for Robustness
# ---------------------------
def cross_validate_model(df, absa_model, n_splits=5):
    """Score `absa_model` on each held-out fold of a stratified k-fold split.

    Folds are stratified on the 'polarity' column and shuffled with a fixed
    seed. Only the held-out part of each fold is used: it is evaluated with
    the "voting" strategy, and the training indices are discarded.

    Returns:
        The list of per-fold accuracies (folds without a result are omitted).
    """
    rule = '=' * 60
    print(f"\n{rule}")
    print(f"🔄 Performing {n_splits}-Fold Cross-Validation")
    print(rule)

    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    fold_accuracies = []

    folds = splitter.split(df, df['polarity'])
    for fold_no, (_, held_out) in enumerate(folds, start=1):
        print(f"\nFold {fold_no}/{n_splits}")

        outcome = evaluate_with_strategy(
            df.iloc[held_out],
            absa_model,
            strategy="voting",
            dataset_name=f"Fold {fold_no}",
        )
        if outcome:
            fold_accuracies.append(outcome['accuracy'])

    if fold_accuracies:
        mean_acc = np.mean(fold_accuracies)
        spread = np.std(fold_accuracies)
        print(f"\n{rule}")
        print("Cross-Validation Results:")
        print(f"Mean Accuracy: {mean_acc:.4f} (+/- {spread:.4f})")
        print(rule)

    return fold_accuracies

# ---------------------------
# Main Pipeline
# ---------------------------
if __name__ == "__main__":
    # Horizontal rule reused by every section banner below.
    _rule = "=" * 60

    print(_rule)
    print("🚀 Advanced ABSA Pipeline Starting")
    print(_rule)

    # --- Data -------------------------------------------------------------
    print("\n📁 Loading datasets...")
    train_df = pd.read_csv("/content/Laptop_Train_v2.csv")
    test_df = pd.read_csv("/content/Laptops_Test_Data_PhaseA.csv")

    print(f"Train: {train_df.shape}, Test: {test_df.shape}")

    # Keep only fully annotated training rows and canonicalise their labels.
    labelled_cols = ['Sentence', 'Aspect Term', 'polarity']
    train_df = train_df.dropna(subset=labelled_cols)
    train_df['polarity'] = train_df['polarity'].map(normalize_label)

    # --- Model ------------------------------------------------------------
    print("\n🤖 Initializing Advanced ABSA System...")
    absa = AdvancedABSA(PRIMARY_MODEL, device, use_ensemble=False)

    # Stratified hold-out split; only the validation part is scored below.
    train_split, val_split = train_test_split(
        train_df,
        test_size=0.2,
        random_state=42,
        stratify=train_df['polarity'],
    )

    # --- Strategy comparison on the validation split -------------------------
    strategies = ["voting", "augmentation", "context", "all"]
    results = {}

    print("\n" + _rule)
    print("📊 VALIDATION SET - Testing Different Strategies")
    print(_rule)

    for strategy in strategies:
        result = evaluate_with_strategy(
            val_split,
            absa,
            strategy=strategy,
            dataset_name=f"Validation ({strategy})",
        )
        if result:
            results[strategy] = result

    # Pick the strategy with the highest accuracy; default to voting when
    # nothing could be evaluated.
    if not results:
        best_strategy = "voting"
    else:
        best_strategy = max(results, key=lambda name: results[name]['accuracy'])
        print(f"\n🏆 Best Strategy: {best_strategy.upper()} - Accuracy: {results[best_strategy]['accuracy']:.4f}")

    # --- Test set ---------------------------------------------------------
    print("\n" + _rule)
    print("🎯 TEST SET EVALUATION")
    print(_rule)

    test_is_labelled = 'Aspect Term' in test_df.columns and 'polarity' in test_df.columns
    if not test_is_labelled:
        print("⚠️  Test set is unlabeled - cannot evaluate")
    else:
        test_df = test_df.dropna(subset=labelled_cols)
        test_df['polarity'] = test_df['polarity'].map(normalize_label)

        test_result = evaluate_with_strategy(
            test_df,
            absa,
            strategy=best_strategy,
            dataset_name="Test Set",
        )

    # --- Optional cross-validation (asks on stdin) ---------------------------
    print("\n" + _rule)
    print("🔄 Optional: Cross-Validation Analysis")
    print(_rule)
    choice = input("Run cross-validation? (y/n): ").lower()
    if choice == 'y':
        cv_scores = cross_validate_model(train_df, absa, n_splits=5)

    print("\n" + _rule)
    print("✅ PIPELINE COMPLETE")
    print(_rule)

CUDA available: True
Using GPU: Tesla T4
🚀 Advanced ABSA Pipeline Starting

📁 Loading datasets...
Train: (2358, 6), Test: (800, 2)

🤖 Initializing Advanced ABSA System...


Device set to use cuda:0


✅ Loaded model: yangheng/deberta-v3-base-absa-v1.1

📊 VALIDATION SET - Testing Different Strategies

📊 Validation (voting) - Strategy: VOTING
Processing: 1400/472
Processed: 472/472 samples

📈 Classification Report:
              precision    recall  f1-score   support

    conflict     0.0000    0.0000    0.0000         9
    negative     0.9382    0.9653    0.9516       173
     neutral     0.8500    0.9239    0.8854        92
    positive     0.9691    0.9495    0.9592       198

    accuracy                         0.9322       472
   macro avg     0.6893    0.7097    0.6990       472
weighted avg     0.9161    0.9322    0.9237       472


🎯 Confusion Matrix:
Labels: ['conflict', 'negative', 'neutral', 'positive']
[[  0   6   2   1]
 [  0 167   4   2]
 [  0   4  85   3]
 [  0   1   9 188]]

✅ Accuracy: 0.9322
📊 F1-Score (Macro): 0.6990
📊 F1-Score (Weighted): 0.9237

📊 Validation (augmentation) - Strategy: AUGMENTATION

Processed: 472/472 samples

📈 Classification Report:
          