In [1]:
# Colab Cell 0: Mount Google Drive
from google.colab import drive
import os

print("Mounting Google Drive...")
drive.mount('/content/drive')
print("Google Drive mounted.")

# Define your base project directory in Google Drive (MUST match where you saved your model)
GOOGLE_DRIVE_PROJECT_ROOT = '/content/drive/MyDrive/Colab_Projects/ABSA_Model_Project'
print(f"Project root in Drive: {GOOGLE_DRIVE_PROJECT_ROOT}")

Mounting Google Drive...
Mounted at /content/drive
Google Drive mounted.
Project root in Drive: /content/drive/MyDrive/Colab_Projects/ABSA_Model_Project


In [2]:
# Colab Cell 1: Install Necessary Libraries
# This cell needs to be run every time you start a new Colab session or restart runtime.
# It will be fast after the first time as Colab caches packages.

!pip install -U transformers==4.37.2
!pip install -U peft==0.10.0
!pip install -U accelerate==0.23.0
!pip install -U datasets
!pip install -U pandas
!pip install -U lxml # Optional, but good to include if it was in your original env

print("Required libraries installed/updated.")

# No explicit runtime restart needed after this specific combination of installs,
# as long as they are compatible with the base Python.
# If you get import errors, then a restart might be necessary.

Collecting transformers==4.37.2
  Downloading transformers-4.37.2-py3-none-any.whl.metadata (129 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.19,>=0.14 (from transformers==4.37.2)
  Downloading tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.37.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.15.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers

In [3]:
# Colab Cell: Load Saved Model and Perform Inference

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Make sure GOOGLE_DRIVE_PROJECT_ROOT is defined (from Cell 0)
# (It will be if you run Cell 0 first)

# --- 1. Define the path where your DistilBERT model was saved in Google Drive ---
# This path must precisely match where you saved it in Cell 3.
saved_model_path = os.path.join(GOOGLE_DRIVE_PROJECT_ROOT, 'absa_distilbert_model')

# --- 2. Define your label mapping (MUST match what was used during training) ---
# You need to define this manually or load it if you saved it alongside your model
id_to_label = {0: 'positive', 1: 'negative', 2: 'neutral'}

# --- 3. Load the Tokenizer and Model ---
print(f"Loading tokenizer from Google Drive: {saved_model_path}")
loaded_tokenizer = AutoTokenizer.from_pretrained(saved_model_path)
print(f"Loading model from Google Drive: {saved_model_path}")
loaded_model = AutoModelForSequenceClassification.from_pretrained(saved_model_path)

# --- 4. Move the model to the GPU (if available) and set to evaluation mode ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loaded_model.to(device)
loaded_model.eval() # Set model to evaluation mode

print(f"DistilBERT model loaded successfully from Drive and moved to device: {device}")


# --- 5. Create an Inference Function ---
def predict_absa_sentiment(sentence: str, aspect: str):
    # CRITICAL: max_length MUST match FIXED_TOKEN_MAX_LENGTH used in training (128)
    FIXED_TOKEN_MAX_LENGTH = 128

    inputs = loaded_tokenizer(
        str(sentence),
        str(aspect),
        truncation=True,
        max_length=FIXED_TOKEN_MAX_LENGTH,
        padding='max_length',
        return_tensors='pt'
    ).to(device)

    with torch.no_grad():
        outputs = loaded_model(**inputs)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1)
        predicted_class_id = torch.argmax(probabilities, dim=1).item()

    sentiment_label = id_to_label[predicted_class_id]
    confidence = probabilities[0][predicted_class_id].item()

    return sentiment_label, confidence, probabilities[0].tolist()


# --- 6. Test your model with some examples ---
print("\n--- Testing Model Inference ---")
test_cases = [
    ("The food here is delicious but the service is slow.", "food"),
    ("The food here is delicious but the service is slow.", "service"),
    ("The ambiance was very nice, but the prices were too high.", "ambiance"),
    ("The ambiance was very nice, but the prices were too high.", "prices"),
    ("I had no issues with anything, it was just fine.", "issues"),
    ("The staff were unfriendly.", "staff"),
    ("The cleanliness of the restrooms was a concern.", "cleanliness"),
    ("Worst coffee I've ever had.", "coffee")
]

for sentence, aspect in test_cases:
    sentiment, confidence, all_probs = predict_absa_sentiment(sentence, aspect)
    print(f"\nSentence: '{sentence}'")
    print(f"Aspect: '{aspect}'")
    print(f"Predicted Sentiment: {sentiment} (Confidence: {confidence:.2f})")
    probs_dict = {id_to_label[i]: f'{all_probs[i]:.2f}' for i in range(len(all_probs))}
    print(f"All Probabilities: {probs_dict}")

Loading tokenizer from Google Drive: /content/drive/MyDrive/Colab_Projects/ABSA_Model_Project/absa_distilbert_model
Loading model from Google Drive: /content/drive/MyDrive/Colab_Projects/ABSA_Model_Project/absa_distilbert_model
DistilBERT model loaded successfully from Drive and moved to device: cuda

--- Testing Model Inference ---

Sentence: 'The food here is delicious but the service is slow.'
Aspect: 'food'
Predicted Sentiment: positive (Confidence: 1.00)
All Probabilities: {'positive': '1.00', 'negative': '0.00', 'neutral': '0.00'}

Sentence: 'The food here is delicious but the service is slow.'
Aspect: 'service'
Predicted Sentiment: negative (Confidence: 0.99)
All Probabilities: {'positive': '0.00', 'negative': '0.99', 'neutral': '0.00'}

Sentence: 'The ambiance was very nice, but the prices were too high.'
Aspect: 'ambiance'
Predicted Sentiment: positive (Confidence: 1.00)
All Probabilities: {'positive': '1.00', 'negative': '0.00', 'neutral': '0.00'}

Sentence: 'The ambiance was

In [4]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from datasets import load_dataset
import torch

# Load your SemEval test set (replace with your own CSV if applicable)
# Example if you have a CSV:
# test_df = pd.read_csv("/path/to/test.csv")

# Example dataset structure: sentence, aspect, label
test_sentences = [
    "The food here is delicious but the service is slow.",
    "The ambiance was very nice, but the prices were too high.",
    "Worst coffee I've ever had.",
    "The staff were unfriendly."
]
test_aspects = ["food", "prices", "coffee", "staff"]
true_labels = [0, 1, 1, 1]  # 0=positive, 1=negative, 2=neutral

pred_labels = []

for sent, asp in zip(test_sentences, test_aspects):
    sentiment, _, _ = predict_absa_sentiment(sent, asp)
    # Convert label name back to ID
    label_id = [k for k, v in id_to_label.items() if v == sentiment][0]
    pred_labels.append(label_id)

# --- Compute Metrics ---
accuracy = accuracy_score(true_labels, pred_labels)
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='weighted')

print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-score: {f1:.3f}")

# Full class-by-class report
print("\nDetailed classification report:")
print(classification_report(true_labels, pred_labels, target_names=list(id_to_label.values())))


Accuracy: 1.000
Precision: 1.000
Recall: 1.000
F1-score: 1.000

Detailed classification report:


ValueError: Number of classes, 2, does not match size of target_names, 3. Try specifying the labels parameter