In [None]:
!pip install -q transformers

In [None]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

In [None]:
# Make Google Drive visible under /content/drive (only needed when the saved
# model lives on Drive).
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# Restore the tokenizer and the classification model from the same saved
# directory (tokenizer first, then model).
model_path = '/content/drive/MyDrive/sentiment140_model'
tokenizer, model = (
    DistilBertTokenizer.from_pretrained(model_path),
    DistilBertForSequenceClassification.from_pretrained(model_path),
)

In [None]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU, and
# move the model there.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

In [None]:
# Single-text classification helper
def classify_text(text):
    """Return the model's class probabilities for one piece of text.

    The text is prefixed with "TEXT1: " (the notebook states this matches the
    prefix used during training -- confirm against the training code),
    tokenized with truncation at max_length=128, moved to `device`, and run
    through `model` with gradient tracking disabled.

    Args:
        text: The text to classify.

    Returns:
        A list of floats: the softmax (over dim 1) of the logits for the
        first row of the batch, i.e. one probability per output class.
    """
    prompt = "TEXT1: {}".format(text)

    # Tokenizer settings are stated to mirror training -- TODO confirm.
    encoded = tokenizer(
        prompt,
        max_length=128,
        truncation=True,
        padding=True,
        return_tensors="pt",
    )

    # Every tensor produced by the tokenizer must live on the model's device.
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)

    with torch.no_grad():
        logits = model(**on_device).logits

    probs = torch.softmax(logits, dim=1)
    return probs[0].cpu().tolist()

In [None]:
# Output index -> human-readable label.
# NOTE(review): assumes index 0 is the negative class and index 1 the positive
# class in the saved model -- confirm against the training label encoding.
class_names = dict(enumerate(("negative", "positive")))

In [None]:
# Interactive classification loop.
# Reads one line at a time, classifies it with classify_text(), and prints the
# per-class probabilities followed by the most likely class. The loop ends on
# 'quit'/'exit', on end-of-input, or when the cell is interrupted at the prompt.
print("Enter text to classify (or 'quit' to exit):")
while True:
    try:
        user_input = input("> ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt while waiting for input ends the
        # session cleanly instead of surfacing a traceback.
        print("\nGoodbye!")
        break

    # Compare on the stripped text so that e.g. " quit " still exits.
    stripped = user_input.strip()

    if stripped.lower() in ('quit', 'exit'):
        print("Goodbye!")
        break

    # A blank line carries nothing to classify; without this guard the model
    # would be scoring only the prefix that classify_text() prepends.
    if not stripped:
        continue

    probabilities = classify_text(user_input)

    print("\nClassification results:")
    for i, prob in enumerate(probabilities):
        # Fall back to a generic label if the model emits more classes than
        # class_names knows about, rather than raising KeyError.
        label = class_names.get(i, f"class {i}")
        print(f"{label}: {prob:.2%}")

    # Index of the largest probability (first one wins on ties); computed on
    # the plain list, so no round trip through a tensor is needed.
    predicted_class = max(range(len(probabilities)), key=probabilities.__getitem__)
    predicted_label = class_names.get(predicted_class, f"class {predicted_class}")
    print(f"\nTop prediction: {predicted_label} ({probabilities[predicted_class]:.2%})\n")