In [None]:
import torch
# Report whether a CUDA GPU is visible. The device name is only queried when a
# GPU exists, because torch.cuda.get_device_name(0) raises on CPU-only machines
# and would otherwise abort the notebook at its very first cell.
cuda_available = torch.cuda.is_available()
print(cuda_available)  # Should be True for GPU training
if cuda_available:
    print(torch.cuda.get_device_name(0))  # Shows GPU name
else:
    print("No CUDA device detected; training will run on CPU.")

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

In [None]:
# Tokenizer comes from the same checkpoint name that the model cell loads.
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)

# IMDb dataset; the "train" and "test" splits are used further down.
dataset = load_dataset("imdb")

In [None]:
def tokenize(batch):
    """Tokenize a batch of reviews for the model.

    Args:
        batch: Mapping with a ``"text"`` entry holding the raw review strings
            (this is what ``dataset.map(..., batched=True)`` passes in).

    Returns:
        The tokenizer output for ``batch["text"]``, truncated to at most
        512 tokens per review.

    Padding is deliberately NOT applied here: the Trainer is built with
    ``DataCollatorWithPadding``, which pads each mini-batch only up to its own
    longest sequence. Padding every review to 512 tokens up front would make
    every batch full length and waste memory and compute.
    """
    return tokenizer(batch["text"], truncation=True, max_length=512)

In [None]:
# Apply `tokenize` to every split in batches, then expose only the listed
# columns, formatted as torch tensors.
tokenized_dataset = dataset.map(tokenize, batched=True)
model_input_columns = ["input_ids", "attention_mask", "label"]
tokenized_dataset.set_format("torch", columns=model_input_columns)

In [None]:
from transformers import AutoModelForSequenceClassification
# Sequence-classification model loaded from the same checkpoint name as the
# tokenizer, configured with two output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english",
    num_labels=2,
)

In [None]:
from transformers import Trainer, TrainingArguments

# Hyperparameters and output locations for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",  # write a checkpoint at the end of every epoch
    logging_dir="./logs",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    learning_rate=2e-5,
    # Mixed-precision training is only enabled when a CUDA device is present;
    # requesting fp16 unconditionally makes TrainingArguments fail on CPU-only
    # machines. (Alternatively, use `accelerate` to auto-detect.)
    fp16=torch.cuda.is_available(),
)

In [None]:
# Collator built from the notebook's tokenizer; it assembles the mini-batches.
padding_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Train on the "train" split and evaluate on the "test" split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=padding_collator,
)

In [None]:
# Run the training loop with the model, arguments, datasets and collator that
# were handed to `trainer` above. The returned value is shown as the cell output.
trainer.train()

In [None]:
# Persist the fine-tuned model. The Trainer was constructed without a
# tokenizer, so the tokenizer is saved explicitly into the same directory;
# this makes the folder loadable on its own, e.g. by
# `pipeline("text-classification", model="./distilbert-imdb-finetuned")`.
trainer.save_model("distilbert-imdb-finetuned")
tokenizer.save_pretrained("distilbert-imdb-finetuned")


In [None]:
# Upload the fine-tuned model AND its tokenizer to the same Hub repository.
# The Streamlit app loads this repo through `pipeline(...)`, which needs the
# tokenizer files next to the model weights.
model.push_to_hub("KavanaPadaki/distilbert-imdb")
tokenizer.push_to_hub("KavanaPadaki/distilbert-imdb")

In [None]:
# `pipeline` was never imported in this notebook, and "path-or-hub-name" was a
# placeholder that cannot be resolved. Build the pipeline directly from the
# in-memory fine-tuned model and tokenizer, keeping the model on whatever
# device it currently lives on, then run a quick smoke test.
from transformers import pipeline

sentiment_pipeline = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=model.device,
)
sentiment_pipeline(
    "A surprisingly heartfelt film with great performances.",
    truncation=True,
    max_length=512,
)

In [None]:
import evaluate
import numpy as np

# Load the "accuracy" metric via `evaluate`; it is used by `compute_metrics`
# and by the test-split evaluation further down.
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score a set of predictions with the accuracy metric.

    Args:
        eval_pred: A pair ``(logits, labels)``; it is unpacked into exactly
            two values.

    Returns:
        The result of ``accuracy_metric.compute`` for the arg-max (over the
        last axis) of ``logits`` compared against ``labels``.
    """
    raw_scores, gold_labels = eval_pred
    predicted_ids = np.argmax(raw_scores, axis=-1)
    return accuracy_metric.compute(
        references=gold_labels,
        predictions=predicted_ids,
    )

In [None]:
# Run the trained model over the "test" split, turn the raw outputs into class
# ids with an arg-max over the last axis, and report accuracy.
predictions = trainer.predict(tokenized_dataset["test"])
predicted_labels = np.argmax(predictions.predictions, axis=-1)
reference_labels = predictions.label_ids
acc = accuracy_metric.compute(
    references=reference_labels,
    predictions=predicted_labels,
)
print(f"Validation Accuracy: {acc['accuracy']:.4f}")

In [None]:
# Load the F1, precision and recall metrics (in that order) via `evaluate`.
f1_metric, precision_metric, recall_metric = (
    evaluate.load(metric_name) for metric_name in ("f1", "precision", "recall")
)

In [None]:
# Derive the predicted class ids once and reuse them for all three metrics.
test_pred_ids = np.argmax(predictions.predictions, axis=-1)
test_true_ids = predictions.label_ids

f1_score = f1_metric.compute(predictions=test_pred_ids, references=test_true_ids)
precision_score = precision_metric.compute(predictions=test_pred_ids, references=test_true_ids)
recall_score = recall_metric.compute(predictions=test_pred_ids, references=test_true_ids)

In [None]:
# Report F1, precision and recall, one per line, to four decimal places.
print(
    f"Validation F1 Score: {f1_score['f1']:.4f}",
    f"Validation Precision Score: {precision_score['precision']:.4f}",
    f"Validation Recall Score: {recall_score['recall']:.4f}",
    sep="\n",
)

In [None]:
%%writefile app.py
import streamlit as st
from transformers import pipeline

# Load your fine-tuned model from local folder or Hugging Face Hub
MODEL_NAME = "KavanaPadaki/distilbert-imdb"  # or "./distilbert-imdb-finetuned"


@st.cache_resource
def load_classifier(model_name: str):
    """Build the text-classification pipeline once per server process.

    Streamlit re-executes this script on every user interaction. Without
    caching, the model would be reloaded on each button click;
    ``st.cache_resource`` keeps a single pipeline instance and reuses it.

    Args:
        model_name: Local directory or Hugging Face Hub id of the model.

    Returns:
        A ``transformers`` text-classification pipeline for ``model_name``.
    """
    return pipeline("text-classification", model=model_name)


classifier = load_classifier(MODEL_NAME)

# Streamlit UI
st.title("🎬 IMDb Sentiment Classifier")
st.write("Enter a movie review and see if it's positive or negative.")

# Text input
user_input = st.text_area("Movie Review", height=150)

if st.button("Classify"):
    if user_input.strip():
        # Truncate long reviews to the same 512-token limit used in training.
        result = classifier(user_input, truncation=True, max_length=512)[0]
        label = result['label']
        score = result['score']
        st.markdown(f"**Prediction:** {label}")
        st.markdown(f"**Confidence:** {score:.2%}")
    else:
        # Empty or whitespace-only input: ask for text instead of classifying.
        st.warning("Please enter a review before classifying.")