Features:

Uses FLAN-T5 via Hugging Face Transformers

Includes sentiment classification prompt (few-shot)

Runs a Flask server for API access

Uses ngrok to expose the API endpoint to the internet

Returns sentiment + optional summary

In [6]:
# ✅ Step 1: Install required libraries
!pip install -q transformers
!pip install -U transformers
!pip uninstall -y wandb


# ✅ Step 2: Upload your dataset manually
from google.colab import files
uploaded = files.upload()  # Upload 'legal_sentiment_dataset.csv'

# ✅ Step 3: Import necessary modules
import pandas as pd
import torch
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments
)
import numpy as np

# ✅ Step 4: Load dataset
df = pd.read_csv("legal_sentiment_dataset.csv")  # Must match uploaded file name

# ✅ Step 5: Label encoding
label_map = {"Positive": 0, "Neutral": 1, "Negative": 2}
df["label"] = df["sentiment"].map(label_map)

# ✅ Step 6: Split into train/test
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# ✅ Step 7: Load tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

# ✅ Step 8: Custom Dataset class
class LegalSentimentDataset(Dataset):
    def __init__(self, texts, labels):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=128)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# ✅ Step 9: Prepare datasets
train_dataset = LegalSentimentDataset(train_df["text"].tolist(), train_df["label"].tolist())
test_dataset = LegalSentimentDataset(test_df["text"].tolist(), test_df["label"].tolist())

# ✅ Step 10: Load model
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)

# ✅ Step 11: Set training arguments
output_dir = "/content/legal_sentiment_model"
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
   # evaluation_strategy="epoch", # This caused the error
    logging_dir="./logs",
    logging_steps=10,

)

# ✅ Step 12: Metrics function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    acc = np.mean(preds == labels)
    return {"accuracy": acc}

# ✅ Step 13: Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)
# Disable W&B
import os
os.environ["WANDB_DISABLED"] = "true"

# Train the model
trainer.train()

# ✅ Step 14: Train the model
trainer.train()

# ✅ Step 15: Save model and tokenizer
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"✅ Model saved at: {output_dir}")

Found existing installation: wandb 0.20.1
Uninstalling wandb-0.20.1:
  Successfully uninstalled wandb-0.20.1


Saving legal_sentiment_dataset.csv to legal_sentiment_dataset (6).csv


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss
10,0.9652
20,0.4421
30,0.0969
40,0.0275
50,0.0132
60,0.0084
70,0.0064
80,0.0056
90,0.0046
100,0.0044


Step,Training Loss
10,0.0031
20,0.0027
30,0.0023
40,0.0019
50,0.0017
60,0.0015
70,0.0015
80,0.0014
90,0.0013
100,0.0012


✅ Model saved at: /content/legal_sentiment_model


In [15]:
# ✅ Step 1: Install dependencies
!pip install gradio transformers -q

# ✅ Step 2: Load trained model
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
import gradio as gr

# Load your fine-tuned model from this path
model_path = "/content/legal_sentiment_model"

model = DistilBertForSequenceClassification.from_pretrained(model_path)
tokenizer = DistilBertTokenizerFast.from_pretrained(model_path)
model.eval()

label_map = {0: "Positive", 1: "Neutral", 2: "Negative"}

# ✅ Step 3: Define prediction function
def predict_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits
        pred = torch.argmax(logits, dim=1).item()
    return f"📘 Sentiment: {label_map[pred]}"

# ✅ Step 4: Create & launch Gradio app
gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=6, placeholder="Paste legal sentence or paragraph..."),
    outputs="text",
    title="⚖️ Legal Sentiment Analyzer",
    description="Enter any legal document text to predict if it's Positive, Neutral, or Negative."
).launch(share=True)


ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 5

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()


ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.


* Running on public URL: https://b820e14a02c248ea21.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


