<a href="https://colab.research.google.com/github/Shakhawat-Fahim/EmpathyWithRoberta/blob/main/empathy_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# from google.colab import files
# uploaded = files.upload()  # Upload your NewsEssay v4 CSV file


In [None]:
!pip install -q transformers datasets scikit-learn evaluate
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from datasets import Dataset
import evaluate

f = "trac3_EMP_train.csv"
df = pd.read_csv(f,  escapechar='\\')



In [None]:

#Required columns
df = df[['person_essay', 'person_empathy']].dropna()
df = df.rename(columns={'person_essay': 'text', 'person_empathy': 'label'})

# Normalizing labels between 0 and 1
df['label'] = df['label'] / 7.0

train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
train_ds = Dataset.from_pandas(train_df)
val_ds = Dataset.from_pandas(val_df)


In [None]:
# Tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=1)

def tokenize(data):
    return tokenizer(data["text"], truncation=True)

train_ds = train_ds.map(tokenize, batched=True)
val_ds = val_ds.map(tokenize, batched=True)


In [9]:
#Training config
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=1,
    logging_dir="./logs",
)

# Metric
metric = evaluate.load("pearsonr")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.squeeze()
    return metric.compute(predictions=predictions, references=labels)

# Trainer
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [None]:

trainer.train()


model.save_pretrained("empathy-roberta-model")
tokenizer.save_pretrained("empathy-roberta-model")

print("\nTraining complete.")


In [10]:
!pip install -q gradio


In [27]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_path = "/content/empathy-roberta-model"  # or your saved model folder
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [32]:
def predict_empathy(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
        empathy_score = outputs.logits.squeeze().item()

    return {"Empathy Score": round((empathy_score*7)/10, 2)}



In [33]:
import gradio as gr

demo = gr.Interface(
    fn=predict_empathy,
    inputs=gr.Textbox(lines=5, label="User Input or Essay"),
    outputs=gr.Label(label="Predicted Empathy Score"),
    title="🧠 Empathy Detection Model",
    description="This model predicts the empathy level (0–7 scale) from text input."
)

demo.launch()  # use share=True for public link



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://78b0b465dc8cdacad4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


