In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
from transformers import RobertaTokenizer, RobertaForSequenceClassification, TrainingArguments, Trainer
import torch
from torch.utils.data import Dataset


In [5]:

from google.colab import files
import io

# Open the Colab upload widget; the result is indexed by uploaded file name.
uploaded = files.upload()

# Use the first uploaded entry as the data file.
filename = next(iter(uploaded))

# Parse the uploaded bytes in memory rather than going through a file path.
csv_bytes = uploaded[filename]
df = pd.read_csv(io.BytesIO(csv_bytes))

Saving processed_data.csv to processed_data (1).csv


In [6]:
# Work on a fixed-seed 20% subsample so experiments stay quick.
# Alternative to the upload cell: df = pd.read_csv('./data/processed_data.csv')
# NOTE: this rebinds `df`, so re-running the cell samples the sample again.
sampled_df = df.sample(frac=0.2, random_state=42)
df = sampled_df
print(f"Sampled dataset size: {len(df)} (20% of original)")

Sampled dataset size: 3796 (20% of original)


In [7]:

# Prepare the dataset
class CustomDataset(Dataset):
    """Torch dataset that tokenizes comments on demand.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (both
    flattened to 1-D) plus a float ``labels`` tensor built from the label row
    stored at the same position.

    Args:
        comments: Sequence of comment strings, indexed by position.
        labels: Sequence of numeric label rows aligned with ``comments``.
        tokenizer: Optional tokenizer callable. When omitted, the
            ``roberta-base`` tokenizer is loaded once and reused by every
            dataset that relies on the default.
        max_length: Length every encoded comment is padded/truncated to.

    Raises:
        ValueError: If ``comments`` and ``labels`` have different lengths.
    """

    # Shared default tokenizer so train/test datasets do not each reload it.
    _default_tokenizer = None

    def __init__(self, comments, labels, tokenizer=None, max_length=512):
        if len(comments) != len(labels):
            raise ValueError(
                f"comments and labels must have the same length "
                f"(got {len(comments)} and {len(labels)})"
            )
        if tokenizer is None:
            if CustomDataset._default_tokenizer is None:
                CustomDataset._default_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
            tokenizer = CustomDataset._default_tokenizer

        self.comments = comments
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of comments."""
        return len(self.comments)

    def __getitem__(self, idx):
        """Tokenize the comment at ``idx`` and pair it with its label row."""
        encoding = self.tokenizer(
            self.comments[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        return {
            # The tokenizer returns a leading batch axis of size 1; flatten drops it.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.float),
        }


In [8]:
# Target matrix: one row per comment, one column per predicted quantity.
label_columns = ['student_difficult', 'student_star', 'gives_good_feedback', 'caring', 'respected']
labels = df[label_columns].values

# Hold out 20% of the rows for evaluation, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    df['comments'],
    labels,
    test_size=0.2,
    random_state=42,
)

# Convert to plain lists because CustomDataset indexes its inputs by position.
train_comments, train_targets = X_train.tolist(), y_train.tolist()
test_comments, test_targets = X_test.tolist(), y_test.tolist()
train_dataset = CustomDataset(train_comments, train_targets)
test_dataset = CustomDataset(test_comments, test_targets)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]



In [9]:
# Single RoBERTa encoder with a 5-unit output head:
# units 0-1 are used for regression, units 2-4 for the tag classification.
model = RobertaForSequenceClassification.from_pretrained(
    'roberta-base',
    num_labels=5,
)


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Custom loss function
def custom_loss(outputs, labels, reg_weight=1.0, class_weight=1.0):
    """Combined regression and multi-label classification loss.

    The first two columns of ``outputs`` (difficulty and rating) are scored
    with mean squared error; the remaining columns (the tags) are treated as
    raw logits and scored with binary cross-entropy on logits.

    Args:
        outputs: 2-D tensor of model outputs, one row per example.
        labels: Tensor of targets with the same shape as ``outputs``.
        reg_weight: Multiplier applied to the regression (MSE) term.
        class_weight: Multiplier applied to the classification (BCE) term.

    Returns:
        Scalar tensor equal to ``reg_weight * mse + class_weight * bce``.
        With the default weights this is the plain sum of both terms.
    """
    # Both loss modules need targets in the same floating dtype as the outputs.
    labels = labels.to(dtype=outputs.dtype)

    # MSE for difficulty and rating (first two outputs)
    reg_loss = torch.nn.MSELoss()(outputs[:, :2], labels[:, :2])

    # BCE for the tags (remaining outputs)
    class_loss = torch.nn.BCEWithLogitsLoss()(outputs[:, 2:], labels[:, 2:])

    return reg_weight * reg_loss + class_weight * class_loss

# Custom Trainer
class CustomTrainer(Trainer):
    """Trainer that scores batches with ``custom_loss`` on the model logits."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the combined loss for one batch.

        Args:
            model: Model to run on the batch.
            inputs: Dict of batch tensors; must contain a ``"labels"`` entry.
            return_outputs: When true, also return the model outputs.
            num_items_in_batch: Accepted for compatibility with Transformers
                releases that pass this argument; not used by the custom loss.

        Returns:
            ``loss`` alone, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        # Take the labels out so only the remaining tensors are passed to the model.
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        loss = custom_loss(outputs.logits, labels)
        return (loss, outputs) if return_outputs else loss

In [11]:
# Output locations and optimisation settings for the fine-tuning run.
training_args = TrainingArguments(
    output_dir='./results',
    logging_dir='./logs',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
)

# CustomTrainer replaces the default loss with custom_loss.
trainer = CustomTrainer(
    args=training_args,
    model=model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Run fine-tuning; the returned training summary is shown as the cell output.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
500,3.3652
1000,1.7096


TrainOutput(global_step=1140, training_loss=2.40935113471851, metrics={'train_runtime': 939.0627, 'train_samples_per_second': 9.699, 'train_steps_per_second': 1.214, 'total_flos': 2396480041562112.0, 'train_loss': 2.40935113471851, 'epoch': 3.0})

In [12]:
# Evaluate the model on the evaluation dataset given to the trainer
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")

# Make predictions
predictions = trainer.predict(test_dataset)
predicted_values = predictions.predictions

# Separate predictions: columns 0-1 are the regression outputs.
predicted_difficulty = predicted_values[:, 0]
predicted_rating = predicted_values[:, 1]
# Columns 2+ are raw logits (the tags are trained with BCEWithLogitsLoss), so a
# probability cut-off of 0.5 corresponds to a logit of 0. Thresholding the
# logits at 0.5 would silently demand a probability above ~0.62.
predicted_tags = (predicted_values[:, 2:] > 0).astype(int)

# Calculate MSE for difficulty and rating
difficulty_mse = mean_squared_error(y_test[:, 0], predicted_difficulty)
rating_mse = mean_squared_error(y_test[:, 1], predicted_rating)
print(f"Difficulty Mean Squared Error: {difficulty_mse}")
print(f"Rating Mean Squared Error: {rating_mse}")

Evaluation results: {'eval_loss': 1.6741864681243896, 'eval_runtime': 22.5835, 'eval_samples_per_second': 33.653, 'eval_steps_per_second': 4.207, 'epoch': 3.0}
Difficulty Mean Squared Error: 1.2574265862110199
Rating Mean Squared Error: 0.8896427915182216


In [13]:
# Report accuracy and a full classification report for each tag.
tags = ['gives_good_feedback', 'caring', 'respected']
# Tag targets start at label column 2; predicted_tags holds tag columns only.
for label_col, tag in enumerate(tags, start=2):
    true_values = y_test[:, label_col]
    tag_predictions = predicted_tags[:, label_col - 2]
    accuracy = accuracy_score(true_values, tag_predictions)
    print(f"\n{tag} Accuracy: {accuracy}")
    print(classification_report(true_values, tag_predictions))


gives_good_feedback Accuracy: 0.7105263157894737
              precision    recall  f1-score   support

         0.0       0.71      1.00      0.83       540
         1.0       0.00      0.00      0.00       220

    accuracy                           0.71       760
   macro avg       0.36      0.50      0.42       760
weighted avg       0.50      0.71      0.59       760


caring Accuracy: 0.7157894736842105
              precision    recall  f1-score   support

         0.0       0.72      1.00      0.83       544
         1.0       0.00      0.00      0.00       216

    accuracy                           0.72       760
   macro avg       0.36      0.50      0.42       760
weighted avg       0.51      0.72      0.60       760


respected Accuracy: 0.6986842105263158
              precision    recall  f1-score   support

         0.0       0.70      1.00      0.82       531
         1.0       0.00      0.00      0.00       229

    accuracy                           0.70       760
 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
