In [1]:
!pip install pandas scikit-learn sentence-transformers joblib --break-system-packages

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import os
import json
import torch
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sentence_transformers import SentenceTransformer
# Data preparation
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

def parse_reviews_to_dataframe(dataset_dir):
    records = []

    for year_dir in os.listdir(dataset_dir):
        year_path = os.path.join(dataset_dir, year_dir)
        if not os.path.isdir(year_path):
            continue

        review_dir = os.path.join(year_path, f"{year_dir}_review")
        if not os.path.exists(review_dir):
            continue

        for fname in os.listdir(review_dir):
            if not (fname.endswith(".json") and "ICLR" in fname):
                continue

            file_path = os.path.join(review_dir, fname)
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)
                except Exception as e:
                    print(f"Failed to parse {file_path}: {e}")
                    continue

            paper_id = data.get("id", fname.replace(".json", ""))
            meta_review = data.get("metaReview", "")
            reviews = data.get("reviews", [])

            review_texts = []
            rating_scores = []

            for review in reviews:
                review_text = review.get("review", "")
                rating_raw = review.get("rating", "")
                try:
                    rating_score = int(rating_raw.split(":")[0].strip())
                    rating_scores.append(rating_score)
                except Exception as e:
                    print(f"Invalid rating. Error is e: {e}")

                review_texts.append(review_text)

            #full_text = " ".join(review_texts + [meta_review]).strip()
            avg_rating = sum(rating_scores) / len(rating_scores)
            label = 1 if avg_rating >= 6 else 0

            records.append({
                "paper_id": paper_id,
                "text": meta_review,
                "avg_rating": avg_rating,
                "label": label
            })

    return pd.DataFrame(records)

df = parse_reviews_to_dataframe("dataset")  # <- Your dataset folder path
print(f"Total samples: {len(df)}")

train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=SEED)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df["label"], random_state=SEED)

Total samples: 5178


In [3]:
# Majority Class
majority_label = train_df["label"].mode()[0]
majority_preds = [majority_label] * len(test_df)
majority_acc = accuracy_score(test_df["label"], majority_preds)
print(f"Majority Class Baseline Accuracy: {majority_acc:.4f}")

Majority Class Baseline Accuracy: 0.6737


In [4]:
# TF-IDF + Logistic Regression
print("\n Training TF-IDF + Logistic Regression baseline...")
tfidf_model = make_pipeline(
    TfidfVectorizer(max_features=10000),
    LogisticRegression(max_iter=1000, random_state=SEED)
)
tfidf_model.fit(train_df["text"], train_df["label"])
tfidf_preds = tfidf_model.predict(test_df["text"])
tfidf_acc = accuracy_score(test_df["label"], tfidf_preds)
print(f"TF-IDF + Logistic Regression Accuracy: {tfidf_acc:.4f}")
print(classification_report(test_df["label"], tfidf_preds))


 Training TF-IDF + Logistic Regression baseline...
TF-IDF + Logistic Regression Accuracy: 0.7703
              precision    recall  f1-score   support

           0       0.76      0.96      0.85       349
           1       0.82      0.38      0.52       169

    accuracy                           0.77       518
   macro avg       0.79      0.67      0.68       518
weighted avg       0.78      0.77      0.74       518



In [5]:
# Sentence-BERT + Logistic Regression
print("\n Computing SBERT embeddings...")
sbert = SentenceTransformer("all-MiniLM-L6-v2")
X_train = sbert.encode(train_df["text"].tolist(), show_progress_bar=True)
X_test = sbert.encode(test_df["text"].tolist(), show_progress_bar=True)

clf = LogisticRegression(max_iter=1000, random_state=SEED)
clf.fit(X_train, train_df["label"])
sbert_preds = clf.predict(X_test)
sbert_acc = accuracy_score(test_df["label"], sbert_preds)
print(f"SBERT + Logistic Regression Accuracy: {sbert_acc:.4f}")
print(classification_report(test_df["label"], sbert_preds))


 Computing SBERT embeddings...


  return torch._C._cuda_getDeviceCount() > 0


Batches:   0%|          | 0/130 [00:00<?, ?it/s]

Batches:   0%|          | 0/17 [00:00<?, ?it/s]

SBERT + Logistic Regression Accuracy: 0.7008
              precision    recall  f1-score   support

           0       0.71      0.94      0.81       349
           1       0.63      0.20      0.30       169

    accuracy                           0.70       518
   macro avg       0.67      0.57      0.56       518
weighted avg       0.68      0.70      0.64       518



In [6]:
# added one more linear classifier as suggested
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder

print("\nComputing SBERT embeddings...")
sbert = SentenceTransformer("all-MiniLM-L6-v2")

# Compute SBERT embeddings
X_train = sbert.encode(train_df["text"].tolist(), show_progress_bar=True, convert_to_tensor=True)
X_val = sbert.encode(val_df["text"].tolist(), show_progress_bar=True, convert_to_tensor=True)
X_test = sbert.encode(test_df["text"].tolist(), show_progress_bar=True, convert_to_tensor=True)

# Encode labels
le = LabelEncoder()
y_train = torch.tensor(le.fit_transform(train_df["label"].tolist()), dtype=torch.long)
y_val = torch.tensor(le.transform(val_df["label"].tolist()), dtype=torch.long)
y_test = torch.tensor(le.transform(test_df["label"].tolist()), dtype=torch.long)

# Dataloaders
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define MLP Classifier
class MLPClassifier(torch.nn.Module):
    def __init__(self, input_dim=384, hidden_dim=128, num_classes=2):  # 384 for MiniLM
        super().__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPClassifier(input_dim=X_train.shape[1], num_classes=len(le.classes_)).to(device)
criterion = torch.nn.CrossEntropyLoss()
# I tried learning rates of 1e-3 and 1e-4. I got better results with 1e-3   
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training
print("\nTraining MLP on SBERT embeddings...")
model.train()
# overfits after 8 epochs
for epoch in range(8):
    total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation after each epoch
    model.eval()
    with torch.no_grad():
        val_logits = model(X_val.to(device))
        val_preds = val_logits.argmax(dim=1).cpu()
        val_acc = accuracy_score(y_val, val_preds)
    print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Validation Accuracy: {val_acc:.4f}")

# Evaluation
print("\nEvaluating MLP on SBERT embeddings...")
model.eval()
with torch.no_grad():
    preds = model(X_test.to(device)).argmax(dim=1).cpu().numpy()

sbert_mlp_acc = accuracy_score(test_df["label"], preds)
print(f"SBERT + MLP Accuracy: {sbert_mlp_acc:.4f}")
print(classification_report(test_df["label"], preds))


Computing SBERT embeddings...


Batches:   0%|          | 0/130 [00:00<?, ?it/s]

Batches:   0%|          | 0/17 [00:00<?, ?it/s]

Batches:   0%|          | 0/17 [00:00<?, ?it/s]


Training MLP on SBERT embeddings...
Epoch 1 | Loss: 81.4076 | Validation Accuracy: 0.6737
Epoch 2 | Loss: 76.7239 | Validation Accuracy: 0.6718
Epoch 3 | Loss: 73.4870 | Validation Accuracy: 0.6873
Epoch 4 | Loss: 71.0482 | Validation Accuracy: 0.6815
Epoch 5 | Loss: 68.8346 | Validation Accuracy: 0.6911
Epoch 6 | Loss: 66.6963 | Validation Accuracy: 0.7008
Epoch 7 | Loss: 64.5903 | Validation Accuracy: 0.6931
Epoch 8 | Loss: 62.2848 | Validation Accuracy: 0.6969

Evaluating MLP on SBERT embeddings...
SBERT + MLP Accuracy: 0.6950
              precision    recall  f1-score   support

           0       0.71      0.91      0.80       349
           1       0.58      0.25      0.35       169

    accuracy                           0.69       518
   macro avg       0.64      0.58      0.57       518
weighted avg       0.67      0.69      0.65       518

