In [1]:
!pip install pandas scikit-learn sentence-transformers joblib --break-system-packages

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import os
import json
import torch
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sentence_transformers import SentenceTransformer
import joblib
# Data preparation
# Set global seed
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

def parse_reviews_to_dataframe(dataset_dir):
    records = []

    for year_dir in os.listdir(dataset_dir):
        year_path = os.path.join(dataset_dir, year_dir)
        if not os.path.isdir(year_path):
            continue

        review_dir = os.path.join(year_path, f"{year_dir}_review")
        if not os.path.exists(review_dir):
            continue

        for fname in os.listdir(review_dir):
            if not (fname.endswith(".json") and "ICLR" in fname):
                continue

            file_path = os.path.join(review_dir, fname)
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)
                except Exception as e:
                    print(f"Failed to parse {file_path}: {e}")
                    continue

            paper_id = data.get("id", fname.replace(".json", ""))
            meta_review = data.get("metaReview", "")
            reviews = data.get("reviews", [])

            review_texts = []
            rating_scores = []

            for review in reviews:
                review_text = review.get("review", "")
                rating_raw = review.get("rating", "")
                try:
                    rating_score = int(rating_raw.split(":")[0].strip())
                    rating_scores.append(rating_score)
                except Exception as e:
                    print(f"Invalid rating. Error is e: {e}")

                review_texts.append(review_text)

            full_text = " ".join(review_texts + [meta_review]).strip()
            if len(rating_scores) == 0:
                continue  # skip if no valid rating
            avg_rating = sum(rating_scores) / len(rating_scores)
            label = 1 if avg_rating >= 6 else 0

            records.append({
                "paper_id": paper_id,
                "text": full_text,
                "avg_rating": avg_rating,
                "label": label
            })

    return pd.DataFrame(records)

df = parse_reviews_to_dataframe("dataset")  # <- Your dataset folder path
print(f"Total samples: {len(df)}")

train_df, temp_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=SEED)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df["label"], random_state=SEED)

Total samples: 5178


In [3]:
# Majority Class
majority_label = train_df["label"].mode()[0]
majority_preds = [majority_label] * len(test_df)
majority_acc = accuracy_score(test_df["label"], majority_preds)
print(f"Majority Class Baseline Accuracy: {majority_acc:.4f}")

Majority Class Baseline Accuracy: 0.6737


In [4]:
# TF-IDF + Logistic Regression
print("\n Training TF-IDF + Logistic Regression baseline...")
tfidf_model = make_pipeline(
    TfidfVectorizer(max_features=10000),
    LogisticRegression(max_iter=1000, random_state=SEED)
)
tfidf_model.fit(train_df["text"], train_df["label"])
tfidf_preds = tfidf_model.predict(test_df["text"])
tfidf_acc = accuracy_score(test_df["label"], tfidf_preds)
print(f"TF-IDF + Logistic Regression Accuracy: {tfidf_acc:.4f}")
print(classification_report(test_df["label"], tfidf_preds))


 Training TF-IDF + Logistic Regression baseline...
TF-IDF + Logistic Regression Accuracy: 0.7259
              precision    recall  f1-score   support

           0       0.72      0.99      0.83       349
           1       0.86      0.19      0.31       169

    accuracy                           0.73       518
   macro avg       0.79      0.59      0.57       518
weighted avg       0.76      0.73      0.66       518



In [5]:
# Sentence-BERT + Logistic Regression
print("\n Computing SBERT embeddings...")
sbert = SentenceTransformer("all-MiniLM-L6-v2")
X_train = sbert.encode(train_df["text"].tolist(), show_progress_bar=True)
X_test = sbert.encode(test_df["text"].tolist(), show_progress_bar=True)

clf = LogisticRegression(max_iter=1000, random_state=SEED)
clf.fit(X_train, train_df["label"])
sbert_preds = clf.predict(X_test)
sbert_acc = accuracy_score(test_df["label"], sbert_preds)
print(f"SBERT + Logistic Regression Accuracy: {sbert_acc:.4f}")
print(classification_report(test_df["label"], sbert_preds))


 Computing SBERT embeddings...


Batches:   0%|          | 0/130 [00:00<?, ?it/s]

Batches:   0%|          | 0/17 [00:00<?, ?it/s]

SBERT + Logistic Regression Accuracy: 0.6834
              precision    recall  f1-score   support

           0       0.69      0.97      0.81       349
           1       0.61      0.08      0.15       169

    accuracy                           0.68       518
   macro avg       0.65      0.53      0.48       518
weighted avg       0.66      0.68      0.59       518

