In [4]:
import torch
import pandas as pd
import polars as pl
import numpy as np
import requests
import time
import json
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support, classification_report
from tqdm import tqdm

# Configuration
MAX_ORES_QUERIES = 20000  # None for FULL TEST but probably overkill?
OUTPUT_FILE = "ores_benchmark_results.json"

def get_test_set_articles():
    """Get same test set articles with revision IDs"""

    # Load data and create sam test mask as training
    data = torch.load("data/data_quantile_Target_QC_aggcat.pt", weights_only=False)

    # Import mask creation function
    from utils.create_split_masks import create_split_masks
    _, _, test_mask = create_split_masks(data, seed=42)
    test_indices = torch.where(test_mask)[0].cpu().numpy()

    print(f"Test set size: {len(test_indices)} nodes")

    # Get true labels for test set
    test_labels = data.y[test_indices].cpu().numpy()

    # Load wiki data to get pageids
    df_wiki = pl.read_parquet("data/df_wiki_data.parquet")
    test_pageids = df_wiki['pageid'][test_indices].to_list()

    # Load original articles to get revision IDs
    df_original = pl.read_csv("data/cleaned_articles_final.csv")

    test_articles = df_original.filter(
        pl.col('pageid').is_in(test_pageids)
    ).select(['pageid', 'article', 'quality_class', 'revid'])

    # Add node index and true label
    pageid_to_idx = {pid: idx for idx, pid in enumerate(df_wiki['pageid'].to_list())}
    pageid_to_label = {test_pageids[i]: test_labels[i] for i in range(len(test_pageids))}

    test_articles = test_articles.with_columns([
        pl.col('pageid').map_elements(
            lambda x: pageid_to_idx.get(x),
            return_dtype=pl.Int64
        ).alias('node_index'),
        pl.col('pageid').map_elements(
            lambda x: pageid_to_label.get(x),
            return_dtype=pl.Int64
        ).alias('true_label_numeric')
    ])

    # Filter to only test indices
    test_articles = test_articles.filter(
        pl.col('node_index').is_in(test_indices.tolist())
    )

    return test_articles.to_pandas()

def query_ores_api(revid):
    """Query ORES API with revision idD"""

    url = "https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict"
    headers = {
        "User-Agent": "UniversityofKonstanzArticleResearchBot/1.0",
        "Content-Type": "application/json",
    }

    try:
        resp = requests.post(
            url,
            headers=headers,
            json={"rev_id": int(revid)},
            timeout=15
        )
        resp.raise_for_status()
        j = resp.json()

        scores = j.get("enwiki", {}).get("scores", {})
        rev_key = next(iter(scores.keys()), None)

        if rev_key:
            pred = scores[rev_key].get("articlequality", {}).get("score", {}).get("prediction")
            probs = scores[rev_key].get("articlequality", {}).get("score", {}).get("probability", {})
            return pred, probs

    except Exception as e:
        print(f"Error for revid {revid}: {e}")

    return None, None

# Get test articles
test_articles_df = get_test_set_articles()
print(f"Found {len(test_articles_df)} articles")

# Limit for testing if needed
if MAX_ORES_QUERIES:
    test_articles_df = test_articles_df.sample(n=min(MAX_ORES_QUERIES, len(test_articles_df)), random_state=42)

# Launch attack on ORES.API
print("\nOK GARMIN QUERY THE ORES API WITH OUR TEST DATA...")
ores_predictions = []
ores_probabilities = []

for idx, row in tqdm(test_articles_df.iterrows(), total=len(test_articles_df)):
    pred, probs = query_ores_api(row['revid'])
    ores_predictions.append(pred)
    ores_probabilities.append(probs)
    time.sleep(0.2)  # Rate limit ᶻ 𝗓 𐰁(ᴗ_ ᴗ。)ᶻ 𝗓 𐰁(ᴗ_ ᴗ。)ᶻ 𝗓 𐰁(ᴗ_ ᴗ。)

test_articles_df['ores_prediction'] = ores_predictions
test_articles_df['ores_probabilities'] = ores_probabilities

# Map to numeric
quality_to_num = {
    "FA": 9, "FL": 8, "FM": 7, "A": 6,
    "GA": 5, "B": 4, "C": 3, "Start": 2,
    "Stub": 1, "List": 0
}

# Aggregated categories
quality_to_aggcat = {
    "FA": 2, "FL": 2, "FM": 2,
    "A": 1, "GA": 1, "B": 1, "C": 1,
    "Start": 0, "Stub": 0, "List": 0
}

# Map to aggregated categories
test_articles_df['actual_aggcat'] = test_articles_df['quality_class'].map(quality_to_aggcat)
test_articles_df['ores_aggcat'] = test_articles_df['ores_prediction'].map(quality_to_aggcat)

# Filter valids
valid_df = test_articles_df.dropna(subset=['ores_aggcat'])
print(f"\nValid ORES predictions: {len(valid_df)}/{len(test_articles_df)}")

# Metrics
y_true = valid_df['true_label_numeric'].values
y_pred = valid_df['ores_aggcat'].values

accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, support = precision_recall_fscore_support(y_true, y_pred, average=None, zero_division=0)
precision_macro = precision.mean()
recall_macro = recall.mean()
f1_macro = f1.mean()
f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)

# Results
print(f"Accuracy:{accuracy:.4f}")
print(f"Precision (macro):{precision_macro:.4f}")
print(f"Recall (macro):{recall_macro:.4f}")
print(f"F1 (macro):{f1_macro:.4f}")
print(f"F1 (weighted):{f1_weighted:.4f}")
print(classification_report(y_true, y_pred, target_names=['Low', 'Medium', 'High']))

# Save to JSON
results = {
    "model": "ORES",
    "test_size": int(len(valid_df)),
    "total_test_size": int(len(test_articles_df)),
    "failed_predictions": int(len(test_articles_df) - len(valid_df)),
    "metrics": {
        "accuracy": float(accuracy),
        "precision_macro": float(precision_macro),
        "recall_macro": float(recall_macro),
        "f1_macro": float(f1_macro),
        "f1_weighted": float(f1_weighted),
        "precision_per_class": [float(x) for x in precision.tolist()],
        "recall_per_class": [float(x) for x in recall.tolist()],
        "f1_per_class": [float(x) for x in f1.tolist()],
        "support_per_class": [int(x) for x in support.tolist()]
    },
    "class_distribution": {
        "true": {str(k): int(v) for k, v in zip(*np.unique(y_true, return_counts=True))},
        "predicted": {str(k): int(v) for k, v in zip(*np.unique(y_pred, return_counts=True))}
    }
}

with open(OUTPUT_FILE, 'w') as f:
    json.dump(results, f, indent=2)

print(f"\nResults saved: {OUTPUT_FILE}")

# Save as csv
test_articles_df.to_csv("ores_detailed_predictions.csv", index=False)

Test set size: 56980 nodes
Found 56978 articles

OK GARMIN QUERY THE ORES API WITH OUR TEST DATA...


 15%|█▌        | 3021/20000 [53:06<4:36:16,  1.02it/s] 

Error for revid 1274708269: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 15%|█▌        | 3084/20000 [54:28<5:31:03,  1.17s/it] 

Error for revid 1297261759: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 15%|█▌        | 3096/20000 [54:55<4:52:53,  1.04s/it] 

Error for revid 1269508013: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 20%|█▉        | 3930/20000 [1:08:36<3:55:00,  1.14it/s]

Error for revid 1235026710: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 28%|██▊       | 5694/20000 [1:39:13<3:55:46,  1.01it/s] 

Error for revid 1298341906: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 37%|███▋      | 7349/20000 [2:07:52<4:04:59,  1.16s/it] 

Error for revid 1298532157: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 40%|████      | 8079/20000 [2:20:40<4:58:03,  1.50s/it] 

Error for revid 1277275867: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 40%|████      | 8080/20000 [2:20:56<18:47:37,  5.68s/it]

Error for revid 1292445270: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 40%|████      | 8081/20000 [2:21:11<28:28:37,  8.60s/it]

Error for revid 1247872611: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 40%|████      | 8083/20000 [2:21:13<15:12:00,  4.59s/it]

Error for revid 1300163315: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████      | 8111/20000 [2:22:04<18:00:36,  5.45s/it]

Error for revid 1174738063: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 41%|████      | 8249/20000 [2:24:37<3:27:25,  1.06s/it] 

Error for revid 1294501118: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 41%|████▏     | 8250/20000 [2:24:53<17:30:45,  5.37s/it]

Error for revid 1299317161: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8251/20000 [2:24:54<13:54:03,  4.26s/it]

Error for revid 1295950670: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8257/20000 [2:25:01<5:03:38,  1.55s/it] 

Error for revid 1295995044: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8258/20000 [2:25:01<4:03:41,  1.25s/it]

Error for revid 1259211311: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8259/20000 [2:25:02<3:24:08,  1.04s/it]

Error for revid 1302182382: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8260/20000 [2:25:03<2:54:51,  1.12it/s]

Error for revid 1262011024: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8261/20000 [2:25:03<2:34:47,  1.26it/s]

Error for revid 1299956599: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8262/20000 [2:25:04<2:19:25,  1.40it/s]

Error for revid 1292105841: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8264/20000 [2:25:05<2:35:37,  1.26it/s]

Error for revid 1291937190: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8265/20000 [2:25:06<2:36:30,  1.25it/s]

Error for revid 1300045591: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8269/20000 [2:25:10<2:45:33,  1.18it/s]

Error for revid 1299174356: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8270/20000 [2:25:10<2:27:47,  1.32it/s]

Error for revid 1291941939: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8271/20000 [2:25:11<2:34:44,  1.26it/s]

Error for revid 1293362883: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 41%|████▏     | 8274/20000 [2:25:13<2:16:12,  1.43it/s]

Error for revid 1287177335: 404 Client Error: Not Found for url: https://api.wikimedia.org/service/lw/inference/v1/models/enwiki-articlequality:predict


 42%|████▏     | 8306/20000 [2:25:54<3:58:40,  1.22s/it]

Error for revid 1296427016: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 42%|████▏     | 8321/20000 [2:26:28<5:19:09,  1.64s/it] 

Error for revid 1276220835: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 42%|████▏     | 8330/20000 [2:26:52<3:51:03,  1.19s/it] 

Error for revid 1277639116: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 43%|████▎     | 8578/20000 [2:31:40<3:10:09,  1.00it/s] 

Error for revid 1300856985: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 51%|█████     | 10169/20000 [2:59:15<2:55:38,  1.07s/it]

Error for revid 1237785195: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 52%|█████▏    | 10443/20000 [3:04:09<2:21:14,  1.13it/s] 

Error for revid 1297506970: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 55%|█████▌    | 11043/20000 [3:15:03<3:31:49,  1.42s/it] 

Error for revid 1292893139: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 74%|███████▍  | 14785/20000 [4:18:22<1:19:20,  1.10it/s] 

Error for revid 1195329576: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 76%|███████▌  | 15210/20000 [4:25:31<1:10:42,  1.13it/s]

Error for revid 1300578955: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 81%|████████▏ | 16253/20000 [4:43:16<1:17:47,  1.25s/it]

Error for revid 1298796020: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 82%|████████▏ | 16330/20000 [4:45:14<56:55,  1.07it/s]  

Error for revid 1273926865: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 89%|████████▉ | 17857/20000 [5:13:05<38:43,  1.08s/it]  

Error for revid 1300813281: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


 89%|████████▉ | 17894/20000 [5:16:01<48:50,  1.39s/it]   

Error for revid 1300261394: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 90%|████████▉ | 17921/20000 [5:32:23<46:01,  1.33s/it]     

Error for revid 1292877537: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 90%|█████████ | 18071/20000 [5:51:09<43:13,  1.34s/it]     

Error for revid 1298265796: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 90%|█████████ | 18100/20000 [6:07:13<32:32,  1.03s/it]     

Error for revid 1302467301: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 91%|█████████▏| 18255/20000 [6:26:06<22:09,  1.31it/s]     

Error for revid 1188139677: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 91%|█████████▏| 18280/20000 [6:42:23<30:30,  1.06s/it]     

Error for revid 1219990697: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 93%|█████████▎| 18623/20000 [7:33:52<21:45,  1.05it/s]     

Error for revid 1290652555: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 93%|█████████▎| 18656/20000 [7:51:25<24:37,  1.10s/it]     

Error for revid 1275254373: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 94%|█████████▍| 18811/20000 [8:10:42<20:32,  1.04s/it]     

Error for revid 1301015397: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 94%|█████████▍| 18841/20000 [8:15:51<17:40,  1.09it/s]   

Error for revid 1290460479: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 94%|█████████▍| 18899/20000 [8:17:04<17:34,  1.04it/s]  

Error for revid 1292311455: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 95%|█████████▍| 18902/20000 [8:17:21<56:19,  3.08s/it]  

Error for revid 1272729303: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 95%|█████████▍| 18925/20000 [8:18:00<18:28,  1.03s/it]  

Error for revid 1294629505: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 95%|█████████▌| 19033/20000 [8:36:52<19:42,  1.22s/it]    

Error for revid 1252840754: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 96%|█████████▌| 19199/20000 [8:56:12<10:58,  1.22it/s]    

Error for revid 1020128315: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 96%|█████████▌| 19228/20000 [9:12:49<13:17,  1.03s/it]    

Error for revid 1289028148: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 96%|█████████▋| 19254/20000 [9:16:45<14:38,  1.18s/it]   

Error for revid 1258348109: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


 97%|█████████▋| 19405/20000 [9:36:27<10:03,  1.01s/it]    

Error for revid 1292447682: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


100%|█████████▉| 19903/20000 [11:10:30<01:44,  1.07s/it]    

Error for revid 1301459657: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


100%|█████████▉| 19931/20000 [11:18:45<01:10,  1.02s/it]   

Error for revid 1290452120: HTTPSConnectionPool(host='api.wikimedia.org', port=443): Read timed out. (read timeout=15)


100%|██████████| 20000/20000 [11:35:34<00:00,  2.09s/it]   



Valid ORES predictions: 19942/20000
Accuracy:0.6412
Precision (macro):0.5057
Recall (macro):0.6805
F1 (macro):0.4885
F1 (weighted):0.6663
              precision    recall  f1-score   support

         Low       0.97      0.55      0.70     14138
      Medium       0.44      0.86      0.58      5681
        High       0.10      0.63      0.18       123

    accuracy                           0.64     19942
   macro avg       0.51      0.68      0.49     19942
weighted avg       0.82      0.64      0.67     19942


Results saved: ores_benchmark_results.json
