In [46]:
import mlcroissant as mlc, inspect, sys
print("runtime version :", getattr(mlc, "__version__", "unknown"))
print("imported from   :", inspect.getfile(mlc))
print("FileObject sig :", inspect.signature(mlc.FileObject))


runtime version : unknown
imported from   : /opt/homebrew/lib/python3.10/site-packages/mlcroissant/__init__.py
FileObject sig : (ctx: 'Context' = <factory>, id: 'str' = <factory>, name: str = '', parents: 'list[Node]' = <factory>, jsonld: 'Any' = None, JSONLD_TYPE: 'Callable[[Context], term.URIRef] | term.URIRef | str | None' = '__MISSING_JSONLD_TYPE__', content_url: str | None = None, content_size: str | None = None, contained_in: list[str] | None = <factory>, description: str | None = None, encoding_formats: list[str] | None = None, md5: str | None = None, same_as: list[str] | None = None, sha256: str | None = None, source: mlcroissant._src.structure_graph.nodes.source.Source = <factory>) -> None


In [40]:
pip install --upgrade mlcroissant


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [41]:
import mlcroissant as mlc
from datetime import datetime

In [42]:
import hashlib

def compute_md5(file_path):
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

# Compute MD5 for vader.csv and graded.csv
vader_md5 = compute_md5("../data/vader.csv")
graded_md5 = compute_md5("../data/graded.csv")

print(f"MD5 for vader.csv: {vader_md5}")
print(f"MD5 for graded.csv: {graded_md5}")

MD5 for vader.csv: 1505f4938525173013651d2e7842b5c9
MD5 for graded.csv: 342a63d1f0183480e46f704c3a27654f


In [47]:
distribution = [
    mlc.FileObject(
        id="vader_csv",
        name="vader.csv",
        description=(
            "CSV file containing columns Case, ID, Repository, CWE, Severity, "
            "Submitted At, Approved At, Description. Raw annotations from reviewers."
        ),
        content_url="data/vader.csv",
        md5=vader_md5,          # or sha256=... (sha-256 is now preferred)
        encoding_formats=["text/csv"],
    ),
    mlc.FileObject(
        id="graded_csv",
        name="graded.csv",
        description=(
            "LLM outputs for each model. Columns: Case, ID, Repository, CWE, "
            "... , grok-3-beta-rating."
        ),
        content_url="data/graded.csv",
        md5=graded_md5,
        encoding_formats=["text/csv"],
    ),
]


In [48]:
def compute_sha256(file_path):
    hash_sha256 = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()

vader_sha256 = compute_sha256("../data/vader.csv")
graded_sha256 = compute_sha256("../data/graded.csv")

In [54]:
from mlcroissant import DataType as DT
# Define record sets (data structure)
record_sets = [
    # Existing record set for vader.csv (unchanged from previous response)
    mlc.RecordSet(
        id="vader_metrics",
        name="vader_metrics",
        description="Vulnerability metrics in the VADER dataset.",
        fields=[
            mlc.Field(
                name="Case",
                description="Case identifier.",
                data_types=[DT.INTEGER],
                source=mlc.Source(file_object="vader_csv"),
            ),
            mlc.Field(
                name="ID",
                description="Unique identifier for submissions.",
                data_types=[DT.INTEGER],
                source=mlc.Source(file_object="vader_csv"),
            ),
            mlc.Field(
                name="Repository",
                description="URL of the repository containing the vulnerability.",
                data_types=[DT.TEXT],
                source=mlc.Source(file_object="vader_csv"),
            ),
            mlc.Field(
                name="CWE",
                description="Common Weakness Enumeration identifier (e.g., CWE-674).",
                data_types=[DT.TEXT],
                source=mlc.Source(file_object="vader_csv"),
            ),
            mlc.Field(
                name="Severity",
                description="Severity level of the vulnerability (e.g., Medium).",
                data_types=[DT.TEXT],
                source=mlc.Source(file_object="vader_csv"),
            ),
            mlc.Field(
                name="Submitted At",
                description="Date the vulnerability was submitted (ISO 8601 format).",
                data_types=[DT.DATE],
                source=mlc.Source(file_object="vader_csv"),
            ),
            mlc.Field(
                name="Approved At",
                description="Date the vulnerability was approved (ISO 8601 format) by reviewer.",
                data_types=[DT.DATE],
                source=mlc.Source(file_object="vader_csv"),
            ),
            mlc.Field(
                name="Description",
                description="Description of the vulnerability.",
                data_types=[DT.TEXT],
                source=mlc.Source(file_object="vader_csv"),
            ),
        ],
    ),
    # Updated record set for graded.csv with all 42 columns
    mlc.RecordSet(
        id="graded_metrics",
        name="graded_metrics",
        description="Model performance ratings for VADER vulnerabilities.",
        fields=[
            mlc.Field(
                name="Case",
                description="Case identifier.",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="ID",
                description="Unique identifier for the submission.",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="Repository",
                description="URL of the repository containing the vulnerability.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="CWE",
                description="Common Weakness Enumeration identifier (e.g., CWE-547).",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="Severity",
                description="Severity level of the vulnerability (1-5).",
                data_types=[DT.INTEGER],  # int64 (despite being 1-5, stored as int64)
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="Description",
                description="Description of the vulnerability.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            # Claude 3.7 Sonnet fields
            mlc.Field(
                name="claude-3.7-sonnet",
                description="Performance evaluation for the Claude 3.7 Sonnet model.",
                data_types=[DT.TEXT],  # object (JSON-like string)
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="claude-3.7-sonnet-explanation",
                description="Explanation score for Claude 3.7 Sonnet (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="claude-3.7-sonnet-remediation",
                description="Remediation score for Claude 3.7 Sonnet (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="claude-3.7-sonnet-other",
                description="Other evaluation score for Claude 3.7 Sonnet (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="claude-3.7-sonnet-comment",
                description="Comments on Claude 3.7 Sonnet's performance.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            # Gemini 2.5 Pro fields
            mlc.Field(
                name="gemini-2.5-pro",
                description="Performance evaluation for the Gemini 2.5 Pro model.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gemini-2.5-pro-explanation",
                description="Explanation score for Gemini 2.5 Pro (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gemini-2.5-pro-remediation",
                description="Remediation score for Gemini 2.5 Pro (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gemini-2.5-pro-other",
                description="Other evaluation score for Gemini 2.5 Pro (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gemini-2.5-pro-comment",
                description="Comments on Gemini 2.5 Pro's performance.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            # GPT-4.1 fields
            mlc.Field(
                name="gpt-4.1",
                description="Performance evaluation for the GPT-4.1 model.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.1-explanation",
                description="Explanation score for GPT-4.1 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.1-remediation",
                description="Remediation score for GPT-4.1 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.1-other",
                description="Other evaluation score for GPT-4.1 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.1-comment",
                description="Comments on GPT-4.1's performance.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            # GPT-4.5 fields
            mlc.Field(
                name="gpt-4.5",
                description="Performance evaluation for the GPT-4.5 model.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.5-explanation",
                description="Explanation score for GPT-4.5 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.5-remediation",
                description="Remediation score for GPT-4.5 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.5-other",
                description="Other evaluation score for GPT-4.5 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.5-comment",
                description="Comments on GPT-4.5's performance.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            # o3 fields
            mlc.Field(
                name="o3",
                description="Performance evaluation for the o3 model.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="o3-explanation",
                description="Explanation score for o3 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="o3-remediation",
                description="Remediation score for o3 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="o3-other",
                description="Other evaluation score for o3 (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="o3-comment",
                description="Comments on o3's performance.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            # Grok 3 Beta fields
            mlc.Field(
                name="grok-3-beta",
                description="Performance evaluation for the Grok 3 Beta model.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="grok-3-beta-explanation",
                description="Explanation score for Grok 3 Beta (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="grok-3-beta-remediation",
                description="Remediation score for Grok 3 Beta (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="grok-3-beta-other",
                description="Other evaluation score for Grok 3 Beta (0-5).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="grok-3-beta-comment",
                description="Comments on Grok 3 Beta's performance.",
                data_types=[DT.TEXT],  # object
                source=mlc.Source(file_object="graded_csv"),
            ),
            # Rating fields for all models
            mlc.Field(
                name="claude-3.7-sonnet-rating",
                description="Overall rating for Claude 3.7 Sonnet (0-10).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gemini-2.5-pro-rating",
                description="Overall rating for Gemini 2.5 Pro (0-10).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.1-rating",
                description="Overall rating for GPT-4.1 (0-10).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="gpt-4.5-rating",
                description="Overall rating for GPT-4.5 (0-10).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="o3-rating",
                description="Overall rating for o3 (0-10).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
            mlc.Field(
                name="grok-3-beta-rating",
                description="Overall rating for Grok 3 Beta (0-10).",
                data_types=[DT.INTEGER],  # int64
                source=mlc.Source(file_object="graded_csv"),
            ),
        ],
    ),
]

In [53]:
print([d for d in dir(DT) if not d.startswith("_")])

['AUDIO_OBJECT', 'BOOL', 'BOUNDING_BOX', 'DATE', 'FLOAT', 'FLOAT16', 'FLOAT32', 'FLOAT64', 'IMAGE_OBJECT', 'INT16', 'INT32', 'INT64', 'INT8', 'INTEGER', 'SPLIT', 'TEXT', 'UINT16', 'UINT32', 'UINT64', 'UINT8', 'URL']


In [58]:
# Define metadata
metadata = mlc.Metadata(
    id="vader_dataset",
    name="VADER Vulnerability Dataset",
    description="A dataset of vulnerability remediation metrics across programming languages, including model performance ratings for vulnerability analysis.",
    date_published=datetime(2025, 5, 16),  # Matches current date: May 16, 2025
    url="https://github.com/AfterQuery/vader",
    distribution=distribution,
    record_sets=record_sets,
    license="https://creativecommons.org/licenses/by/4.0/",
    cite_as="Liu E., Wang A., Georgescu C., Mateega S. (2025). https://huggingface.co/datasets/AfterQuery/vader"
)

# Check for issues and save
print(metadata.issues.report())
import json

with open("vader_croissant.json", "w") as f:
    json.dump(metadata.to_json(), f, indent=2, default=str)

  -  [Metadata(VADER Vulnerability Dataset)] Property "https://schema.org/version" is recommended, but does not exist.


In [6]:
import pandas as pd

In [9]:
df = pd.read_csv('../data/vader.csv')

In [10]:
df

Unnamed: 0,Case,ID,Repository,CWE,Severity,Submitted At,Approved At,Description
0,1,1745334014083,https://github.com/Uthpal-p/Library-Management...,"CWE-674, CWE-121",Medium,2025-04-25T12:49:42.000Z,2025-04-27T01:23:36.000Z,Explanation:\nThe recursive input validation f...
1,2,1745335148884,https://github.com/saleor/saleor/blob/main/sal...,CWE-547,Low,2025-04-27T11:32:48.000Z,2025-04-29T00:53:49.000Z,Root Cause: The code contains hardcoded sensit...
2,3,1745336228778,kunzbhatia/Library-Management-System/blob/main...,CWE-89,High,2025-04-27T17:10:25.000Z,2025-04-28T00:50:15.000Z,Explanation:\nVulnerable functions such as del...
3,4,1745337278378,https://github.com/rio-labs/rio/blob/main/fron...,CWE-79,High,2025-04-27T07:50:32.000Z,2025-04-27T08:02:17.000Z,Root Cause: The vulnerability arises because t...
4,5,1745344282769,https://github.com/heli-toon/LBSHS-LMS,CWE-89,High,2025-04-22T17:51:22.000Z,2025-04-24T14:53:54.000Z,Summary:\nThe SQL injection vulnerability in t...
...,...,...,...,...,...,...,...,...
172,173,1745932579678,rndusr/torf/blob/master/torf/_generate.py,CWE-362,High,2025-04-29T13:16:19.000Z,2025-04-29T15:12:21.000Z,Explanation: \nRace conditions occur when mult...
173,174,1745933230563,https://github.com/kgarayev/filesearch,CWE-22,High,2025-04-29T13:27:10.000Z,2025-04-29T15:16:05.000Z,Explanation: \nThe code doesn't sanitize user ...
174,175,1745935258674,damonlynch/showinfilemanager/blob/main/src/sho...,"CWE-78, CWE-88, CWE-426",Critical,2025-04-29T14:00:58.000Z,2025-04-29T15:22:57.000Z,The code contains two dangerous security misco...
175,176,1745935460553,https://github.com/requirejs/requirejs/tree/2.3.6,CWE-1321,Critical,2025-04-29T14:04:20.000Z,2025-04-29T14:53:12.000Z,Prototype-pollution in RequireJS ≤ 2.3.6 lets ...


In [8]:
print(df.dtypes)

Unnamed: 0         int64
Case               int64
ID                 int64
Repository        object
CWE               object
Severity          object
Submitted At      object
Approved At       object
Description       object
language          object
num_files        float64
num_languages      int64
dtype: object


In [11]:
df = pd.read_csv('../data/graded.csv')

In [12]:
df

Unnamed: 0,Case,ID,Repository,CWE,Severity,Description,claude-3.7-sonnet,claude-3.7-sonnet-explanation,claude-3.7-sonnet-remediation,claude-3.7-sonnet-other,...,grok-3-beta-explanation,grok-3-beta-remediation,grok-3-beta-other,grok-3-beta-comment,claude-3.7-sonnet-rating,gemini-2.5-pro-rating,gpt-4.1-rating,gpt-4.5-rating,o3-rating,grok-3-beta-rating
0,1,1745335148884,https://github.com/saleor/saleor/blob/main/sal...,CWE-547,2,Root Cause: The code contains hardcoded sensit...,"{\n ""cwe_id"": ""CWE-540"",\n ""explanation"": ""T...",2,3,2,...,1,3,2,Incorrect CWE classification: should be CWE-79...,7,8,8,6,6,6
1,2,1745336228778,https://github.com/kunzbhatia/Library-Manageme...,CWE-89,4,Explanation:\nVulnerable functions such as del...,"{\n ""cwe_id"": ""CWE-89"",\n ""explanation"": ""Th...",2,5,3,...,2,5,3,This explanation compared to the original subm...,10,10,7,10,10,10
2,3,1745337278378,https://github.com/rio-labs/rio/blob/main/fron...,CWE-79,4,Root Cause: The vulnerability arises because t...,"{\n ""cwe_id"": ""CWE-79"",\n ""explanation"": ""Th...",1,3,2,...,1,3,2,The remediation improves security but lacks au...,6,8,8,8,8,6
3,4,1745344282769,https://github.com/heli-toon/LBSHS-LMS/,CWE-89,4,Summary:\nThe SQL injection vulnerability in t...,"{\n ""cwe_id"": ""CWE-89"",\n ""explanation"": ""Th...",2,5,3,...,2,5,3,The LLM's vulnerability and CWE identification...,10,10,10,10,10,10
4,5,1745351755910,https://github.com/samir-k1/Firebase-Tut,CWE-200,1,1. What is the Firebase API Key?\nIt’s used to...,"{\n ""cwe_id"": ""CWE-798"",\n ""explanation"": ""T...",0,0,0,...,0,0,0,"The LLM identified an incorrect CWE, resulting...",0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,170,1745932579678,https://github.com/rndusr/torf/blob/master/tor...,CWE-362,4,Explanation: \nRace conditions occur when mult...,"{\n ""cwe_id"": ""CWE-22"",\n ""explanation"": ""Th...",2,5,3,...,2,4,3,Patch limits thread count to mitigate DoS risk...,10,4,9,9,9,9
170,171,1745933230563,https://github.com/kgarayev/filesearch,CWE-22,4,Explanation: \nThe code doesn't sanitize user ...,"{\n ""cwe_id"": ""CWE-22"",\n ""explanation"": ""Th...",1,2,2,...,1,4,2,"The patch is solid, but the CWE ID is incorrec...",5,9,4,9,9,7
171,172,1745935258674,https://github.com/damonlynch/showinfilemanage...,"CWE-78, CWE-88, CWE-426",5,The code contains two dangerous security misco...,"{\n ""cwe_id"": ""CWE-78"",\n ""explanation"": ""Th...",2,5,3,...,2,5,3,The patch addresses the vulnerability effectiv...,10,10,9,10,10,10
172,173,1745935460553,https://github.com/requirejs/requirejs/tree/2.3.6,CWE-1321,5,Prototype-pollution in RequireJS ≤ 2.3.6 lets ...,"{\n ""cwe_id"": ""CWE-79"",\n ""explanation"": ""Re...",0,0,0,...,0,0,0,"The LLM identified an incorrect CWE, resulting...",0,10,0,0,10,0


In [16]:
df.columns

Index(['Case', 'ID', 'Repository', 'CWE', 'Severity', 'Description',
       'claude-3.7-sonnet', 'claude-3.7-sonnet-explanation',
       'claude-3.7-sonnet-remediation', 'claude-3.7-sonnet-other',
       'claude-3.7-sonnet-comment', 'gemini-2.5-pro',
       'gemini-2.5-pro-explanation', 'gemini-2.5-pro-remediation',
       'gemini-2.5-pro-other', 'gemini-2.5-pro-comment', 'gpt-4.1',
       'gpt-4.1-explanation', 'gpt-4.1-remediation', 'gpt-4.1-other',
       'gpt-4.1-comment', 'gpt-4.5', 'gpt-4.5-explanation',
       'gpt-4.5-remediation', 'gpt-4.5-other', 'gpt-4.5-comment', 'o3',
       'o3-explanation', 'o3-remediation', 'o3-other', 'o3-comment',
       'grok-3-beta', 'grok-3-beta-explanation', 'grok-3-beta-remediation',
       'grok-3-beta-other', 'grok-3-beta-comment', 'claude-3.7-sonnet-rating',
       'gemini-2.5-pro-rating', 'gpt-4.1-rating', 'gpt-4.5-rating',
       'o3-rating', 'grok-3-beta-rating'],
      dtype='object')

In [14]:
print(df.dtypes)

Case                              int64
ID                                int64
Repository                       object
CWE                              object
Severity                          int64
Description                      object
claude-3.7-sonnet                object
claude-3.7-sonnet-explanation     int64
claude-3.7-sonnet-remediation     int64
claude-3.7-sonnet-other           int64
claude-3.7-sonnet-comment        object
gemini-2.5-pro                   object
gemini-2.5-pro-explanation        int64
gemini-2.5-pro-remediation        int64
gemini-2.5-pro-other              int64
gemini-2.5-pro-comment           object
gpt-4.1                          object
gpt-4.1-explanation               int64
gpt-4.1-remediation               int64
gpt-4.1-other                     int64
gpt-4.1-comment                  object
gpt-4.5                          object
gpt-4.5-explanation               int64
gpt-4.5-remediation               int64
gpt-4.5-other                     int64
