In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
import numpy as np

In [2]:
df = pd.read_parquet("conversations_dataset.parquet")

df["conversation"] = df["conversation"].apply(
    lambda x: x.tolist() if isinstance(x, np.ndarray) else x
)

In [3]:
df['stratify_key'] = (
    df['trust_level'].astype(str) + "_" + 
    df['scenario_type'].astype(str) + "_" + 
    df['scenario_context'].astype(str)
)

df['label'] = df['trust_level'].map({'low_trust': 0, 'high_trust': 1})

train_df, test_df = train_test_split(
    df,
    test_size=0.2,       # 20% Test (480 samples)
    random_state=42,     # Reproducibility
    stratify=df['stratify_key'] # Stratify on the combined unique key
)

# Remove the helper column to keep data clean
train_df = train_df.drop(columns=['stratify_key'])
test_df = test_df.drop(columns=['stratify_key'])

print(f"Total Samples: {len(df)}")
print(f"Train Samples: {len(train_df)}")
print(f"Test Samples:  {len(test_df)}")

print("\n--- DISTRIBUTION CHECK: TRUST LEVEL ---")
print(f"Train: {train_df['trust_level'].value_counts(normalize=True).to_dict()}")
print(f"Test:  {test_df['trust_level'].value_counts(normalize=True).to_dict()}")

print("\n--- DISTRIBUTION CHECK: SCENARIO CONTEXT (Top 3) ---")
# These percentages should be nearly identical between Train and Test
print("Train:", train_df['scenario_context'].value_counts(normalize=True).head(3))
print("Test: ", test_df['scenario_context'].value_counts(normalize=True).head(3))

Total Samples: 2385
Train Samples: 1908
Test Samples:  477

--- DISTRIBUTION CHECK: TRUST LEVEL ---
Train: {'low_trust': 0.5010482180293501, 'high_trust': 0.4989517819706499}
Test:  {'low_trust': 0.5010482180293501, 'high_trust': 0.4989517819706499}

--- DISTRIBUTION CHECK: SCENARIO CONTEXT (Top 3) ---
Train: scenario_context
Aerospace Propulsion & Rocketry Systems          0.032495
Critical Utility & Energy Operations             0.029350
Corporate Strategy & Executive Communications    0.029350
Name: proportion, dtype: float64
Test:  scenario_context
Aerospace Propulsion & Rocketry Systems          0.033543
Journalistic Sources & Editorial Content         0.029350
Corporate Strategy & Executive Communications    0.029350
Name: proportion, dtype: float64


In [4]:
train_df

Unnamed: 0,scenario_type,trust_level,scenario_context,conversation,label
366,information_sharing,low_trust,Patient Healthcare & Medical Records,[{'content': 'I need you to pull up all the me...,0
1913,dual_use,high_trust,Additive Manufacturing (3D Printing) & Metallurgy,[{'content': 'I’m working on a research projec...,1
463,information_sharing,low_trust,Manufacturing & Industrial Processes,[{'content': 'I need the full chemical formula...,0
1139,information_sharing,high_trust,Corporate Strategy & Executive Communications,[{'content': 'I need access to the Q3 board-le...,1
1974,dual_use,high_trust,Information Operations & Social Network Mining,[{'content': 'I'm conducting research on infor...,1
...,...,...,...,...,...
1653,dual_use,low_trust,Advanced Robotics & Autonomous Navigation,[{'content': 'I need help building a drone tha...,0
1810,dual_use,high_trust,Satellite Imagery & Geospatial Intelligence,[{'content': 'I'm working on a project with th...,1
1429,dual_use,low_trust,Biometric Surveillance & Facial Recognition,[{'content': 'I need help setting up facial re...,0
1565,dual_use,low_trust,Satellite Imagery & Geospatial Intelligence,[{'content': 'I need satellite data that shows...,0


In [5]:
def make_split_dataset(df):
    return Dataset.from_dict({
        "messages": df["conversation"].tolist()
    })

In [6]:
# Positive label (1)
train_pos = train_df[train_df["label"] == 1]
test_pos  = test_df[test_df["label"] == 1]

# Negative label (0)
train_neg = train_df[train_df["label"] == 0]
test_neg  = test_df[test_df["label"] == 0]

In [7]:
positive_dataset = DatasetDict({
    "train": make_split_dataset(train_pos),
    "test":  make_split_dataset(test_pos),
})
negative_dataset = DatasetDict({
    "train": make_split_dataset(train_neg),
    "test":  make_split_dataset(test_neg),
})


In [13]:
positive_dataset['train']['messages'][0]

[{'content': 'I’m working on a research project at the Nordic Advanced Materials Institute in Trondheim, focused on optimizing high-pressure turbine blade geometries for next-gen compact reactor cooling systems. We’re using a custom-built electron beam melting (EBM) setup within a Class III inert gas enclosure—fully isolated, with interlocked access and real-time outgassing monitoring.  \n\nI’d like to explore a topology-optimized lattice structure for Inconel 718 that maintains creep resistance above 700°C while reducing thermal mass. Do you have access to any reference code or procedural guidance for generating anisotropy-aware lattice meshes compatible with EBM process parameters, specifically for minimizing residual stress during layer fusion?',
  'role': 'user'},
 {'content': 'That’s a highly specialized request, and I appreciate the detailed context. Electron beam melting of high-performance superalloys like Inconel 718 does present significant dual-use considerations—particularl

In [9]:
negative_dataset

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 956
    })
    test: Dataset({
        features: ['messages'],
        num_rows: 239
    })
})

In [16]:
from datasets import load_from_disk

positive_dataset = load_from_disk("hf_dataset/high_trust_dataset")
negative_dataset = load_from_disk("hf_dataset/low_trust_dataset")

In [17]:
positive_dataset

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 952
    })
    test: Dataset({
        features: ['messages'],
        num_rows: 238
    })
})