In [2]:
pip install transformers datasets scikit-learn

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.w

In [3]:
!pip install datasets



**TASK 1: Zero-Shot**
(Perform initial tests with untrained model)

In [3]:
# test set: Cohere_55K

import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# 1. Using HuggingFace pre-training model
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. load dataset
df = pd.read_csv("final_filtered_balanced_55000.csv")
assert "text" in df.columns and "label" in df.columns

# 3. transfer to HuggingFace Dataset
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 4. Output evaluation indicators
print("\n📊 Zero-shot Evaluation results (untrained model)：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("initial_test_predictions.csv", index=False)
print("✅ result has saved as initial_test_predictions.csv")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/55000 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Zero-shot Evaluation results (untrained model)：
test_loss: 0.6952
test_model_preparation_time: 0.0016
test_accuracy: 0.5000
test_f1: 0.6667
test_precision: 0.5000
test_recall: 1.0000
test_roc_auc: 0.6320
test_runtime: 191.1560
test_samples_per_second: 287.7230
test_steps_per_second: 35.9650
✅ result has saved as initial_test_predictions.csv


In [11]:
# test set: merged_dataset (ZC real dataset)

import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# 1. Using HuggingFace pre-training model
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. load dataset
df = pd.read_csv("merged_dataset.csv")
assert "text" in df.columns and "label" in df.columns

# 3. transfer to HuggingFace Dataset
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 4. Output evaluation indicators
print("\n📊 Zero-shot Evaluation results on merged_dataset.csv (untrained model)：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("initial_test_predictions on merged_dataset.csv", index=False)
print("✅ result has saved as initial_test_predictions on merged_dataset.csv")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8589 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Zero-shot Evaluation results on merged_dataset.csv (untrained model)：
test_loss: 0.6888
test_model_preparation_time: 0.0026
test_accuracy: 0.5670
test_f1: 0.6592
test_precision: 0.6501
test_recall: 0.6686
test_roc_auc: 0.6311
test_runtime: 30.5223
test_samples_per_second: 281.4010
test_steps_per_second: 35.1870
✅ result has saved as initial_test_predictions on merged_dataset.csv


In [12]:
# test set: real_life_data2 (XLF real dataset)

import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# 1. Using HuggingFace pre-training model
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. load dataset
df = pd.read_csv("real_life_data2.csv")
assert "text" in df.columns and "label" in df.columns

# 3. transfer to HuggingFace Dataset
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 4. Output evaluation indicators
print("\n📊 Zero-shot Evaluation results on real_life_data2.csv (untrained model)：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("initial_test_predictions on real_life_data2.csv", index=False)
print("✅ result has saved as initial_test_predictions on real_life_data2.csv")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/112544 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Zero-shot Evaluation results on real_life_data2.csv (untrained model)：
test_loss: 0.6923
test_model_preparation_time: 0.0016
test_accuracy: 0.5088
test_f1: 0.6107
test_precision: 0.6786
test_recall: 0.5552
test_roc_auc: 0.4448
test_runtime: 397.8526
test_samples_per_second: 282.8790
test_steps_per_second: 35.3600
✅ result has saved as initial_test_predictions on real_life_data2.csv


In [13]:
# test set: filtered_logicality_dataset_1 (Kya real dataset)

import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# 1. Using HuggingFace pre-training model
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. load dataset
df = pd.read_csv("filtered_logicality_dataset_1.csv")
assert "text" in df.columns and "label" in df.columns

# 3. transfer to HuggingFace Dataset
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 4. Output evaluation indicators
print("\n📊 Zero-shot Evaluation results on filtered_logicality_dataset_1.csv (untrained model)：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("initial_test_predictions on filtered_logicality_dataset_1.csv", index=False)
print("✅ result has saved as initial_test_predictions on filtered_logicality_dataset_1.csv")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/6781 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Zero-shot Evaluation results on filtered_logicality_dataset_1.csv (untrained model)：
test_loss: 0.6937
test_model_preparation_time: 0.0015
test_accuracy: 0.4918
test_f1: 0.5646
test_precision: 0.4914
test_recall: 0.6633
test_roc_auc: 0.5290
test_runtime: 25.2855
test_samples_per_second: 268.1780
test_steps_per_second: 33.5370
✅ result has saved as initial_test_predictions on filtered_logicality_dataset_1.csv


In [None]:
# test set: GPT_general_dataset_16.5k

import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# 1. Using HuggingFace pre-training model
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. load dataset
df = pd.read_csv("general_16.5k.csv")
assert "text" in df.columns and "label" in df.columns

# 3. transfer to HuggingFace Dataset
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 4. Output evaluation indicators
print("\n📊 Zero-shot Evaluation results on general_16.5k.csv(untrained model)：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("initial_test_predictions on general_16.5k.csv", index=False)
print("✅ result has saved as initial_test_predictions on general_16.5k.csv")

In [None]:
# test set: GPT_adversarial_dataset_4950

import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# 1. Using HuggingFace pre-training model
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. load dataset
df = pd.read_csv("logic_adversarial_4950_labeled.csv")
assert "text" in df.columns and "label" in df.columns

# 3. transfer to HuggingFace Dataset
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 4. Output evaluation indicators
print("\n📊 Zero-shot Evaluation results on logic_adversarial_4950_labeled.csv(untrained model)：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("initial_test_predictions on logic_adversarial_4950_labeled.csv", index=False)
print("✅ result has saved as initial_test_predictions on logic_adversarial_4950_labeled.csv")

**TASK 2:**
Train: Cohere_Train_55K only to TEST: other datasets

In [4]:
# test set: Cohere_test_55K

import os
import pandas as pd
import numpy as np
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score
)

os.environ["WANDB_DISABLED"] = "true"

# 1. load training set and test set
train_df = pd.read_csv("Cohere_train_55K.csv")
test_df = pd.read_csv("Cohere_test_55K.csv")

train_df["label"] = train_df["label"].astype(int)
test_df["label"] = test_df["label"].astype(int)

# 2. validation set
train_df, val_df = train_test_split(train_df, test_size=0.1, stratify=train_df["label"], random_state=42)

# 3. transfer HuggingFace Dataset format
train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
val_dataset = Dataset.from_pandas(val_df.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_df.reset_index(drop=True))

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

# 4. Segmentation of three data sets
train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

for d in [train_dataset, val_dataset, test_dataset]:
    d.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. 加load distilbert model
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 6. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

# 7. parameters setting
training_args = TrainingArguments(
    output_dir="./distilbert_cohere_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    logging_dir="./logs",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# 8. training
trainer.train()

# 9. Evaluate on the test set
print("\n📊 Test set evaluation results：")
test_result = trainer.predict(test_dataset)
for k, v in test_result.metrics.items():
    print(f"{k}: {v:.4f}")

# 10. save final model
trainer.save_model("distilbert_cohere_model")


Map:   0%|          | 0/34650 [00:00<?, ? examples/s]

Map:   0%|          | 0/3850 [00:00<?, ? examples/s]

Map:   0%|          | 0/16500 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.0286,0.02045,0.994805,0.994811,0.994811,0.994811,0.999829
2,0.0119,0.018302,0.996883,0.996886,0.996886,0.996886,0.999924
3,0.004,0.017601,0.997143,0.997147,0.996888,0.997405,0.999974
4,0.0013,0.013956,0.998442,0.998444,0.997926,0.998962,0.999973
5,0.0001,0.014772,0.998182,0.998184,0.997925,0.998443,0.99997



📊 Test set evaluation results：


test_loss: 0.0191
test_accuracy: 0.9972
test_f1: 0.9972
test_precision: 0.9959
test_recall: 0.9985
test_roc_auc: 0.9999
test_runtime: 54.6539
test_samples_per_second: 301.9000
test_steps_per_second: 9.4410


In [5]:
tokenizer.save_pretrained("distilbert_cohere_model")

('distilbert_cohere_model/tokenizer_config.json',
 'distilbert_cohere_model/special_tokens_map.json',
 'distilbert_cohere_model/vocab.txt',
 'distilbert_cohere_model/added_tokens.json',
 'distilbert_cohere_model/tokenizer.json')

In [7]:
# test set: GPT_general_16.5k

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score,
    recall_score, roc_auc_score
)

# 1. load trained model
model_path = "./distilbert_cohere_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 2. load new test set
df = pd.read_csv("general_16.5k.csv")
assert "label" in df.columns,  "dataset must contains 'label' column"

# 3. transfer Dataset format
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 7. print metrics
print("\n📊 Evaluation results of the model on general_16.5k:")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# 8. save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df.to_csv("general_16.5k_with_predictions.csv", index=False)
print("✅ Prediction results saved as general_16.5k_with_predictions.csv")


Map:   0%|          | 0/16500 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Evaluation results of the model on general_16.5k:
test_loss: 4.3769
test_model_preparation_time: 0.0016
test_accuracy: 0.5407
test_f1: 0.2382
test_precision: 0.6975
test_recall: 0.1436
test_roc_auc: 0.6500
test_runtime: 59.3974
test_samples_per_second: 277.7900
test_steps_per_second: 34.7320
✅ Prediction results saved as general_16.5k_with_predictions.csv


In [9]:
# test set: merged_dataset (ZC real dataset)

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score,
    recall_score, roc_auc_score
)

# 1. load trained model
model_path = "./distilbert_cohere_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 2. load new test set
df = pd.read_csv("merged_dataset.csv")
assert "label" in df.columns,  "dataset must contains 'label' column"

# 3. transfer Dataset format
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 7. print metrics
print("\n📊 Evaluation results of the model on merged_dataset.csv:")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# 8. save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df.to_csv("gmerged_dataset_with_predictions.csv", index=False)
print("✅ Prediction results saved as merged_dataset_with_predictions.csv")


Map:   0%|          | 0/8589 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Evaluation results of the model on merged_dataset.csv:
test_loss: 5.9157
test_model_preparation_time: 0.0015
test_accuracy: 0.3892
test_f1: 0.2080
test_precision: 0.5539
test_recall: 0.1281
test_roc_auc: 0.4589
test_runtime: 30.5180
test_samples_per_second: 281.4400
test_steps_per_second: 35.1920
✅ Prediction results saved as merged_dataset_with_predictions.csv


In [10]:
# test set: real_life_data2 (XLF real dataset)

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score,
    recall_score, roc_auc_score
)

# 1. load trained model
model_path = "./distilbert_cohere_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 2. load new test set
df = pd.read_csv("real_life_data2.csv")
assert "label" in df.columns,  "dataset must contains 'label' column"

# 3. transfer Dataset format
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 7. print metrics
print("\n📊 Evaluation results of the model on real_life_data2.csv:")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# 8. save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df.to_csv("real_life_data2_with_predictions.csv", index=False)
print("✅ Prediction results saved as real_life_data2_with_predictions.csv")


Map:   0%|          | 0/112544 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Evaluation results of the model on real_life_data2.csv:
test_loss: 4.9841
test_model_preparation_time: 0.0015
test_accuracy: 0.3820
test_f1: 0.2668
test_precision: 0.7554
test_recall: 0.1620
test_roc_auc: 0.5870
test_runtime: 398.0767
test_samples_per_second: 282.7190
test_steps_per_second: 35.3400
✅ Prediction results saved as real_life_data2_with_predictions.csv


In [14]:
# test set: filtered_logicality_dataset_1 (Kya real dataset)

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score,
    recall_score, roc_auc_score
)

# 1. load trained model
model_path = "./distilbert_cohere_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 2. load new test set
df = pd.read_csv("filtered_logicality_dataset_1.csv")
assert "label" in df.columns,  "dataset must contains 'label' column"

# 3. transfer Dataset format
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 7. print metrics
print("\n📊 Evaluation results of the model on filtered_logicality_dataset_1.csv:")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# 8. save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df.to_csv("filtered_logicality_dataset_1_with_predictions.csv", index=False)
print("✅ Prediction results saved as filtered_logicality_dataset_1_with_predictions.csv")


Map:   0%|          | 0/6781 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Evaluation results of the model on filtered_logicality_dataset_1.csv:
test_loss: 5.6074
test_model_preparation_time: 0.0014
test_accuracy: 0.5046
test_f1: 0.0135
test_precision: 0.6216
test_recall: 0.0068
test_roc_auc: 0.4923
test_runtime: 24.2772
test_samples_per_second: 279.3150
test_steps_per_second: 34.9300
✅ Prediction results saved as filtered_logicality_dataset_1_with_predictions.csv


**TASK 3:**
 Train: cohere + ChatGPT general dataset to TEST: other datasets

In [None]:
# test set: GPT_general_test_16.5K

import os
import pandas as pd
import numpy as np
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score
)

os.environ["WANDB_DISABLED"] = "true"

# 1. load training set and test set
train_df = pd.read_csv("Cohere+GPT_train.csv")
test_df = pd.read_csv("general_test_16.5K.csv")

train_df["label"] = train_df["label"].astype(int)
test_df["label"] = test_df["label"].astype(int)

# 2. validation set
train_df, val_df = train_test_split(train_df, test_size=0.1, stratify=train_df["label"], random_state=42)

# 3. transfer Dataset format
train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
val_dataset = Dataset.from_pandas(val_df.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_df.reset_index(drop=True))

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

for d in [train_dataset, val_dataset, test_dataset]:
    d.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 4. load model
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 5. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

# 6. set parameters
training_args = TrainingArguments(
    output_dir="./distilbert_cohere_GPT_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    logging_dir="./logs",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# 8. training
trainer.train()

# 9. Evaluate on the test set
print("\n📊 Test set evaluation results：")
test_result = trainer.predict(test_dataset)
for k, v in test_result.metrics.items():
    print(f"{k}: {v:.4f}")

# 14. save final model
trainer.save_model("distilbert_cohere_GPT_model")
tokenizer.save_pretrained("distilbert_cohere_GPT_model")

Map:   0%|          | 0/45045 [00:00<?, ? examples/s]

Map:   0%|          | 0/5005 [00:00<?, ? examples/s]

Map:   0%|          | 0/4950 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,0.0123,0.021073,0.996004,0.996018,0.992854,0.999201,0.999928
2,0.0073,0.006886,0.998601,0.998602,0.998403,0.998801,0.999994
3,0.0047,0.005736,0.998402,0.998403,0.998004,0.998801,0.999972
4,0.0021,0.007742,0.998801,0.998802,0.998403,0.999201,0.999911
5,0.0,0.008767,0.998801,0.998802,0.998403,0.999201,0.999874



📊 测试集评估结果：


test_loss: 0.0020
test_accuracy: 0.9998
test_f1: 0.9998
test_precision: 0.9996
test_recall: 1.0000
test_roc_auc: 0.9999
test_runtime: 16.2064
test_samples_per_second: 305.4350
test_steps_per_second: 9.5640


('distilbert_cohere_GPT_model/tokenizer_config.json',
 'distilbert_cohere_GPT_model/special_tokens_map.json',
 'distilbert_cohere_GPT_model/vocab.txt',
 'distilbert_cohere_GPT_model/added_tokens.json',
 'distilbert_cohere_GPT_model/tokenizer.json')

In [None]:
# test set: GPT_adversarial dataset_4950

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

model_path = "./distilbert_cohere_GPT_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# load test data
df = pd.read_csv("logic_adversarial_4950_labeled.csv")

assert "label" in df.columns, "dataset must contains 'label' column"

dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# print metrics
print("\n📊 Evaluation results of the model on logic_adversarial_4950_labeled.csv：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("logic_adversarial_with_predictions.csv", index=False)
print("✅ Prediction results saved as logic_adversarial_with_predictions.csv")


Map:   0%|          | 0/4950 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 模型在 logic_adversarial_4950_labeled.csv 上的评估结果：
test_loss: 2.1453
test_model_preparation_time: 0.0014
test_accuracy: 0.6489
test_f1: 0.6195
test_precision: 0.6761
test_recall: 0.5717
test_roc_auc: 0.7297
test_runtime: 16.5221
test_samples_per_second: 299.5990
test_steps_per_second: 37.4650
✅ 预测结果已保存为 logic_adversarial_with_predictions.csv


In [None]:
# test set: Cohere_test_55K

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

model_path = "./distilbert_cohere_GPT_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# load test data
df = pd.read_csv("Cohere_test_55K.csv")

assert "label" in df.columns, "dataset must contains 'label' column"

dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# print metrics
print("\n📊 Evaluation results of the model on Cohere_test_55K.csv：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("Cohere_test_55K_with_predictions.csv", index=False)
print("✅ Prediction results saved as Cohere_test_55K_with_predictions.csv")


In [None]:
# test set: merged_dataset (ZC real dataset)

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

model_path = "./distilbert_cohere_GPT_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# load test data
df = pd.read_csv("merged_dataset.csv")

assert "label" in df.columns, "dataset must contains 'label' column"

dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# print metrics
print("\n📊 Evaluation results of the model on merged_dataset.csv：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("merged_dataset_with_predictions.csv", index=False)
print("✅ Prediction results saved as merged_dataset_with_predictions.csv")


In [None]:
# test set: real_life_data2 (XLF real dataset)

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

model_path = "./distilbert_cohere_GPT_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# load test data
df = pd.read_csv("real_life_data2.csv")

assert "label" in df.columns, "dataset must contains 'label' column"

dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# print metrics
print("\n📊 Evaluation results of the model on real_life_data2.csv：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("real_life_data2_with_predictions.csv", index=False)
print("✅ Prediction results saved as Creal_life_data2_with_predictions.csv")


In [None]:
# test set: filtered_logicality_dataset_1 (Kya real dataset)

import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

model_path = "./distilbert_cohere_GPT_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# load test data
df = pd.read_csv("filtered_logicality_dataset_1.csv")

assert "label" in df.columns, "dataset must contains 'label' column"

dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# print metrics
print("\n📊 Evaluation results of the model on filtered_logicality_dataset_1.csv：")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df["prob_0"] = result.predictions[:, 0]
df["prob_1"] = result.predictions[:, 1]
df.to_csv("filtered_logicality_dataset_1_with_predictions.csv", index=False)
print("✅ Prediction results saved as filtered_logicality_dataset_1_with_predictions.csv")


**TASK4:**
Train: ChatGPT dataset related to real data to TEST: real dataset

In [4]:
# test set: merged_dataset (ZC real dataset)

import os
import pandas as pd
import numpy as np
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score
)

os.environ["WANDB_DISABLED"] = "true"

# 1. load training set and test set
train_df = pd.read_csv("synthetic_dataset_200.csv")
test_df = pd.read_csv("merged_dataset.csv")

train_df["label"] = train_df["label"].astype(int)
test_df["label"] = test_df["label"].astype(int)

# 2. validation set
train_df, val_df = train_test_split(train_df, test_size=0.1, stratify=train_df["label"], random_state=42)

# 3. transfer HuggingFace Dataset format
train_dataset = Dataset.from_pandas(train_df.reset_index(drop=True))
val_dataset = Dataset.from_pandas(val_df.reset_index(drop=True))
test_dataset = Dataset.from_pandas(test_df.reset_index(drop=True))

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

# 4. Segmentation of three data sets
train_dataset = train_dataset.map(tokenize, batched=True)
val_dataset = val_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

for d in [train_dataset, val_dataset, test_dataset]:
    d.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. 加load distilbert model
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 6. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

# 7. parameters setting
training_args = TrainingArguments(
    output_dir="./distilbert_cohere_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    logging_steps=1,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    logging_dir="./logs",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# 8. training
trainer.train()

# 9. Evaluate on the test set
print("\n📊 Test set evaluation results：")
test_result = trainer.predict(test_dataset)
for k, v in test_result.metrics.items():
    print(f"{k}: {v:.4f}")

# 10. save final model
trainer.save_model("distilbert_GPTreal_model")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/180 [00:00<?, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Map:   0%|          | 0/8589 [00:00<?, ? examples/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Roc Auc
1,No log,0.502479,0.9,0.888889,1.0,0.8,1.0
2,No log,0.313246,1.0,1.0,1.0,1.0,1.0
3,No log,0.188169,1.0,1.0,1.0,1.0,1.0
4,No log,0.121753,1.0,1.0,1.0,1.0,1.0
5,No log,0.104945,1.0,1.0,1.0,1.0,1.0



📊 Test set evaluation results：


test_loss: 0.1708
test_accuracy: 0.9644
test_f1: 0.9714
test_precision: 0.9753
test_recall: 0.9677
test_roc_auc: 0.9928
test_runtime: 1797.5821
test_samples_per_second: 4.7780
test_steps_per_second: 0.1500


In [6]:
tokenizer.save_pretrained("distilbert_GPTreal_model")

('distilbert_GPTreal_model/tokenizer_config.json',
 'distilbert_GPTreal_model/special_tokens_map.json',
 'distilbert_GPTreal_model/vocab.txt',
 'distilbert_GPTreal_model/added_tokens.json',
 'distilbert_GPTreal_model/tokenizer.json')

In [7]:
# test set: filtered_logicality_dataset_1 (Kya real dataset)

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score,
    recall_score, roc_auc_score
)

# 1. load trained model
model_path = "./distilbert_GPTreal_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 2. load new test set
df = pd.read_csv("filtered_logicality_dataset_1.csv")
assert "label" in df.columns,  "dataset must contains 'label' column"

# 3. transfer Dataset format
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 7. print metrics
print("\n📊 Evaluation results of the model on general_16.5k:")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# 8. save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df.to_csv("general_16.5k_with_predictions.csv", index=False)
print("✅ Prediction results saved as general_16.5k_with_predictions.csv")


Map:   0%|          | 0/6781 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



📊 Evaluation results of the model on general_16.5k:
test_loss: 1.1261
test_model_preparation_time: 0.0149
test_accuracy: 0.5039
test_f1: 0.0215
test_precision: 0.5286
test_recall: 0.0110
test_roc_auc: 0.4682
test_runtime: 1449.4960
test_samples_per_second: 4.6780
test_steps_per_second: 0.5850
✅ Prediction results saved as general_16.5k_with_predictions.csv


In [None]:
# test set: real_life_data2 (XLF real dataset)

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
from datasets import Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import (
    accuracy_score, f1_score, precision_score,
    recall_score, roc_auc_score
)

# 1. load trained model
model_path = "./distilbert_GPTreal_model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 2. load new test set
df = pd.read_csv("real_life_data2.csv")
assert "label" in df.columns,  "dataset must contains 'label' column"

# 3. transfer Dataset format
dataset = Dataset.from_pandas(df.reset_index(drop=True))

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

dataset = dataset.map(tokenize, batched=True)
dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# 5. metrics function
def compute_metrics(pred):
    labels = pred.label_ids
    probs = pred.predictions
    preds = np.argmax(probs, axis=1)
    try:
        auc = roc_auc_score(labels, probs[:, 1])
    except:
        auc = float("nan")
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds),
        "precision": precision_score(labels, preds),
        "recall": recall_score(labels, preds),
        "roc_auc": auc
    }

trainer = Trainer(model=model, compute_metrics=compute_metrics)
result = trainer.predict(dataset)

# 7. print metrics
print("\n📊 Evaluation results of the model on real_life_data2:")
for k, v in result.metrics.items():
    print(f"{k}: {v:.4f}")

# 8. save result
df["predicted_label"] = np.argmax(result.predictions, axis=1)
df.to_csv("real_life_data2_with_predictions.csv", index=False)
print("✅ Prediction results saved as real_life_data2_with_predictions.csv")
