In [1]:
import pandas as pd
import torch
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Setting up the parameters
maximum_features = 30522  # Maximum number of words to consider as features
maximum_length = 128  # Maximum length of input sequences
word_embedding_dims = 50  # Dimension of word embeddings
no_of_filters = 128  # Number of filters in the convolutional layer
kernel_size = 3  # Size of the convolutional filters
hidden_dim_1 = 128  # Number of neurons in the hidden layer

batch_size = 64  # Batch size for training
epochs = 10  # Number of training epochs
threshold = 0.5  # Threshold for binary classification

DATASET_SIZE = 10_000

df = pd.read_csv("../jigsaw/dataset_text_target.csv")
df_true = df[df.target > 0.5]
df_false = df[df.target <= 0.5]
df = pd.concat([df_true[DATASET_SIZE // 2:DATASET_SIZE], df_false[DATASET_SIZE // 2:DATASET_SIZE]], axis=0)
mapper = lambda x: 1 if x > 0.5 else 0
df.target = df.target.apply(mapper)


In [3]:

x_test = df.comment_text
y_test = df.target

In [4]:
# Tokenize and encode the data using the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

X_test_encoded = tokenizer.batch_encode_plus(
    x_test.tolist(),
    padding='max_length',
    truncation=True,
    max_length=maximum_length,
    add_special_tokens=True,
    return_tensors='pt',  # Return PyTorch tensors
)

# Create PyTorch Datasets
test_dataset = TensorDataset(X_test_encoded['input_ids'], torch.tensor(y_test.values, dtype=torch.float32))

# Create DataLoaders
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
import coremltools as ct

scikit-learn version 1.6.1 is not supported. Minimum required version: 0.17. Maximum required version: 1.5.1. Disabling scikit-learn conversion API.

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_global

In [6]:
model = ct.models.MLModel("../conversions/ToxicCNN.mlpackage")

In [25]:
X_test_encoded.to("cpu")

{'input_ids': tensor([[  101,  1997,  2035,  ...,     0,     0,     0],
        [  101,  2000,  7868,  ...,     0,     0,     0],
        [  101,  2007,  1019,  ...,  2065,  8398,   102],
        ...,
        [  101,  2071,  2070,  ...,     0,     0,     0],
        [  101,  2077,  2057,  ...,     0,     0,     0],
        [  101,  2030, 15671,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 1],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

In [26]:
results = []
for i, row in enumerate(X_test_encoded["input_ids"]):
    res = model.predict({"input_ids": [row.numpy().astype("float32")]})
    results.append(res["var_72"][0][0] > 0.5)
    print(f"{((i + 1) / len(X_test_encoded['input_ids'])) * 100:.2f}%", end="\r")

100.00%

In [27]:
print(classification_report(results, y_test))

              precision    recall  f1-score   support

       False       0.95      0.89      0.92      5288
        True       0.89      0.94      0.91      4712

    accuracy                           0.92     10000
   macro avg       0.92      0.92      0.92     10000
weighted avg       0.92      0.92      0.92     10000



In [34]:
sentences = x_test.tolist()
predicted = results
truth = y_test.tolist()
size = len(sentences)

mismatches = []
for i in range(size):
    if bool(predicted[i]) == bool(truth[i]):
        continue
    mismatches.append((sentences[i], bool(predicted[i]), bool(truth[i])))

In [35]:
errors = pd.DataFrame(mismatches, columns=['sentence', 'predicted', 'truth'])
errors.to_csv("cnn_coreml_errors.csv", index=False)