In [83]:
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

In [74]:
df = pd.read_csv('traindata.csv',nrows = 200)
dropped = ['processed', 'offensiveness_score']
rename = {'txt': 'comment', 'isOffensive': 'label'}
df = df.drop(columns=dropped)
df = df.rename(columns=rename)
df.head()

Unnamed: 0,comment,label
0,> The difference in average earnings between m...,0
1,"The myth is that the ""gap"" is entirely based o...",0
2,The assertion is that women get paid less for ...,0
3,You said in the OP that's not what they're mea...,0
4,>Men and women are not payed less for the same...,0


In [3]:
comments = df['comment'].tolist()
labels = df['label'].tolist()

In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
encoded_inputs = tokenizer(comments, padding=True, truncation=True, return_tensors='tf')

In [5]:
labels = tf.convert_to_tensor(labels)

In [6]:
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [7]:
input_ids = encoded_inputs['input_ids']
attention_mask = encoded_inputs['attention_mask']

In [8]:
outputs = bert_model(input_ids, attention_mask=attention_mask)
pooled_output = outputs['pooler_output']


In [9]:
discriminator = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [10]:
discriminator.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                      loss=tf.keras.losses.BinaryCrossentropy(),
                      metrics=['accuracy'])

In [12]:
discriminator.fit(pooled_output, labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1d635c700d0>

In [26]:
generator = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(768, activation='linear')
])


In [38]:
gan_input = tf.keras.Input(shape=(768,))
generated_output = generator(gan_input)
gan_output = discriminator(generated_output)

gan = tf.keras.Model(gan_input, gan_output)

In [40]:
discriminator.trainable = False

# Compile and train the GAN
gan.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            loss=tf.keras.losses.BinaryCrossentropy(),
            metrics=['accuracy'])

In [41]:
gan.fit(pooled_output, labels, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1d6368e3a10>

In [75]:
dftest = pd.read_csv('traintest.csv',nrows = 20)
droppedt = ['severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
# rename = {'txt': 'comment', 'isOffensive': 'label'}
dftest = dftest.drop(columns=droppedt)
# df = df.rename(columns=rename)
dftest

Unnamed: 0,id,comment_text,toxic
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0
5,00025465d4725e87,"""\n\nCongratulations from me as well, use the ...",0
6,0002bcb3da6cb337,COCKSUCKER BEFORE YOU PISS AROUND ON MY WORK,1
7,00031b1e95af7921,Your vandalism to the Matt Shirvington article...,0
8,00037261f536c51d,Sorry if the word 'nonsense' was offensive to ...,0
9,00040093b2687caa,alignment on this subject and which are contra...,0


In [76]:
test_comments = dftest['comment_text'].tolist()
test_labels = dftest['toxic'].tolist()

In [77]:
encoded_test_inputs = tokenizer(test_comments, padding=True, truncation=True, return_tensors='tf')
test_input_ids = encoded_test_inputs['input_ids']
test_attention_mask = encoded_test_inputs['attention_mask']


In [78]:
test_outputs = bert_model(test_input_ids, attention_mask=test_attention_mask)
test_pooled_output = test_outputs['pooler_output']

In [81]:
discriminator_predictions = discriminator.predict(test_pooled_output)
discriminator_predictions = np.round(discriminator_predictions).flatten()



In [84]:
discriminator_accuracy = accuracy_score(test_labels, discriminator_predictions)
discriminator_precision = precision_score(test_labels, discriminator_predictions)
discriminator_recall = recall_score(test_labels, discriminator_predictions)
discriminator_f1 = f1_score(test_labels, discriminator_predictions)

In [85]:
print("Discriminator Metrics:")
print("Accuracy:", discriminator_accuracy)
print("Precision:", discriminator_precision)
print("Recall:", discriminator_recall)
print("F1-Score:", discriminator_f1)

Discriminator Metrics:
Accuracy: 0.65
Precision: 0.16666666666666666
Recall: 0.3333333333333333
F1-Score: 0.2222222222222222


In [86]:
toxic_comments = ["I hate you.", "I like donuts."]
toxic_encoded_inputs = tokenizer(toxic_comments, padding=True, truncation=True, return_tensors='tf')
toxic_input_ids = toxic_encoded_inputs['input_ids']
toxic_attention_mask = toxic_encoded_inputs['attention_mask']


In [87]:
toxic_outputs = bert_model(toxic_input_ids, attention_mask=toxic_attention_mask)
toxic_pooled_output = toxic_outputs['pooler_output']


In [88]:
non_toxic_output = generator(toxic_pooled_output)

In [92]:
non_toxic_comments = tokenizer.batch_decode(non_toxic_output.numpy(), skip_special_tokens=True)

In [93]:
for comment in non_toxic_comments:
    print(comment)

[unused0] [unused0] [UNK] [unused0] [unused0] [UNK] [unused0] [UNK] [UNK] [unused0] [UNK] [unused0] [unused0] [UNK] [unused0] [UNK] [UNK] [unused0] [UNK] [unused0] [UNK] [unused0] [UNK] [UNK] [unused0] [UNK] [unused0] [unused0] [unused0] [unused0] [UNK] [UNK] [unused0] [UNK] [unused0] [unused0] [UNK] [unused0] [unused0] [UNK] [unused0] [UNK] [UNK] [UNK] [UNK] [UNK] [unused0] [unused0] [unused0] [UNK] [unused0] [UNK] [UNK] [UNK] [unused0] [UNK] [UNK] [unused0] [unused0] [unused0] [UNK] [UNK] [UNK] [UNK] [unused0] [unused0] [unused0] [UNK] [unused0] [UNK] [UNK] [UNK] [UNK] [unused0] [unused0] [unused0] [unused0] [unused0] [unused0] [UNK] [UNK] [UNK] [UNK] [UNK] [unused0] [unused0] [unused0] [UNK] [unused0] [unused0] [UNK] [unused0] [unused0] [UNK] [UNK] [unused0] [unused0] [unused0]
[UNK] [unused0] [UNK] [unused0] [UNK] [UNK] [unused0] [UNK] [UNK] [UNK] [UNK] [unused0] [unused0] [unused0] [UNK] [UNK] [UNK] [unused0] [unused0] [UNK] [unused0] [UNK] [unused0] [unused0] [unused0] [UNK] [UNK