In [None]:
"""
This model is used in the "Recipe Book" website.

The purpose of the model is, given a string of words, to provide a score between 0 and 1 based on how offensive or aggressive it is, with 0 being fine and 1
being entirely aggressive. It will do this not only by reading the words and checking for profanity (which can be handled by a simple profanity checker),
but also by attempting to check for aggressive phrases.

While this task could be handled simply by a word checker, I'm making this model more for practice before moving on to more complicated ones (like the other
model in this website, a recommendation system).

Success Goal: 95% accuracy

Once the success goal has been met, the final model will be implemented into the "Recipe Book" website.
"""

In [2]:
# Imports
import torch
import torch.nn as nn

from datasets import load_dataset

In [35]:
# Setting torch to use GPU acceleration if possible.
device = torch.device("cpu")

if torch.cuda.is_available():
    device = torch.device("cuda")

torch.set_default_device(device)
print(f"Using device: {torch.get_default_device()}")

Using device: cpu


In [3]:
# ====================== DATA COLLECTION ======================

In [16]:
# Open the training split in streaming mode and, while testing, keep only the
# first 1000 records of the stream.
datasetStream = load_dataset(
    "allenai/real-toxicity-prompts",
    split="train",
    streaming=True,
)
dataset = datasetStream.take(1000)

In [17]:
# Preparing and splitting the dataset.
# Every usable record becomes [text, label]: text is the prompt followed by its
# continuation, and label is 1.0 when the combined score reaches
# toxicityThreshold, otherwise 0.0.
editedDataset = [] # Text, Toxicity
toxicityThreshold = 1.0
toxicityTags = ["toxicity", "profanity", "sexually_explicit", "flirtation", "identity_attack", "threat", "insult", "severe_toxicity"]

for element in dataset:
    try:
        prompt = element["prompt"]
        continuation = element["continuation"]
        text = prompt["text"] + continuation["text"]

        # Combined score: for each tag, the mean of the prompt score and the
        # continuation score, summed over all tags.
        combinedScore = 0.0
        for tag in toxicityTags:
            combinedScore += (prompt[tag] + continuation[tag]) / 2

    except (KeyError, TypeError):
        # Skip records that lack an expected field or whose fields cannot be
        # added together. Only these two errors are caught so that anything
        # else (including KeyboardInterrupt) still surfaces.
        continue

    editedDataset.append([text, 1.0 if combinedScore >= toxicityThreshold else 0.0])

trainSplit = 0.8

# Index of the first test record; everything before it is training data.
splitIndex = int(len(editedDataset) * trainSplit)

# Comprehensions (rather than zip(*...) unpacking) also work when a split is empty.
trainX = [text for text, _ in editedDataset[:splitIndex]]
trainY = [label for _, label in editedDataset[:splitIndex]]

testX = [text for text, _ in editedDataset[splitIndex:]]
testY = [label for _, label in editedDataset[splitIndex:]]

In [27]:
# Tensor Conversions
class Tokenizer:
    """One-hot character encoder.

    Each character of ``conversionString`` owns the one-hot slot at its position
    in that string. One extra slot at the end (index ``tokenDimension - 1``) is
    reserved for every character that does not occur in ``conversionString``.

    NOTE(review): the default alphabet contains the two characters "Â£", which
    looks like a mis-encoded "£" - confirm before relying on the default.
    """

    def __init__(self, conversionString = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[]{}\"!?Â£$%^&*()_+-=~#@':;/,.<>"):
        """Store the alphabet and derive the one-hot width from it.

        Args:
            conversionString: Characters that receive their own one-hot slot.
        """
        self.conversionString = conversionString
        # One slot per known character plus a final slot for unknown characters.
        self.tokenDimension = len(conversionString) + 1

    def encode(self, inputString):
        """One-hot encode ``inputString`` character by character.

        Args:
            inputString: Text to encode.

        Returns:
            A tensor of shape ``(len(inputString), 1, tokenDimension)`` holding
            a single 1.0 per character; characters missing from
            ``conversionString`` set the last slot.
        """
        unknownIndex = self.tokenDimension - 1
        output = torch.zeros(len(inputString), 1, self.tokenDimension)

        for i, element in enumerate(inputString):
            # str.find returns -1 for a missing character, whereas str.index
            # raises ValueError and would never reach the fallback below.
            index = self.conversionString.find(element)
            if index == -1:
                index = unknownIndex

            output[i, 0, index] = 1.0

        return output

# Collect every distinct character that occurs in the training texts.
# Dict keys keep first-seen order, so the resulting string lists the characters
# in the order they first appear in trainX.
seenCharacters = {}

for text in trainX:
    seenCharacters.update(dict.fromkeys(text))

characters = "".join(seenCharacters)

In [28]:
# ====================== Model Construction ======================

In [40]:
# Building the model.
class ProfanityCheckerModel(nn.Module):
    """Character-level RNN that maps a string to a single score in (0, 1).

    The input string is one-hot encoded by the tokenizer, passed through an
    ``nn.RNN``, and the final hidden state is projected to one value that is
    squashed by a sigmoid.
    """

    def __init__(self, hiddenSize, tokenizer = None):
        """Build the tokenizer, recurrent layer and output head.

        Args:
            hiddenSize: Number of features in the RNN hidden state.
            tokenizer: Optional object exposing ``tokenDimension`` and
                ``encode(str)``. When omitted, a ``Tokenizer`` is built from
                the notebook-level ``characters`` string, as before.
        """
        super().__init__()

        # Accepting a tokenizer lets the model be built without relying on
        # notebook globals; the default keeps the original behaviour.
        self.tokenizer = tokenizer if tokenizer is not None else Tokenizer(characters)

        self.rnn = nn.RNN(self.tokenizer.tokenDimension, hiddenSize)
        self.linear = nn.Linear(hiddenSize, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputString):
        """Score one string.

        Args:
            inputString: Text to score.

        Returns:
            The sigmoid of the linear projection of the first layer's final
            hidden state (``hidden[0]``).
        """
        inputTensor = self.tokenizer.encode(inputString)
        # Only the final hidden state is used; the per-step outputs are ignored.
        _, hidden = self.rnn(inputTensor)
        output = self.linear(hidden[0])

        return self.sigmoid(output)

In [49]:
# Smoke test: build a freshly initialised model with a hidden size of 128 and
# score the first training text.
rnn = ProfanityCheckerModel(hiddenSize=128)
output = rnn(trainX[0])
print(output)

tensor([[0.5341]], grad_fn=<SigmoidBackward0>)
