In [1]:
import numpy as np
import pandas as pd
import spacy as sp

In [2]:
# Read the training CSV and keep only the first 200 comments as a plain list
# (small slice, presumably to keep experiments quick -- TODO confirm).
train = pd.read_csv('../input/train.csv')
trainX = train['comment_text'].iloc[:200].tolist()

# Same for the test CSV, keeping only the first 9 comments.
test = pd.read_csv('../input/test.csv')
testX = test['comment_text'].iloc[:9].tolist()

# The last six column names of the training frame are used as the labels.
class_names = train.columns[-6:].tolist()


# make train set for model
def load(tox_type):
    """Build the training examples for a single label.

    Pairs each comment in the module-level ``trainX`` list with an annotation
    dict of the form ``{'cats': {tox_type: <bool>}}``, where the boolean is
    taken from the ``tox_type`` column of the module-level ``train`` frame.

    The pairing is done with ``zip``, so the result is only as long as the
    shorter of ``trainX`` and the label column.

    Args:
        tox_type: Name of the label column to read from ``train``.

    Returns:
        A list of ``(text, {'cats': {tox_type: bool}})`` tuples.
    """
    labels = train[tox_type].values.tolist()
    return [
        (text, {'cats': {tox_type: bool(label)}})
        for text, label in zip(trainX, labels)
    ]

In [3]:
n_iter = 4
# Prediction matrix: one row per entry of class_names, one column per test comment.
test_preds = np.zeros((len(class_names), len(testX)))

# Train an independent single-label text classifier for every class, then score
# the test comments with it.
for i, tox_type in enumerate(class_names):

    train_data = load(tox_type)

    # Fresh blank English pipeline containing only a 'textcat' component that
    # knows about this one label.
    nlp = sp.blank('en')
    clf = nlp.create_pipe('textcat')
    nlp.add_pipe(clf, last=True)
    clf.add_label(tox_type)

    optimizer = nlp.begin_training()
    print("\nClass {}:".format(tox_type))
    print("Training the model...")

    # NOTE: the loop variable is deliberately not called `iter`; at notebook
    # scope that name would replace the builtin iter() for all later cells.
    for epoch in range(n_iter):
        losses = {}
        # The compounding() generator is re-created here, so the batch-size
        # schedule restarts on every pass over the data.
        batches = sp.util.minibatch(train_data, size=sp.util.compounding(4., 16., 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
        print("Iteration {} of {} complete.".format(epoch + 1, n_iter))

    for j, txt in enumerate(testX):
        doc = nlp(txt)
        # Only tox_type was registered on this classifier, so read its score
        # directly as a scalar instead of squeezing a 1-element array into the
        # cell (size-1 array -> scalar conversion is deprecated in NumPy).
        test_preds[i, j] = doc.cats[tox_type]
    
    


Class toxic:
Training the model...
Iteration 1 of 4 complete.
Iteration 2 of 4 complete.
Iteration 3 of 4 complete.
Iteration 4 of 4 complete.

Class severe_toxic:
Training the model...
Iteration 1 of 4 complete.
Iteration 2 of 4 complete.
Iteration 3 of 4 complete.
Iteration 4 of 4 complete.

Class obscene:
Training the model...
Iteration 1 of 4 complete.
Iteration 2 of 4 complete.
Iteration 3 of 4 complete.
Iteration 4 of 4 complete.

Class threat:
Training the model...
Iteration 1 of 4 complete.
Iteration 2 of 4 complete.
Iteration 3 of 4 complete.
Iteration 4 of 4 complete.

Class insult:
Training the model...
Iteration 1 of 4 complete.
Iteration 2 of 4 complete.
Iteration 3 of 4 complete.
Iteration 4 of 4 complete.

Class identity_hate:
Training the model...
Iteration 1 of 4 complete.
Iteration 2 of 4 complete.
Iteration 3 of 4 complete.
Iteration 4 of 4 complete.


In [None]:
# test_preds is allocated as (len(class_names), len(testX)): each row holds the
# scores for one class, each column corresponds to one test comment.
print(test_preds)

In [4]:
# train_data is the variable left over from the final pass of the training
# loop, so this shows the first example built for the last entry of class_names.
train_data[0]

("Explanation\nWhy the edits made under my username Hardcore Metallica Fan were reverted? They weren't vandalisms, just closure on some GAs after I voted at New York Dolls FAC. And please don't remove the template from the talk page since I'm retired now.89.205.38.27",
 {'cats': {'identity_hate': False}})