In [5]:
# import spacy
import numpy as np
import json
from nlp_pipelines import nltk_POS_lemmatizer
import random
import nltk
import pickle
from sklearn.svm import LinearSVC

In [2]:
# Preprocessing intents data
# Load the intents definition. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open until garbage collection.
with open('intents.json') as intents_file:
    intents = json.load(intents_file)

words = []  # all possible words in intents vocabulary
docs = []  # pairs of tokenized word patterns and corresponding classes
seen_tags = set()  # tags that own at least one pattern

# Translation table that deletes the punctuation marks ? ! , .
punctuation_table = str.maketrans('', '', '?!,.')

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Remove punctuation
        pattern = pattern.translate(punctuation_table)
        tokens = nltk.word_tokenize(pattern)
        words.extend(nltk_POS_lemmatizer(pattern))
        docs.append((tokens, intent['tag']))

        # Recorded inside the pattern loop on purpose: an intent without any
        # pattern contributes no tag, exactly as before.
        seen_tags.add(intent['tag'])

# Sorted tag names list
classes = sorted(seen_tags)

# Sort and remove duplicate words
words = sorted(set(words))

# Prepare training dataset
# Fixed seed so that "Restart Kernel -> Run All" yields the same sample order.
SHUFFLE_SEED = 42

training_data = []
output_empty = [0] * len(classes)  # zero vector with one slot per class (not used in this cell)

for doc in docs:
    # Apply chosen pipeline to word_pattern in doc
    # NOTE(review): the vocabulary in `words` was built by lemmatizing the raw
    # pattern string, while here the re-joined tokens are lemmatized. If the
    # tokenizer splits a word (e.g. contractions) the two may disagree -- confirm.
    word_pattern = nltk_POS_lemmatizer(' '.join(doc[0]))
    pattern_lemmas = set(word_pattern)  # O(1) membership test per vocabulary word

    # Populate the bag: 1 if the vocabulary word occurs in the pattern, else 0
    bag = [1 if word in pattern_lemmas else 0 for word in words]

    # Add (bag, tag) pair to total training set
    training_data.append([bag, doc[1]])

# A dedicated Random instance keeps the shuffle reproducible without
# reseeding the module-level generator other cells may rely on.
random.Random(SHUFFLE_SEED).shuffle(training_data)
training_data = np.array(training_data, dtype=object)

# Column 0 holds the bag vectors, column 1 the intent tags.
train_x = list(training_data[:, 0])
train_y = list(training_data[:, 1])
print(train_x[0])
print(train_y[0])

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
about yourself


In [4]:
# Fit a linear SVM on the bag vectors (train_x) and their intent tags (train_y).
# Hyperparameters are named in one place so they are easy to spot and tune.
svm_params = {'dual': False, 'max_iter': 120}
svc = LinearSVC(**svm_params)
svc.fit(train_x, train_y)

Accuracy: 83.333%


In [6]:
# Persist the vocabulary, the tag list and the fitted model.
# Each file is written inside a `with` block so the handle is flushed and
# closed right away; the original left three open handles to the garbage
# collector.
for artifact_path, artifact in (
    ('svm_words.pkl', words),
    ('svm_classes.pkl', classes),
    ('svm_model.pkl', svc),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)