In [1]:
import numpy as np
import tensorflow as tf
import nltk
from nltk.corpus import stopwords
from collections import defaultdict
import random

from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

In [2]:
ml_classes = ['location', 'professor', 'time']
store = defaultdict(list)
for ml_class in ml_classes:
    with open("{}.txt".format(ml_class), encoding="utf-8") as file:
        lines = file.read().splitlines()
        for line in lines:
            store[ml_class].append(line)

stop_words = set(stopwords.words('english'))


all_words = []
sentences = []

def preprocess(sentence):
    tokens = nltk.word_tokenize(sentence) # tokenize
    tokens = [t.lower() for t in tokens]
    # tokens_filtered = filter(lambda token: token not in stop_words, tokens)
    tokens_filtered = tokens
    stemmed = [stemmer.stem(t) for t in tokens_filtered]
    lemmatized = [lemmatizer.lemmatize(t) for t in stemmed]
    return lemmatized


for ml_class, lines in store.items():
    for line in lines:
        processed_tokens = preprocess(line)
        all_words.extend(processed_tokens)
        sentences.append((processed_tokens, ml_class))

In [3]:
all_uniq_words = list(set(all_words))
uniq_ml_classes = list(set(ml_classes))

def vectorize(token_set, all_uniq_words):
    return list(map(lambda x: 1 if x in token_set else 0, all_uniq_words))

training = []

for tokens, ml_class in sentences:
    token_set = set(tokens)
    x = vectorize(token_set, all_uniq_words)
    y = uniq_ml_classes.index(ml_class)
    training.append([np.array(x), np.array([y])])

random.shuffle(training)
train_set = np.asarray(training)

train_X = train_set[:, 0]
train_X = np.vstack(train_X)
print(train_X.shape)

train_Y = train_set[:, 1]
train_Y = np.vstack(train_Y)
print(train_Y.shape)




(1456, 31)
(1456, 1)


In [4]:
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(train_X)
dnn_clf = tf.contrib.learn.DNNClassifier(hidden_units=[10, 10], n_classes=3, feature_columns=feature_columns)
dnn_clf.fit(x=train_X, y=train_Y, steps=20000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11fbb2e80>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/var/folders/_4/21x8bvt14zj4j_sw7n6910b80000gn/T/tmpd_6nlzvb'}
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKC

INFO:tensorflow:global_step/sec: 241.22
INFO:tensorflow:loss = 8.0038e-05, step = 5701 (0.414 sec)
INFO:tensorflow:global_step/sec: 213.23
INFO:tensorflow:loss = 7.83943e-05, step = 5801 (0.469 sec)
INFO:tensorflow:global_step/sec: 241.451
INFO:tensorflow:loss = 7.68101e-05, step = 5901 (0.414 sec)
INFO:tensorflow:global_step/sec: 215.636
INFO:tensorflow:loss = 7.52865e-05, step = 6001 (0.463 sec)
INFO:tensorflow:global_step/sec: 257.384
INFO:tensorflow:loss = 7.38201e-05, step = 6101 (0.389 sec)
INFO:tensorflow:global_step/sec: 257.885
INFO:tensorflow:loss = 7.24056e-05, step = 6201 (0.388 sec)
INFO:tensorflow:global_step/sec: 256.674
INFO:tensorflow:loss = 7.10387e-05, step = 6301 (0.390 sec)
INFO:tensorflow:global_step/sec: 259.236
INFO:tensorflow:loss = 6.97227e-05, step = 6401 (0.386 sec)
INFO:tensorflow:global_step/sec: 256.662
INFO:tensorflow:loss = 6.84516e-05, step = 6501 (0.390 sec)
INFO:tensorflow:global_step/sec: 258.307
INFO:tensorflow:loss = 6.72216e-05, step = 6601 (0.38

INFO:tensorflow:global_step/sec: 256.266
INFO:tensorflow:loss = 2.83726e-05, step = 13801 (0.390 sec)
INFO:tensorflow:global_step/sec: 261.709
INFO:tensorflow:loss = 2.81357e-05, step = 13901 (0.382 sec)
INFO:tensorflow:global_step/sec: 261.72
INFO:tensorflow:loss = 2.79044e-05, step = 14001 (0.382 sec)
INFO:tensorflow:global_step/sec: 258.827
INFO:tensorflow:loss = 2.76722e-05, step = 14101 (0.386 sec)
INFO:tensorflow:global_step/sec: 261.306
INFO:tensorflow:loss = 2.74493e-05, step = 14201 (0.383 sec)
INFO:tensorflow:global_step/sec: 259.636
INFO:tensorflow:loss = 2.72307e-05, step = 14301 (0.385 sec)
INFO:tensorflow:global_step/sec: 223.349
INFO:tensorflow:loss = 2.70101e-05, step = 14401 (0.449 sec)
INFO:tensorflow:global_step/sec: 241.726
INFO:tensorflow:loss = 2.67948e-05, step = 14501 (0.412 sec)
INFO:tensorflow:global_step/sec: 233.054
INFO:tensorflow:loss = 2.65846e-05, step = 14601 (0.429 sec)
INFO:tensorflow:global_step/sec: 209.047
INFO:tensorflow:loss = 2.63755e-05, step =

DNNClassifier(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._MultiClassHead object at 0x119325b38>, 'hidden_units': [10, 10], 'feature_columns': (_RealValuedColumn(column_name='', dimension=31, default_value=None, dtype=tf.int64, normalizer=None),), 'optimizer': None, 'activation_fn': <function relu at 0x112e2a6a8>, 'dropout': None, 'gradient_clip_norm': None, 'embedding_lr_multipliers': None, 'input_layer_min_slice_size': None})

In [6]:
a = "who is the professor"
a = "where is class?"
a = "yo when class start eh?"
tokens = preprocess(a)
token_set = set(tokens)
x = vectorize(token_set, all_uniq_words)
test_X = np.array(x).reshape(1, -1)
y_pred = list(dnn_clf.predict(test_X))
print(test_X)
print(y_pred)
print(uniq_ml_classes[y_pred[0]])

Instructions for updating:
Please switch to predict_classes, or set `outputs` argument.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Restoring parameters from /var/folders/_4/21x8bvt14zj4j_sw7n6910b80000gn/T/tmpd_6nlzvb/model.ckpt-20000
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0]]
[1]
time
