[参考](https://qiita.com/nymwa/items/9c8484ff511123e03ba8)

# 70

In [1]:
import os
import re

import numpy as np
import spacy
from gensim.models import KeyedVectors

In [2]:
# In the code shown here, `nlp` is only used for tokenization (nlp.make_doc in
# tokenize), so a blank English pipeline is sufficient. This avoids loading
# unused pipeline components and does not rely on the 'en' shortcut link,
# which spaCy v3 no longer accepts in spacy.load.
nlp = spacy.blank('en')
# Single-letter category codes; a code's position in this list is the index
# that read_feature_dataset sets to 1 in the one-hot label.
categories = ['b', 't', 'e', 'm']
# Human-readable category names; presumably aligned by position with
# `categories` -- confirm against the dataset description.
category_names = ['business', 'science and technology', 'entertainment', 'health']

In [3]:
def tokenize(x):
    """Split a text into a list of token strings.

    Every run of whitespace is first collapsed to a single space, then the
    text is tokenized with the module-level spaCy pipeline ``nlp`` (tokenizer
    only, via ``make_doc``).

    Args:
        x: The raw text to tokenize.

    Returns:
        A list with the text of each token, in order.
    """
    normalized = re.sub(r'\s+', ' ', x)
    return [token.text for token in nlp.make_doc(normalized)]

def read_feature_dataset(filename):
    """Read a tab-separated dataset and return tokenized texts with one-hot labels.

    Each non-blank line is expected to hold a category code (an entry of the
    module-level ``categories``) in the first column and the text in the
    second column.

    Args:
        filename: Path of the tab-separated file to read.

    Returns:
        A ``(dataset_x, dataset_t)`` tuple. ``dataset_x`` is a list of token
        lists produced by ``tokenize``; ``dataset_t`` is a list of one-hot
        lists, each of length ``len(categories)``.

    Raises:
        ValueError: If a line's first column is not in ``categories``.
        IndexError: If a non-blank line has no second tab-separated column.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(filename, encoding='utf-8') as f:
        lines = f.read().splitlines()
    # Blank lines carry no example; skipping them avoids a confusing
    # "'' is not in list" error from categories.index below.
    rows = [line.split('\t') for line in lines if line.strip()]
    dataset_x = []
    dataset_t = []
    for row in rows:
        # Size the one-hot vector from `categories` instead of hard-coding 4,
        # so the two cannot drift apart.
        label = [0] * len(categories)
        label[categories.index(row[0])] = 1
        dataset_t.append(label)
        dataset_x.append(tokenize(row[1]))
    return dataset_x, dataset_t

In [4]:
# Load each split from its tab-separated file. Every call returns the
# tokenized texts (*_x) and the matching one-hot labels (*_t).
train_x, train_t = read_feature_dataset('train.txt')
valid_x, valid_t = read_feature_dataset('valid.txt')
test_x, test_t = read_feature_dataset('test.txt')

In [5]:
# Load the saved word vectors; mmap='r' asks gensim to memory-map the stored
# arrays read-only rather than reading them fully into memory.
# NOTE: the name `model` is rebound to a Keras network further down in this
# notebook, while sent_to_vector reads `model` as the word-vector lookup.
model = KeyedVectors.load('GoogleNews-vectors-negative300.kv', mmap='r')

In [6]:
# Display the loaded object to confirm that loading succeeded.
model

<gensim.models.keyedvectors.Word2VecKeyedVectors at 0x10acfd3d0>

### TensorFlowでいきます

In [7]:
import tensorflow as tf

In [8]:
def sent_to_vector(sent):
    """Average the word vectors of the in-vocabulary tokens of a sentence.

    Reads the module-level ``model`` as the word-vector lookup. This notebook
    later rebinds ``model`` to a Keras network, so this function must be
    called while ``model`` still refers to the word vectors.

    Args:
        sent: Iterable of token strings.

    Returns:
        A 1-D tensor holding the mean of the vectors of all tokens found in
        ``model``. If no token is found (including an empty ``sent``), a zero
        vector of length ``model.vector_size`` is returned.
    """
    lst = [tf.constant(model[token]) for token in sent if token in model]
    if not lst:
        # Without this guard an all-out-of-vocabulary sentence divides by
        # zero. A zero vector keeps the example in the dataset, so features
        # stay aligned row-by-row with the labels.
        return tf.zeros([model.vector_size], dtype=model.vectors.dtype)
    return sum(lst) / len(lst)

def dataset_to_vector(dataset):
    """Convert tokenized sentences into one stacked tensor of sentence vectors.

    Args:
        dataset: Iterable of tokenized sentences (each an iterable of tokens).

    Returns:
        The result of ``tf.stack`` over the ``sent_to_vector`` output of each
        sentence, in input order.
    """
    vectors = []
    for sentence in dataset:
        vectors.append(sent_to_vector(sentence))
    return tf.stack(vectors)

In [9]:
# Turn every split into a stacked tensor of averaged sentence vectors.
train_v = dataset_to_vector(train_x)
valid_v = dataset_to_vector(valid_x)
test_v = dataset_to_vector(test_x)

In [10]:
# Convert the one-hot label lists into tensors. The same names are rebound,
# so after this cell *_t are tensors rather than Python lists.
train_t = tf.constant(train_t)
valid_t = tf.constant(valid_t)
test_t = tf.constant(test_t)

In [11]:
# Peek at the first training label to check the one-hot encoding.
train_t[0]

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 1, 0, 0], dtype=int32)>

In [12]:
import pickle

In [13]:
# Persist the feature and label tensors of every split under data/.
# The directory is created first so that a fresh checkout does not fail with
# FileNotFoundError on the first open().
os.makedirs('data', exist_ok=True)
for pickle_path, tensor in [
    ('data/train.feature.pickle', train_v),
    ('data/train.label.pickle', train_t),
    ('data/valid.feature.pickle', valid_v),
    ('data/valid.label.pickle', valid_t),
    ('data/test.feature.pickle', test_v),
    ('data/test.label.pickle', test_t),
]:
    with open(pickle_path, 'wb') as f:
        pickle.dump(tensor, f)

# 71

In [14]:
# Check the shape of the training feature tensor.
train_v.shape

TensorShape([10684, 300])

In [15]:
class NN(tf.keras.Model):
    """Single dense layer without bias; ``call`` returns its raw output.

    Args:
        out_dim: Number of output units of the dense layer.
        name: Name passed to the Keras model base class.
    """

    def __init__(self, out_dim, name="NN"):
        super().__init__(name=name)
        self.out_dim = out_dim
        # No bias term: the layer is a pure matrix multiplication.
        self.l1 = tf.keras.layers.Dense(units=out_dim, use_bias=False)

    def call(self, x):
        """Apply the dense layer to ``x`` and return the result unchanged."""
        return self.l1(x)

In [16]:
# Instantiate the network with 4 output units (one per entry of `categories`).
# NOTE: this rebinds `model`, which until now held the word vectors that
# sent_to_vector reads; sent_to_vector must not be called after this cell.
model = NN(4)

In [17]:
# Run the untrained network on the first four training examples and turn the
# outputs into per-row probabilities with a softmax over axis 1.
x = model(train_v[:4])
y = tf.nn.softmax(x, axis=1)
print(y)

tf.Tensor(
[[0.2334692  0.27815533 0.2055894  0.28278604]
 [0.24997279 0.25695467 0.23655325 0.25651932]
 [0.26750898 0.24725448 0.25268868 0.2325479 ]
 [0.26104128 0.27834803 0.23206748 0.2285432 ]], shape=(4, 4), dtype=float32)


# 72

In [18]:
class NN(tf.keras.Model):
    """Single bias-free dense layer followed by a softmax.

    This replaces the earlier ``NN`` definition in this notebook; the only
    difference is that ``call`` applies ``tf.nn.softmax`` to the layer output.

    Args:
        out_dim: Number of output units of the dense layer.
        name: Name passed to the Keras model base class.
    """

    def __init__(self, out_dim, name="NN"):
        super().__init__(name=name)
        self.out_dim = out_dim
        self.l1 = tf.keras.layers.Dense(units=out_dim, use_bias=False)

    def call(self, x):
        """Return the softmax of the dense layer's output for ``x``."""
        logits = self.l1(x)
        return tf.nn.softmax(logits)

In [19]:
# Create a fresh instance of the softmax-output network with 4 output units.
model = NN(4)

In [20]:
def loss(t, y):
    """Mean categorical cross-entropy between targets and predictions.

    Args:
        t: Target labels (one-hot in this notebook).
        y: Predicted class probabilities.

    Returns:
        A scalar tensor: the mean of the cross-entropy values.
    """
    cross_entropy = tf.keras.losses.categorical_crossentropy(t, y)
    return tf.keras.backend.mean(cross_entropy)

In [21]:
# Loss of the freshly initialised (untrained) model on the full training set.
loss(train_t, model(train_v))

<tf.Tensor: shape=(), dtype=float32, numpy=1.3457048>

In [22]:
# Inspect the model's weights (a single kernel, since the layer has no bias).
model.weights

[<tf.Variable 'NN/dense_1/kernel:0' shape=(300, 4) dtype=float32, numpy=
 array([[-0.02457763,  0.05170919, -0.00212602, -0.08934447],
        [ 0.08095491,  0.10621148,  0.02188836, -0.03038672],
        [ 0.07699631, -0.09801296,  0.1181428 ,  0.12297057],
        ...,
        [ 0.09177855, -0.01307347,  0.09992485, -0.00326847],
        [-0.13962416, -0.05638149, -0.02081947,  0.003896  ],
        [ 0.08044873, -0.06248715, -0.12428132,  0.03352007]],
       dtype=float32)>]

In [23]:
# Record the forward pass on a gradient tape so the loss can be differentiated
# with respect to the model's weights.
with tf.GradientTape() as t:
    current_loss = loss(train_t, model(train_v))
# model.weights is wrapped in an extra list here, so the returned gradients
# have the same nested-list structure.
dW = t.gradient(current_loss, [model.weights])

In [24]:
# Print the loss (recomputed here rather than reusing current_loss) and the
# gradient obtained from the tape. The labels are Japanese for "loss" and
# "gradient".
print('損失', loss(train_t, model(train_v)))
print('勾配', dW)

損失 tf.Tensor(1.3457048, shape=(), dtype=float32)
勾配 [[<tf.Tensor: shape=(300, 4), dtype=float32, numpy=
array([[ 0.00271831,  0.00213254, -0.00975994,  0.00490909],
       [-0.00364486,  0.00306664, -0.00378065,  0.00435887],
       [ 0.00312543, -0.00632926,  0.01026588, -0.00706205],
       ...,
       [-0.01006098, -0.00025011,  0.0119477 , -0.00163662],
       [-0.01270853,  0.00809463, -0.00161246,  0.00622636],
       [ 0.01346563, -0.00197507, -0.00591334, -0.00557721]],
      dtype=float32)>]]


# 73

In [25]:
# Configure training: plain SGD with learning rate 0.01, the custom `loss`
# function defined in this notebook, and accuracy reported as a metric.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss=loss,
              metrics=['accuracy']
             )

In [26]:
# Train for 10 epochs on the training split, evaluating on the validation
# split after each epoch; verbose=2 prints one summary line per epoch.
history = model.fit(train_v,train_t,
                    epochs=10, 
                    validation_data=(valid_v, valid_t),
                    verbose=2
                   )

Train on 10684 samples, validate on 1336 samples
Epoch 1/10
10684/10684 - 1s - loss: 1.2189 - accuracy: 0.6482 - val_loss: 1.1157 - val_accuracy: 0.7478
Epoch 2/10
10684/10684 - 1s - loss: 1.0512 - accuracy: 0.7651 - val_loss: 0.9939 - val_accuracy: 0.7657
Epoch 3/10
10684/10684 - 1s - loss: 0.9540 - accuracy: 0.7742 - val_loss: 0.9165 - val_accuracy: 0.7672
Epoch 4/10
10684/10684 - 1s - loss: 0.8893 - accuracy: 0.7765 - val_loss: 0.8618 - val_accuracy: 0.7665
Epoch 5/10
10684/10684 - 1s - loss: 0.8421 - accuracy: 0.7770 - val_loss: 0.8202 - val_accuracy: 0.7680
Epoch 6/10
10684/10684 - 1s - loss: 0.8055 - accuracy: 0.7775 - val_loss: 0.7869 - val_accuracy: 0.7710
Epoch 7/10
10684/10684 - 1s - loss: 0.7757 - accuracy: 0.7777 - val_loss: 0.7592 - val_accuracy: 0.7717
Epoch 8/10
10684/10684 - 1s - loss: 0.7507 - accuracy: 0.7781 - val_loss: 0.7355 - val_accuracy: 0.7717
Epoch 9/10
10684/10684 - 1s - loss: 0.7292 - accuracy: 0.7783 - val_loss: 0.7150 - val_accuracy: 0.7732
Epoch 10/10
106