# Classification sample code
The goal is checking an input token is positive or negative

# Prepare data

In [1]:
import csv
import numpy as np
from sklearn.model_selection import train_test_split
from gensim.models.keyedvectors import KeyedVectors
from keras.layers import Dense,Activation,Dropout
from keras.models import Sequential

SIZE_OF_W2V = 200
W2V = KeyedVectors.load_word2vec_format('../data/embeddings/entity_vector.model.bin', binary=True)

def prepare_y(y):
    if int(y) == 2:
        return [1,0]
    else:
        return [0,1]
    
def prepare_x(x):
    try:
        return W2V[x]
    except:
        return [0] * SIZE_OF_W2V

x_data = []
y_data = []

with open('../data/training/sentiment.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        x_data.append(prepare_x(row["x"]))
        y_data.append(prepare_y(row["y"]))
        
x_data = np.array(x_data)
y_data = np.array(y_data)

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2)

print(x_train.shape)
print(y_train.shape)

Using TensorFlow backend.


(18, 200)
(18, 2)


# Train model

In [2]:
def create_model(input_length):
    model = Sequential()
    model.add(Dense(SIZE_OF_W2V, input_dim=SIZE_OF_W2V))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Dense(2))
    model.add(Activation('sigmoid'))
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

    return model

model = create_model(len(x_train))
model.fit(x_train, y_train, batch_size=1, epochs=10, verbose=1)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1122195d0>

# Test

In [3]:
results = model.evaluate(x_test, y_test)
print(f"error: {results[0]}, accuracy: {results[1]}")

error: 0.12244538962841034, accuracy: 1.0


# Predict

In [4]:
def translate_y(y):
    print(y)
    if y.flat[0] >= y.flat[1]:
        return "negative"
    else:
        return "postive"
        
def do_prediction(x):
    label = translate_y(model.predict(np.array(prepare_x(x)).reshape(1,-1)))
    print(f"{x} is {label} \n")

do_prediction("好い")
do_prediction("邪悪")
do_prediction("ナイス")
do_prediction("堕落")

[[0.33617374 0.7034423 ]]
好い is postive 

[[9.9992287e-01 2.3520677e-05]]
邪悪 is negative 

[[0.19780168 0.78985876]]
ナイス is postive 

[[0.995552   0.00196384]]
堕落 is negative 

