In [1]:
from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D
from keras.layers import Reshape, Flatten, Dropout, Concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.optimizers import Adam
from keras.models import Model
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from get_json_data import load_data
import pandas as pd
import numpy as np
import math

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#print('Loading training data')
data_name = "movie_score.json"
#data_name = "small_data_set/movie_score_train.json"
x, y = load_data(data_name)

배경음 love theme의 감동적인 선율과 영사기에서 나오는 달콤한 키스 장면들을 보며 토토의 회상에 젖은듯한 애잔한 표정 지금도 잊을수가 없다 


In [3]:
y_squeeze = np.squeeze(y)
(y_squeeze == 2).sum() / len(y_squeeze) * 100

0.5969038931595169

In [4]:
print(set(y))
y = to_categorical(y)
print(y[0])

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


In [5]:
X_train, X_val, y_train, y_val = train_test_split(x, y, test_size=0.4, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=30)

In [6]:
sequence_length = 200 # x.shape[1] 
character_size = 251 # 251
embedding_dim = 256
filter_sizes = [3,4,5]
num_filters = 512
drop = 0.5

In [7]:
epochs = 1
batch_size = 30
MODEL_WEIGHTS_FILE = 'rating_weights.h5'

In [8]:
# this returns a tensor
print("Creating Model...")
inputs = Input(shape=(sequence_length,), dtype='int32')
embedding = Embedding(input_dim=character_size, output_dim=embedding_dim, input_length=sequence_length)(inputs)
reshape = Reshape((sequence_length,embedding_dim,1))(embedding)

conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)

maxpool_0 = MaxPool2D(pool_size=(sequence_length - filter_sizes[0] + 1, 1), strides=(1,1), padding='valid')(conv_0)
maxpool_1 = MaxPool2D(pool_size=(sequence_length - filter_sizes[1] + 1, 1), strides=(1,1), padding='valid')(conv_1)
maxpool_2 = MaxPool2D(pool_size=(sequence_length - filter_sizes[2] + 1, 1), strides=(1,1), padding='valid')(conv_2)

concatenated_tensor = Concatenate(axis=1)([maxpool_0, maxpool_1, maxpool_2])
flatten = Flatten()(concatenated_tensor)
dropout = Dropout(drop)(flatten)
output = Dense(units=10, activation='softmax')(dropout)

Creating Model...


In [9]:
# this creates a model that includes
model = Model(inputs=inputs, outputs=output)

learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.0000001)

callbacks = [learning_rate_reduction,
             EarlyStopping('val_loss', patience=2),
             ModelCheckpoint(MODEL_WEIGHTS_FILE, monitor='val_acc', verbose=1, save_best_only=True, mode='auto')]

model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
print("Traning Model...")
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, callbacks=callbacks, validation_data=(X_test, y_test))  # starts training

Traning Model...
Train on 54199 samples, validate on 13550 samples
Epoch 1/1

Epoch 00001: val_acc improved from -inf to 0.93562, saving model to rating_weights.h5


In [10]:
model.load_weights(MODEL_WEIGHTS_FILE)
print(set(y_test.argmax(axis=-1)))

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}


In [11]:
pred = model.predict(X_val).argmax(axis=1)
pred
y_squeeze = np.squeeze(pred)
(y_squeeze == 9).sum() / len(y_squeeze) * 100

100.0

In [12]:
import sklearn
acc = sklearn.metrics.accuracy_score(y_val.argmax(axis=-1), pred)
acc

0.6710430181327075