In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [22]:
# Load the labelled reviews and shuffle them reproducibly.

from sklearn.utils import shuffle

# Seed shared by every shuffle in this notebook; 114 | 7 (bitwise OR) is 119.
RANDOMSEED = 119

data = shuffle(
    pd.read_csv('../Database/emotionjudgemen.csv'),
    random_state=RANDOMSEED,
)

# Targets come from the 'label' column, inputs from the 'review' column.
Y = np.array(data['label'])
X = np.array(data['review'])

# Sample count and the fraction of samples that precedes the test slice.
dataSize = len(X)
baseCut = 0.7

In [23]:
# Build the character vocabulary and both lookup tables
# (character -> index and index -> character) from every review in X.
char2idx = {}
idx2char = {}
vocabulary = []

idx = 0
totalCharacters = 0

# Not read by any visible cell; kept in case code outside this view uses it.
buf = 0

for line in X:
    totalCharacters += len(line)
    for word in line:
        # char2idx holds exactly the entries of `vocabulary`, so the dict
        # membership test is equivalent to scanning the list but takes O(1)
        # instead of O(len(vocabulary)) per character.
        if word not in char2idx:
            vocabulary.append(word)
            char2idx[word] = idx
            idx2char[idx] = word
            idx += 1

# Mean review length in characters. The total has to be divided by the number
# of reviews; dividing by `idx` (the vocabulary size) gave an unrelated ratio.
meansSentenceLength = totalCharacters / len(X) if len(X) else 0


# Special tokens are appended after all real characters, so they receive the
# highest indices.
systemWords = ['<EOS>', '<STA>', '<PAD>', '<UNK>']
vocabulary += systemWords

for word in systemWords:
    char2idx[word] = idx
    idx2char[idx] = word
    idx += 1

# Shuffle the (already shuffled) frame once more with the same seed and
# re-extract the label / review arrays.
data = shuffle(data, random_state=RANDOMSEED)
Y = np.array(data['label'])
X = np.array(data['review'])

# Index at which the test slice starts.
splitPoint = int(dataSize * baseCut)

# Training uses the whole dataset; the test set is the tail of those same
# arrays, not a disjoint set.
# NOTE(review): the fit cell passes validation_split=0.3, which per the Keras
# docs holds out the last samples, so this tail presumably coincides with the
# validation slice - confirm that this is intended.
trainX, trainY = X, Y
# trainX, trainY = X[ : int(dataSize * baseCut)], Y[ : int(dataSize * baseCut)]
testX, testY = X[splitPoint:], Y[splitPoint:]


In [24]:
# Pad the sentences and translate them into vocabulary indices.

# Fixed number of tokens in every encoded sentence.
paddingLength = 60

# Author's worked example of the token layout (padL = padding length):
#
#   len = 10, padL = 10:   start + 8 + end = 10
#   len = 9,  padL = 10:   s + 7 + end = 9

def Translate(sentence, paddingLength):
    """Encode a sentence as a fixed-length list of vocabulary indices.

    The layout is ``<STA>``, one index per character of ``sentence``,
    ``<EOS>``, then ``<PAD>`` tokens. The list is cut to exactly
    ``paddingLength`` entries, so a sentence longer than
    ``paddingLength - 2`` characters loses its tail, including ``<EOS>``.

    Characters that are missing from ``char2idx`` are mapped to ``<UNK>``
    rather than raising ``KeyError``; this is the same fallback that
    ``GetResult`` applies at inference time.

    Args:
        sentence: iterable of characters (normally a ``str``).
        paddingLength: number of indices in the returned list.

    Returns:
        A list of ``paddingLength`` integer indices.
    """
    unknown = char2idx['<UNK>']

    x = [char2idx['<STA>']]
    x += [char2idx.get(word, unknown) for word in sentence]
    x.append(char2idx['<EOS>'])
    # Over-pad, then cut: short sentences are filled and long ones truncated
    # by the same slice.
    x += [char2idx['<PAD>']] * paddingLength
    return x[:paddingLength]
# Encode every training sentence into a row of `paddingLength` indices.
xBuf = [Translate(sentence, paddingLength) for sentence in trainX]
trainX = np.array(xBuf)

# Same encoding for the test sentences.
xBuf = [Translate(sentence, paddingLength) for sentence in testX]
testX = np.array(xBuf)

# Make sure the labels are NumPy arrays as well.
trainY = np.array(trainY)
testY = np.array(testY)

In [25]:
# Refresh the size parameters now that the vocabulary is complete.

# Presumably the embedding output width - TODO confirm against the model cell.
embeddingOutlength = 128

# Number of entries in the vocabulary, special tokens included.
dictSize = len(vocabulary)
print(dictSize)

2562


In [26]:
# from tensorflow import keras

# class JudgementaModel(keras.Model):
#     def __init__(self, dictSize, outputDim):
#         super(JudgementaModel, self).__init__()
        
#         self.embeddingLayer = keras.layers.Embedding(
#             input_dim = dictSize,
#             output_dim = outputDim
#         )

#         self.encoder = keras.layers.LSTM(
#             units = 128,
#             return_sequences = True,
#             return_state = True
#         )
        
#         self.transDense = keras.layers.Dense(
#             units = 64,
#             activation = 'relu'
#         )

#         self.dropOut = keras.layers.Dropout(0.3)

#         self.outputDense = keras.layers.Dense(
#             units = 2,
#         )


#     def call(self, inputs):
#         embeddingOutput = self.embeddingLayer(inputs)

#         coderOutput, coderStateH, coderStateC = self.encoder(embeddingOutput)

#         x = self.transDense(coderOutput)
#         output = self.outputDense(x)

#         return output

In [27]:
# Build and compile the binary sentiment classifier.
# model = JudgementaModel(dictSize, 128)

from tensorflow import keras

model = keras.Sequential()

# Vocabulary index -> 128-dimensional embedding vector.
model.add(keras.layers.Embedding(input_dim=dictSize, output_dim=128))

# Two stacked GRUs; the first returns the full sequence so that the second
# can consume it, the second returns only its final output.
# NOTE(review): activation='relu' replaces the GRU default (tanh); the Keras
# docs list tanh among the requirements for the cuDNN kernel - confirm that
# relu is intended.
model.add(keras.layers.GRU(units=128, activation='relu', return_sequences=True))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.GRU(units=128, activation='relu'))
model.add(keras.layers.Dropout(0.5))

# Dense head ending in a single sigmoid unit.
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    optimizer=keras.optimizers.Adam(0.003),
    loss=keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

In [28]:
import os

# Weight checkpoint shared between runs of this cell.
weightsPath = '../SavedModel/EJ.h5'

# Create the checkpoint directory before training: otherwise save_weights
# would fail on a missing folder only after all epochs have finished.
os.makedirs(os.path.dirname(weightsPath), exist_ok=True)

# Resume from an earlier checkpoint when one exists. Every run of this cell
# therefore continues training from the previously saved weights.
if os.path.exists(weightsPath):
    model.load_weights(weightsPath)

history = model.fit(trainX, trainY, batch_size=64, epochs=3, validation_split=0.3)

model.save_weights(weightsPath)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [29]:
# Spot-check one encoded test sentence against its true label.
sampleIndex = 114
sampleBatch = np.array([testX[sampleIndex]])  # batch containing a single row

output = model.predict(sampleBatch)
print(output)
print(testY[sampleIndex])

[[0.00930882]]
0


In [32]:
# Example sentence that is passed to GetResult at the end of this cell.
inputSentence = '杂鱼杂鱼'

def Judgement(result):
    """Return whether ``result`` is strictly greater than 0.5."""
    threshold = 0.5
    isAboveThreshold = result > threshold
    return isAboveThreshold


def GetResult(sentence):
    """Classify a raw sentence with the trained model.

    The sentence is encoded character by character: ``<STA>``, one index per
    character (``<UNK>`` for characters absent from ``char2idx``), ``<EOS>``,
    then ``<PAD>``; the result is cut to ``paddingLength`` entries and fed to
    ``model`` as a batch of one.

    Args:
        sentence: iterable of characters (normally a ``str``).

    Returns:
        A ``(label, probability)`` tuple. ``label`` is "正面评价" (positive
        review) when ``Judgement(probability)`` is truthy and "负面评价"
        (negative review) otherwise; ``probability`` is the single value
        produced by the model.
    """
    unknownIndex = char2idx['<UNK>']

    # char2idx has the same keys as `vocabulary`, so a dict lookup with a
    # fallback replaces the O(len(vocabulary)) list scan per character.
    inputIndexs = [char2idx.get(w, unknownIndex) for w in sentence]

    inputIndexs = [char2idx['<STA>']] + inputIndexs + [char2idx['<EOS>']] + [char2idx['<PAD>']] * paddingLength

    inputIndexs = inputIndexs[:paddingLength]

    # Flatten the model output to its single element.
    probability = model(np.array([inputIndexs])).numpy().reshape(1)[0]

    if Judgement(probability):
        return "正面评价", probability
    return "负面评价", probability

# Show the example sentence, then the (label, probability) pair for it.
print(inputSentence)
demoResult = GetResult(inputSentence)
print(demoResult)

杂鱼杂鱼
('正面评价', 0.82233304)
