In [67]:
import tensorflow as tf

# Get the GPU device name.
device_name = tf.test.gpu_device_name()

# The device name should look like the following:
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    print('GPU device not found')

Found GPU at: /device:GPU:0


In [68]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: NVIDIA GeForce GTX 1080 Ti


In [69]:
import pandas as pd

In [70]:
from numpy.random import RandomState
#%% Split whole dataset into train and valid set
df = pd.read_csv('mr/twitter1.csv', encoding='latin-1', header=0)
rng = RandomState()
#df.drop(['Cleanedreview'],axis=1,inplace=True)
#df.to_csv('mr/sst5.csv', index=False)
train = df.sample(frac=0.9, random_state=rng)
test = df.loc[~df.index.isin(train.index)]
train.to_csv('mr/train-1twitter.csv', index=False)
test.to_csv('mr/test-1twitter.csv', index=False)

In [71]:
train_df = pd.read_csv("mr/train-1twitter.csv", encoding='latin-1', header=0)

In [72]:
train_df

Unnamed: 0,review,sentiment
0,JUST IN: The ninth film by Quentin Tarantino e...,2
1,Beyond frickin' stoked to catch this! And the ...,2
2,Frozen2 poll,1
3,IÂm at the theater to see captainmarvel and I...,1
4,If Jennifer Lawrence didnt wanted to continue ...,0
...,...,...
26996,Epic team up. CaptainMarvel,2
26997,Don't miss DarkPhoenix at ArcLight Cinemas!,0
26998,CaptainMarvel is proof there is a Disney/Marve...,1
26999,IÂm seeing a lot of articles and interviews a...,2


In [73]:
train_df.iloc[0][1]

2

In [74]:
test_df = pd.read_csv("mr/test-1twitter.csv", encoding='latin-1', header=0)

In [75]:
test_df.shape

(3000, 2)

In [76]:
test_df.head()

Unnamed: 0,review,sentiment
0,I spent my NYE falling asleep to CatsMovie in ...,0
1,The latest The Eyewear Insight Daily! captain...,2
2,Hasbro Bop It Disney Frozen2 Olaf Edition is p...,1
3,It wouldÂve been easier if half the universe ...,1
4,I won a 3D printed model of the captainmarvel ...,1


In [77]:
train_df['sentiment'] = (train_df['sentiment']).astype(int)
test_df['sentiment'] = (test_df['sentiment']).astype(int)

train_df.head(30)

Unnamed: 0,review,sentiment
0,JUST IN: The ninth film by Quentin Tarantino e...,2
1,Beyond frickin' stoked to catch this! And the ...,2
2,Frozen2 poll,1
3,IÂm at the theater to see captainmarvel and I...,1
4,If Jennifer Lawrence didnt wanted to continue ...,0
5,TAQUILLA VIERNES est. JumanjiTheNextLevel 194...,0
6,We went to watch usmovie yesterday. Not horror...,0
7,Morning stroll on the way to my cinema OnceUpo...,2
8,ICYMI - In the first of our CaptainMarvel seri...,1
9,Lastly did you know the 1965 Cadillac is the ...,2


In [78]:
train_df_bert = pd.DataFrame({
    "id": range(len(train_df)),
    "label": train_df['sentiment'],
    "sentence": train_df['review'].replace(r'\n', ' ', regex=True)
})

train_df_bert.head()

Unnamed: 0,id,label,sentence
0,0,2,JUST IN: The ninth film by Quentin Tarantino e...
1,1,2,Beyond frickin' stoked to catch this! And the ...
2,2,1,Frozen2 poll
3,3,1,IÂm at the theater to see captainmarvel and I...
4,4,0,If Jennifer Lawrence didnt wanted to continue ...


In [79]:
train_df_bert = train_df_bert.iloc[0:10000]

In [80]:
train_df_bert

Unnamed: 0,id,label,sentence
0,0,2,JUST IN: The ninth film by Quentin Tarantino e...
1,1,2,Beyond frickin' stoked to catch this! And the ...
2,2,1,Frozen2 poll
3,3,1,IÂm at the theater to see captainmarvel and I...
4,4,0,If Jennifer Lawrence didnt wanted to continue ...
...,...,...,...
9995,9995,2,How old were you when you realised that the Ir...
9996,9996,2,JamesCameron congratulates producer KevinFeige...
9997,9997,1,Was breaking the dam a political message? Abso...
9998,9998,1,What makes this impressive is that James Camer...


In [81]:
test_df_bert = pd.DataFrame({
    "id": range(len(test_df)),
    "label": test_df['sentiment'],
    "sentence": test_df['review'].replace(r'\n', ' ', regex=True)
})

test_df_bert.head()

Unnamed: 0,id,label,sentence
0,0,0,I spent my NYE falling asleep to CatsMovie in ...
1,1,2,The latest The Eyewear Insight Daily! captain...
2,2,1,Hasbro Bop It Disney Frozen2 Olaf Edition is p...
3,3,1,It wouldÂve been easier if half the universe ...
4,4,1,I won a 3D printed model of the captainmarvel ...


In [82]:
test_df_bert = test_df_bert.iloc[0:3000]

In [83]:
test_df_bert

Unnamed: 0,id,label,sentence
0,0,0,I spent my NYE falling asleep to CatsMovie in ...
1,1,2,The latest The Eyewear Insight Daily! captain...
2,2,1,Hasbro Bop It Disney Frozen2 Olaf Edition is p...
3,3,1,It wouldÂve been easier if half the universe ...
4,4,1,I won a 3D printed model of the captainmarvel ...
...,...,...,...
2995,2995,1,So I just watched Avengers Infinity War....yes...
2996,2996,2,Hollywood Ever After... Once Upon a Time...in ...
2997,2997,0,We are on dance break/orgy? Number 63 and I st...
2998,2998,2,And our captainMarvel scorecard


In [84]:
# Get the lists of sentences and their labels.
train_sentences = train_df_bert.sentence.values
train_labels = train_df_bert.label.values

In [85]:
# Get the lists of sentences and their labels.
test_sentences = test_df_bert.sentence.values
test_labels = test_df_bert.label.values

In [86]:
from transformers import BertTokenizer

# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

Loading BERT tokenizer...


In [87]:
# Print the original sentence.
print(' Original: ', train_sentences[1])

# Print the sentence split into tokens.
print('Tokenized: ', tokenizer.tokenize(train_sentences[1]))

# Print the sentence mapped to token ids.
print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(train_sentences[1])))

 Original:  Beyond frickin' stoked to catch this! And the gifted PokÃ©cards with my ticket are baller!! Will show you the cards and some things I got today with it! Also DetectivePikachu DontSpoilTheMystery for others even if it isn't as big as AvengersEndgame 
Tokenized:  ['beyond', 'fr', '##ick', '##in', "'", 'stoke', '##d', 'to', 'catch', 'this', '!', 'and', 'the', 'gifted', 'po', '##ka', '##©', '##cards', 'with', 'my', 'ticket', 'are', 'ball', '##er', '!', '!', 'will', 'show', 'you', 'the', 'cards', 'and', 'some', 'things', 'i', 'got', 'today', 'with', 'it', '!', 'also', 'detective', '##pi', '##ka', '##chu', 'don', '##ts', '##po', '##il', '##the', '##my', '##ster', '##y', 'for', 'others', 'even', 'if', 'it', 'isn', "'", 't', 'as', 'big', 'as', 'avengers', '##end', '##game']
Token IDs:  [3458, 10424, 6799, 2378, 1005, 13299, 2094, 2000, 4608, 2023, 999, 1998, 1996, 12785, 13433, 2912, 29652, 17965, 2007, 2026, 7281, 2024, 3608, 2121, 999, 999, 2097, 2265, 2017, 1996, 5329, 1998, 207

In [88]:
import warnings
warnings.filterwarnings('ignore')

max_len_train = 0

# For every sentence...
for sent in train_sentences:
    # Tokenize the text and add `[CLS]` and `[SEP]` tokens.
    input_ids = tokenizer.encode(sent[:512], add_special_tokens=True)

    # Update the maximum sentence length.
    max_len_train = max(max_len_train, len(input_ids))

print('Max sentence length: ', max_len_train)

Max sentence length:  139


In [89]:
max_len_test = 0

# For every sentence...
for sent in test_sentences:

    # Tokenize the text and add `[CLS]` and `[SEP]` tokens.
    input_ids = tokenizer.encode(sent[:512], add_special_tokens=True)

    # Update the maximum sentence length.
    max_len_test = max(max_len_test, len(input_ids))

print('Max sentence length: ', max_len_test)

Max sentence length:  143


In [90]:
# Tokenize all of the sentences and map the tokens to thier word IDs.
input_ids = []
attention_masks = []

# For every sentence...
for sent in train_sentences:
    
    # `encode_plus` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    #   (5) Pad or truncate the sentence to `max_length`
    #   (6) Create attention masks for [PAD] tokens.
    encoded_dict = tokenizer.encode_plus(
                        sent,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 60,           # Pad & truncate all sentences.
                        pad_to_max_length = True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                        truncation=True
                   )
    
    # Add the encoded sentence to the list.    
    input_ids.append(encoded_dict['input_ids'])
    
    # And its attention mask (simply differentiates padding from non-padding).
    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(train_labels)

# Print sentence 0, now as a list of IDs.
print('Original: ', train_sentences[0])
print('Token IDs:', input_ids[0])

Original:  JUST IN: The ninth film by Quentin Tarantino exceeded all box-office expectations to take in $41 million over its first three days in theaters surpassing the studioÂs own pre-release projections by $11 million. OnceUponATimeInHollywood 
Token IDs: tensor([  101,  2074,  1999,  1024,  1996,  6619,  2143,  2011, 15969, 10225,
        25318, 14872,  2035,  3482,  1011,  2436, 10908,  2000,  2202,  1999,
         1002,  4601,  2454,  2058,  2049,  2034,  2093,  2420,  1999, 12370,
        27097,  1996,  2996,  3022,  2219,  3653,  1011,  2713, 21796,  2011,
         1002,  2340,  2454,  1012,  2320,  6279,  7856,  7292, 29344,  9215,
         3702,   102,     0,     0,     0,     0,     0,     0,     0,     0])


In [91]:

train_df_bert.head()

Unnamed: 0,id,label,sentence
0,0,2,JUST IN: The ninth film by Quentin Tarantino e...
1,1,2,Beyond frickin' stoked to catch this! And the ...
2,2,1,Frozen2 poll
3,3,1,IÂm at the theater to see captainmarvel and I...
4,4,0,If Jennifer Lawrence didnt wanted to continue ...


In [92]:
(x_train, y_train), (x_test, y_test) = (train_df_bert.sentence,train_df_bert.label),(test_df_bert.sentence,test_df_bert.label)

In [93]:
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(x_train)
x_train = tokenizer.texts_to_sequences(x_train)
x_test = tokenizer.texts_to_sequences(x_test)

In [94]:
from keras_preprocessing.sequence import pad_sequences
# 2 - Define the number of words to be used from a review (truncate and pad input sequences)
print('Pad sequences (samples x time)')

max_review_length = 60

x_train = pad_sequences(x_train, maxlen=max_review_length)
print('X_train shape:',x_train.shape)
x_test = pad_sequences(x_test, maxlen=max_review_length)
print('X_test shape:',x_test.shape)

Pad sequences (samples x time)
X_train shape: (10000, 60)
X_test shape: (3000, 60)


In [95]:
from keras import backend as K
from tensorflow import keras
import tensorflow as tf

class FuzzyLayer(keras.layers.Layer):

    def __init__(self, 
                 output_dim, 
                 initial_centers=None,
                 initial_sigmas=None, 
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        self.output_dim = output_dim
        self.initial_centers = initial_centers
        self.initial_sigmas = initial_sigmas
        super(FuzzyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_dimensions = list(input_shape)[:-1:-1]

        c_init_values =[]
        
        if self.initial_centers is None:
            c_init_values= tf.random_uniform_initializer(-1, 1)(shape=(input_shape[-1], self.output_dim), dtype="float32")    
        else:
            c_init_values =tf.convert_to_tensor(self.initial_centers, dtype="float32")
        self.c = tf.Variable(initial_value= c_init_values, trainable=True)

        a_init_values = []
        if self.initial_sigmas is None:
            a_init_values = tf.ones_initializer()(shape=(input_shape[-1], self.output_dim), dtype="float32")    
        else:
            a_init_values = tf.convert_to_tensor(self.initial_sigmas, dtype="float32")
        self.a = tf.Variable(initial_value= a_init_values, trainable=True)
        super(FuzzyLayer, self).build(input_shape)  

    def call(self, x):
        
        aligned_x = K.repeat_elements(K.expand_dims(x, axis = -1), self.output_dim, -1)
        aligned_c = self.c
        aligned_a = self.a
        for dim in self.input_dimensions:
            aligned_c = K.repeat_elements(K.expand_dims(aligned_c, 0), dim, 0)
            aligned_a = K.repeat_elements(K.expand_dims(aligned_a, 0), dim, 0)

        xc = K.exp(-K.sum(K.square((aligned_x - aligned_c) / (2 * aligned_a)), axis=-2, keepdims=False))
        
        return xc
        
    def compute_output_shape(self, input_shape):
        return tuple(input_shape[:-1]) + (self.output_dim,)


In [96]:
import tensorflow as tf
from tensorflow import keras
from keras import backend as K

class DefuzzyLayer(keras.layers.Layer):

    def __init__(self, 
                 output_dim, 
                 initial_rules_outcomes=None,
                 **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        self.output_dim = output_dim
        self.initial_rules_outcomes = initial_rules_outcomes
        super(DefuzzyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_dimensions = list(input_shape)[:-1:-1]

        outcomes_init_values = []
        if self.initial_rules_outcomes is None:
            outcomes_init_values = tf.random_uniform_initializer()(shape=(input_shape[-1], self.output_dim), dtype="float32")    
        else:
            outcomes_init_values = tf.convert_to_tensor(self.initial_rules_outcomes, dtype="float32")
        
        self.rules_outcome = tf.Variable(initial_value = outcomes_init_values, trainable=True)
        
        super(DefuzzyLayer, self).build(input_shape)  

    def call(self, x):
        aligned_x = K.repeat_elements(K.expand_dims(x, axis = -1), self.output_dim, -1)
        aligned_rules_outcome = self.rules_outcome
        for dim in self.input_dimensions:
            aligned_rules_outcome = K.repeat_elements(K.expand_dims(aligned_rules_outcome, 0), dim, 0)
        
        xc = K.sum((aligned_x * aligned_rules_outcome), axis=-2, keepdims=False)
        return xc
        
    def compute_output_shape(self, input_shape):
        return tuple(input_shape[:-1]) + (self.output_dim,)
    
    def get_config(self):
        return {"rules_outcome": self.rules_outcome.numpy()}


In [97]:
#FuzzyCNNLSTM
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation, Flatten
from keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional
from keras.layers import Conv1D
from keras.layers import GlobalMaxPooling1D,MaxPooling1D
from keras.layers import Embedding
import matplotlib.pyplot as plt
#from FuzzyLayer import FuzzyLayer
#from DefuzzyLayer import DefuzzyLayer

# 3 - Neural network model design
print('Building model')
max_features = 2000
embedding_vector_length = 32

filters = 32
kernel_size = 3
hidden_dims = 40
nb_classes = 2
#input_shape=x_train.shape
model = Sequential()
model.add(Embedding(max_features, embedding_vector_length, input_length=max_review_length))

model.add(Bidirectional(LSTM(30,activation='tanh',
                             recurrent_activation='sigmoid',
                             return_state=False,
                             return_sequences=False)))
model.add(Dropout(0.1))
model.add(FuzzyLayer(20, input_dim=2))
model.add(Dropout(0.1))
model.add(DefuzzyLayer(10))
model.add(Dropout(0.1))
model.add(Dense(3, activation='softmax'))
#model.add(Dense(1, activation='sigmoid'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


Building model
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 60, 32)            64000     
_________________________________________________________________
bidirectional_3 (Bidirection (None, 60)                15120     
_________________________________________________________________
dropout_9 (Dropout)          (None, 60)                0         
_________________________________________________________________
fuzzy_layer_3 (FuzzyLayer)   (None, 20)                2400      
_________________________________________________________________
dropout_10 (Dropout)         (None, 20)                0         
_________________________________________________________________
defuzzy_layer_3 (DefuzzyLaye (None, 10)                200       
_________________________________________________________________
dropout_11 (Dropout)         (None, 10)

In [98]:
import numpy
es_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# 4 - Train the neural network model
history = model.fit(x_train, y_train,
                    epochs=20,
                    verbose=True,
                    callbacks=[es_callback],
                    #validation_data=(x_test, y_test),
                    validation_split=0.1,
                    batch_size=10)

loss, accuracy = model.evaluate(x_train, y_train, verbose=True)
print("Training Accuracy: {:.6f}".format(accuracy))
print("Training Loss: {:.6f}".format(loss))
loss, accuracy = model.evaluate(x_test, y_test, verbose=True)
print("Testing Accuracy:  {:.6f}".format(accuracy))
print("Testing Loss:  {:.6f}".format(loss))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Training Accuracy: 0.957700
Training Loss: 0.130868
Testing Accuracy:  0.873667
Testing Loss:  0.433170


In [99]:
#FuzzyCNNLSTM
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation, Flatten
from keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional
from keras.layers import Conv1D
from keras.layers import GlobalMaxPooling1D,MaxPooling1D
from keras.layers import Embedding
import matplotlib.pyplot as plt
#from FuzzyLayer import FuzzyLayer
#from DefuzzyLayer import DefuzzyLayer

# 3 - Neural network model design
print('Building model')
max_features = 2000
embedding_vector_length = 32

filters = 32
kernel_size = 3
hidden_dims = 40
nb_classes = 2
#input_shape=x_train.shape
model = Sequential()
model.add(Embedding(max_features, embedding_vector_length, input_length=max_review_length))
model.add(Bidirectional(LSTM(30,activation='tanh',
                             recurrent_activation='sigmoid',
                             return_state=False,
                             return_sequences=False)))
model.add(Dropout(0.1))
model.add(FuzzyLayer(20, input_dim=2))
model.add(Dropout(0.1))
model.add(DefuzzyLayer(10))
model.add(Dropout(0.1))
model.add(Dense(3, activation='softmax'))
#model.add(Dense(1, activation='sigmoid'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


Building model
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 60, 32)            64000     
_________________________________________________________________
bidirectional_4 (Bidirection (None, 60)                15120     
_________________________________________________________________
dropout_12 (Dropout)         (None, 60)                0         
_________________________________________________________________
fuzzy_layer_4 (FuzzyLayer)   (None, 20)                2400      
_________________________________________________________________
dropout_13 (Dropout)         (None, 20)                0         
_________________________________________________________________
defuzzy_layer_4 (DefuzzyLaye (None, 10)                200       
_________________________________________________________________
dropout_14 (Dropout)         (None, 10)

In [100]:
import numpy
es_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# 4 - Train the neural network model
history = model.fit(x_train, y_train,
                    epochs=20,
                    verbose=True,
                    callbacks=[es_callback],
                    #validation_data=(x_test, y_test),
                    validation_split=0.1,
                    batch_size=10)

loss, accuracy = model.evaluate(x_train, y_train, verbose=True)
print("Training Accuracy: {:.6f}".format(accuracy))
print("Training Loss: {:.6f}".format(loss))
loss, accuracy = model.evaluate(x_test, y_test, verbose=True)
print("Testing Accuracy:  {:.6f}".format(accuracy))
print("Testing Loss:  {:.6f}".format(loss))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Training Accuracy: 0.953500
Training Loss: 0.130889
Testing Accuracy:  0.867333
Testing Loss:  0.459966


In [1]:
import matplotlib.pyplot as plt
#% matplotlib inline

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
x = range(1, len(acc) + 1)

plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(x, acc, 'b', label='Training acc')
plt.plot(x, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(x, loss, 'b', label='Training loss')
plt.plot(x, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

#print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
# Vẽ đồ thị hàm loss
plt.subplot(211)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
# Vẽ đồ thị độ chính xác
plt.subplot(212)
plt.title('Accuracy')
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='test')
plt.legend()
plt.show()

NameError: name 'history' is not defined