In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import Dense, Embedding, LSTM, GRU, SimpleRNN, Dropout
from sklearn.model_selection import train_test_split

In [2]:
# TF-Hub handle of the pretrained text-embedding module used by this notebook.
pretrained_model = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
# Wrap the module as a Keras layer. Each example is a single scalar string
# (hence the empty input_shape); trainable=True lets fit() update the
# embedding weights along with the rest of the model.
hub_layer = hub.KerasLayer(
    pretrained_model,
    trainable=True,
    dtype=tf.string,
    input_shape=[],
)

In [3]:
# Sanity check: embed one sentence and inspect the resulting vector.
# Reuse the handle stored in `pretrained_model` instead of repeating the URL,
# so this check always exercises the same module that `hub_layer` wraps.
embed = hub.load(pretrained_model)
embeddings = embed(["cat is on the mat dog is in the fog"])

embeddings

<tf.Tensor: shape=(1, 20), dtype=float32, numpy=
array([[ 1.2917835 ,  0.6960632 ,  0.04837722,  0.7461801 , -0.20820257,
        -0.12027001, -0.7988958 ,  0.84001   , -0.02885479, -0.83123875,
        -1.4375954 ,  1.1317196 , -0.5531615 , -0.45697683, -0.9780804 ,
         1.0363967 , -0.2558647 ,  0.24833947, -1.6134161 , -0.55305725]],
      dtype=float32)>

In [4]:
# Location of the input CSV, relative to the notebook's working directory.
path = "../Clean/lemma_allresult.csv"
# Load the whole file into a DataFrame; column roles are assigned in the next cell.
dataset = pd.read_csv(filepath_or_buffer=path)

In [5]:
# Column 0 is used as the model input and column 1 as the label.
# NOTE(review): presumably column 0 holds the text and column 1 a 0/1 class --
# confirm against the CSV header.
feat, target = dataset.iloc[:, 0], dataset.iloc[:, 1]

In [6]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same rows land in each partition on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    feat,
    target,
    train_size=0.8,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Seed TensorFlow before any new layer weights are created so that weight
# initialisation and dropout are seeded rather than different on every run
# (42 matches the random_state used for the train/test split).
tf.random.set_seed(42)

# NOTE: `hub_layer` is shared state. It is trainable, so fit() fine-tunes it in
# place; re-run the cell that creates it before rebuilding this model if you
# want to start again from the pretrained embedding weights.
model = tf.keras.Sequential()
# Sentence string -> one 20-dimensional embedding vector.
model.add(hub_layer)
# Turn the (20,) vector into a (20, 1) sequence so the LSTM accepts it.
# NOTE(review): the LSTM therefore steps over the 20 embedding components as
# scalars, not over the words of the sentence -- confirm this is intended.
model.add(tf.keras.layers.Reshape((20, 1)))
model.add(tf.keras.layers.LSTM(32))
model.add(tf.keras.layers.Dense(16, activation="relu"))
model.add(tf.keras.layers.Dropout(0.5))
# Single sigmoid unit: the output is a value in (0, 1) for binary classification.
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 20)                400020    
_________________________________________________________________
reshape (Reshape)            (None, 20, 1)             0         
_________________________________________________________________
lstm (LSTM)                  (None, 32)                4352      
_________________________________________________________________
dense (Dense)                (None, 16)                528       
_________________________________________________________________
dropout (Dropout)            (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 404,917
Trainable params: 404,917
Non-trainable params: 0
__________________________________________________

In [9]:
# Binary classification setup: cross-entropy on the sigmoid output, Adam with
# its default settings, and accuracy reported alongside the loss.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [10]:
# Train for 10 epochs. validation_split reserves 20% of the training rows for
# per-epoch validation, so the test split is untouched here.
model.fit(
    x=X_train,
    y=Y_train,
    validation_split=0.2,
    epochs=10,
    verbose=1,
)

Train on 6400 samples, validate on 1600 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1fee66cd9c8>

In [16]:
# Evaluate on the held-out test split. With the compile settings above the two
# returned values are the loss (`score`) and the accuracy (`acc`).
score, acc = model.evaluate(
    x=X_test,
    y=Y_test,
    batch_size=32,
    verbose=1,
)



In [11]:
# Show small floats in fixed-point form rather than scientific notation.
np.set_printoptions(suppress=True)
# Sigmoid output of the model for every test example (one column per row).
result = model.predict(x=X_test)

result

array([[0.00252537],
       [0.04194932],
       [0.01022991],
       ...,
       [0.9967399 ],
       [0.0938549 ],
       [0.8903085 ]], dtype=float32)

In [12]:
from sklearn.metrics import confusion_matrix

# A test example counts as predicted-positive when its sigmoid output exceeds
# 0.5. In the resulting matrix rows are true classes and columns are predicted
# classes.
cm = confusion_matrix(
    y_pred=result > 0.5,
    y_true=Y_test,
)

cm

array([[847, 149],
       [158, 846]], dtype=int64)