# Things to do:
    1. Get the data
    2. Import the packages
    3. Read the Data
    4. Split the data
    5. Clean the Text
    6. Tokenize to get the text max len
    7. Tokenize to index retrieve vocab length then to sequence then pad it to max length
    8. Define the model architecture
    9. Fit on the train data 
    10. Load Pretrained Embedding matrix
    11. Define model architecture for preloaded embedding matrix
    12. Fit on training data 
    13. Retrive auc for all the models on test data

In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.metrics import roc_auc_score

from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

import re
import string

import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

import warnings
warnings.filterwarnings('ignore')

from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, GRU, LSTM, Conv1D, MaxPooling1D, Dropout, Activation
from keras.layers.embeddings import Embedding


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\91876\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\91876\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\91876\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Using TensorFlow backend.


In [None]:
# READING THE DATA

In [3]:
quora = pd.read_csv(r"C:\Users\91876\Desktop\Poonam\Python_Project_3\train.csv")

In [4]:
quora.head()

Unnamed: 0,qid,question_text,target
0,00002165364db923c7e6,How did Quebec nationalists see their province...,0
1,000032939017120e6e44,"Do you have an adopted dog, how would you enco...",0
2,0000412ca6e4628ce2cf,Why does velocity affect time? Does velocity a...,0
3,000042bf85aa498cd78e,How did Otto von Guericke used the Magdeburg h...,0
4,0000455dfa3e01eae3af,Can I convert montra helicon D to a mountain b...,0


In [5]:
quora.shape

(1306122, 3)

In [6]:
quora.target.value_counts(normalize=True)*100  ##IMBALANCED CLASSIFICATION PROBLEM

0    93.812982
1     6.187018
Name: target, dtype: float64

In [None]:
#SPLITTING THE DATA SUCH THAT TRAIN AND TEST HAVE SAME TARGET PERCENTAGE 

In [7]:
train,test =train_test_split(quora,test_size=0.25,stratify= quora.target,random_state=123)

In [8]:
print (train.shape)
print (train.target.value_counts())
test.shape , test.target.value_counts()

(979591, 3)
0    918984
1     60607
Name: target, dtype: int64


((326531, 3),
 0    306328
 1     20203
 Name: target, dtype: int64)

In [9]:
x_train= train.question_text
y_train= train.target

x_test=test.question_text
y_test=test.target


In [None]:
##CREATING FUNCTION FOR CLEANING THE DATA FOR STOP WORDS, TEXT IN BRACKETS , PUNCTUATIONS, WORDS CONTAINING NUMBERS, CORRECTING SHORT FORMS AND LEMMATIZING THE SENETENCE

In [10]:
def clean_text(text):
    
    text = text.lower().split()
    
    stops = set(stopwords.words("english"))
    text = [w for w in text if not w in stops and len(w) >= 3]
    
    text = " ".join(text)
    
    text = re.sub('\[.*?\]','',text)
    text = re.sub('[%s]' % re.escape(string.punctuation),'',text)
    text = re.sub('\w*\d\w*','',text)
    text = re.sub(r"what’s", "what is ", text)
    text = re.sub(r"\’s", " ", text)
    text = re.sub(r"\’ve", " have ", text)
    text = re.sub(r"n’t", " not ", text)
    text = re.sub(r"i’m", "i am ", text)
    text = re.sub(r"\’re", " are ", text)
    text = re.sub(r"\’d", " would ", text)
    text = re.sub(r"\’ll", " will ", text)
    
    text = text.split()
    
    lemma = WordNetLemmatizer()
    lemma_words = [lemma.lemmatize(word) for word in text]
    
    text = " ".join(lemma_words)
    
    return text


In [11]:
x_train.describe

<bound method NDFrame.describe of 1026350    How do I select a tyre size and rim size for a...
655022     Could Fox buy the rights to Last Man Standing ...
526743     James Comey testified the FBI requested access...
179882     Why is it that someone, such as Trump for inst...
517925     Is there a shortage of trade workers (plumbers...
                                 ...                        
1294229    How would I make my television show idea a rea...
663481     What is the possibility of a having a camera i...
273811     Is the discount rate of buying one share of a ...
39353      What is the best way to get a personal loan in...
104244     Do you think a piloted airplane could fly unde...
Name: question_text, Length: 979591, dtype: object>

In [12]:
x_train = x_train.map(lambda x: clean_text(x))

In [13]:
x_train.head()

1026350           select tyre size rim size baja atv vehicle
655022     could fox buy right last man standing continue...
526743     james comey testified fbi requested access hac...
179882     someone trump instance whose name people pay m...
517925         shortage trade worker plumber carpenter etc u
Name: question_text, dtype: object

In [14]:
x_test.head()

157891            Why can’t I sign up if I’m older than 12?
718307         What pictures remind you of something funny?
577413    Explain how does a computer help simplying tas...
103484            Why doesn't Europe expel all the Muslims?
693485    What are the importance of necessity of protec...
Name: question_text, dtype: object

In [15]:
x_test = x_test.map(lambda x: clean_text(x))

In [16]:
x_test.head

<bound method NDFrame.head of 157891                                ca not sign i am older
718307                        picture remind something funny
577413          explain computer help simplying task example
103484                                   europe expel muslim
693485     importance necessity protected water supply sy...
                                 ...                        
197601                                      puzzle pessimism
1076955    example successful socialist community commune...
349099          judge really woken middle night sign warrant
991067                                           earn tinder
358803                          beautiful poem english hindi
Name: question_text, Length: 326531, dtype: object>

In [None]:
# TOKENIZING TO GET THE LENGTH OF SENETENCE 

In [17]:

question_len=[]

for quest in x_train:
    question_len.append(len(word_tokenize(quest)))

max(question_len)

np.quantile(question_len,0.9999)

question_len1=[]

for quest in x_test:
    question_len1.append(len(word_tokenize(quest)))


In [18]:
print(max(question_len1))
np.quantile(question_len1,0.9999) ##99.99% OF DATA HAVE LENGTH LESS THAN 29 


34


29.0

In [19]:
max_len=29

In [20]:
tok = Tokenizer(char_level=False, split=' ') ###INDEXING EVERY WORD OF THE SENETENCE
tok.fit_on_texts(x_train)

tok.index_word

{1: 'get',
 2: 'best',
 3: 'would',
 4: 'people',
 5: 'like',
 6: 'good',
 7: 'one',
 8: 'make',
 9: 'india',
 10: 'year',
 11: 'way',
 12: 'time',
 13: 'think',
 14: 'many',
 15: 'life',
 16: 'much',
 17: 'someone',
 18: 'want',
 19: 'use',
 20: 'know',
 21: 'work',
 22: 'take',
 23: 'country',
 24: 'thing',
 25: 'job',
 26: 'woman',
 27: 'ever',
 28: 'it',
 29: 'indian',
 30: 'find',
 31: 'world',
 32: 'feel',
 33: 'become',
 34: 'person',
 35: 'without',
 36: 'book',
 37: 'could',
 38: 'student',
 39: 'better',
 40: 'quora',
 41: 'girl',
 42: 'day',
 43: 'mean',
 44: 'company',
 45: 'difference',
 46: 'need',
 47: 'trump',
 48: 'new',
 49: 'whats',
 50: 'possible',
 51: 'college',
 52: 'school',
 53: 'start',
 54: 'im',
 55: 'used',
 56: 'friend',
 57: 'first',
 58: 'question',
 59: 'say',
 60: 'state',
 61: 'american',
 62: 'money',
 63: 'still',
 64: 'different',
 65: 'business',
 66: 'do',
 67: 'love',
 68: 'not',
 69: 'really',
 70: 'give',
 71: 'university',
 72: 'learn',
 73: 

In [21]:
vocab_len=len(tok.index_word.keys())
vocab_len

181603

In [22]:
seq_train = tok.texts_to_sequences(x_train) ##TRANSFORMING THE SENTENCE TO SEQUENCE OF INDEXED WORDS
seq_train

[[2420, 5194, 599, 8137, 599, 10459, 15910, 1140],
 [37, 2620, 110, 79, 212, 148, 3015, 984, 370],
 [1677,
  6132,
  43462,
  2373,
  6683,
  1011,
  3540,
  7882,
  1368,
  4819,
  1172,
  68,
  98,
  317,
  5531,
  182,
  23,
  383,
  1238,
  336,
  4095,
  3302,
  131,
  4958,
  1473],
 [17,
  47,
  3023,
  1598,
  86,
  4,
  205,
  62,
  19,
  5498,
  395,
  2722,
  22954,
  7313,
  10334,
  673,
  206,
  185,
  2756,
  24119,
  2078],
 [6816, 970, 1566, 11822, 10335, 331, 146],
 [23, 34, 3, 666],
 [24, 711, 2139, 35, 160, 344],
 [6, 1363, 187, 65, 73427],
 [2, 114, 21978, 2242, 25, 3197, 311],
 [170, 25, 1222, 167, 25432, 344],
 [96, 1566, 55, 3810],
 [1113, 137],
 [43463, 1653],
 [511, 53, 351, 2919, 312, 9],
 [684, 4, 13, 454, 35, 27, 337, 365, 15],
 [311, 82, 1, 761, 204],
 [49, 847, 442, 356, 288, 1088, 82, 390, 85],
 [5030, 3057, 21, 1171, 1365],
 [122, 169, 12, 25, 106, 62],
 [21979, 73428, 3387],
 [14, 576, 147, 381],
 [884, 33, 803, 3943, 1075, 202],
 [73,
  718,
  1196,
 

In [23]:
train_matrix = sequence.pad_sequences(seq_train, maxlen=max_len) ##PADDING SHORTER SEQUENCE TO GET A DEFINED MATRIX
train_matrix.shape

(979591, 29)

In [24]:
seq_test = tok.texts_to_sequences(x_test)
seq_test

[[959, 68, 535, 376, 489, 1044],
 [570, 7557, 125, 1790],
 [534, 132, 77, 2131, 137],
 [439, 12517, 102],
 [891, 6338, 3888, 138, 1319, 101],
 [1056, 1175, 4015, 10507, 1458, 7242],
 [100436, 65, 67724, 173, 345, 676, 68, 550, 210, 67724, 3791, 19444, 639],
 [459, 2, 100],
 [24, 387, 49156],
 [292, 15092, 25210, 60, 499],
 [18, 8, 1922, 606, 3587, 16412, 1672, 324, 5646, 8, 15640],
 [250, 26596, 2590, 188, 8458],
 [403, 813, 11742, 285, 1508, 1876],
 [653, 958, 58],
 [558, 1601, 3, 27, 2288, 83, 1921, 3, 12, 3, 380],
 [4, 3220, 151],
 [6259, 4576, 1682, 15141, 7452, 440],
 [259, 40, 762, 15, 11],
 [137, 2279, 495],
 [11, 454, 648],
 [601, 45, 414, 5344, 414],
 [1122, 5221, 14800, 228, 321, 36, 386, 694, 10787],
 [1480, 8532, 5700],
 [63, 459, 3447, 10306, 1327, 456],
 [22452, 20705, 646, 4815, 3060],
 [6156, 74, 339, 230, 10, 4079, 1416, 576, 622, 907, 656, 334],
 [141, 17864, 2443, 2673, 3175],
 [2, 1659, 8275, 1076],
 [2945, 123, 17107],
 [2586, 248, 1552],
 [209, 270, 56, 1, 74],
 [

In [25]:
test_matrix = sequence.pad_sequences(seq_test, maxlen=max_len)
test_matrix.shape

(326531, 29)

In [26]:
vocab_len ###NUMBER PF UNIQUE WORDS IN THE DATA 

181603

# DEFINING MODEL ARCHITECTURE

In [27]:
model_lstm = Sequential()
model_lstm.add(Embedding(vocab_len+1, 200, input_length=max_len))
model_lstm.add(Dropout(0.2))
model_lstm.add(LSTM(100 , activation='relu'))
model_lstm.add(Dropout(0.2))
model_lstm.add(Dense(1, activation="sigmoid"))
model_lstm.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])

model_lstm.summary()

model_lstm.fit(train_matrix,y_train.values , batch_size=1000 ,verbose=2, epochs = 2 , validation_split=0.3)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 29, 200)           36320800  
_________________________________________________________________
dropout_1 (Dropout)          (None, 29, 200)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               120400    
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 36,441,301
Trainable params: 36,441,301
Non-trainable params: 0
_________________________________________________________________
Train on 685713 samples, validate on 293878 samples
Epoch 1/2
 - 836s - loss: 0.1450 - accuracy: 0.9459 - va

<keras.callbacks.callbacks.History at 0x1e54d355f48>

In [28]:
pred_lstm = model_lstm.predict(test_matrix)
prediction_lstm = roc_auc_score(y_test,pred_lstm)
prediction_lstm

0.9487366408172323

In [29]:
model_cnn_lstm = Sequential()
model_cnn_lstm.add(Embedding(vocab_len+1, 200, input_length=max_len))
model_cnn_lstm.add(Dropout(0.2))
model_cnn_lstm.add(Conv1D(64,5,activation="relu"))
model_cnn_lstm.add(MaxPooling1D(pool_size=4))
model_cnn_lstm.add(Dropout(0.2))
model_cnn_lstm.add(LSTM(100 , activation='relu'))
model_cnn_lstm.add(Dropout(0.2))
model_cnn_lstm.add(Dense(1, activation="sigmoid"))
model_cnn_lstm.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])
model_cnn_lstm.summary()
model_cnn_lstm.fit(train_matrix,y_train.values , batch_size=1000 ,verbose=2, epochs = 2 , validation_split=0.3)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 29, 200)           36320800  
_________________________________________________________________
dropout_3 (Dropout)          (None, 29, 200)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 25, 64)            64064     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 6, 64)             0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 6, 64)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               66000     
_________________________________________________________________
dropout_5 (Dropout)          (None, 100)              

<keras.callbacks.callbacks.History at 0x1e5451dca48>

In [30]:
pred_cnn_lstm = model_cnn_lstm.predict(test_matrix)
prediction_cnn_lstm = roc_auc_score(y_test,pred_cnn_lstm)
prediction_cnn_lstm

0.9357573720156617

In [31]:
model_gru = Sequential()
model_gru.add(Embedding(vocab_len+1, 200, input_length=max_len))
model_gru.add(Dropout(0.2))
model_gru.add(GRU(100 , activation='relu'))
model_gru.add(Dropout(0.2))
model_gru.add(Dense(1, activation="sigmoid"))
model_gru.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])

model_gru.summary()

model_gru.fit(train_matrix,y_train.values , batch_size=1000 ,verbose=2, epochs = 2 , validation_split=0.3)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 29, 200)           36320800  
_________________________________________________________________
dropout_6 (Dropout)          (None, 29, 200)           0         
_________________________________________________________________
gru_1 (GRU)                  (None, 100)               90300     
_________________________________________________________________
dropout_7 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 101       
Total params: 36,411,201
Trainable params: 36,411,201
Non-trainable params: 0
_________________________________________________________________
Train on 685713 samples, validate on 293878 samples
Epoch 1/2
 - 811s - loss: 0.1513 - accuracy: 0.9443 - va

<keras.callbacks.callbacks.History at 0x1e545b25d08>

In [32]:
pred_gru = model_gru.predict(test_matrix)
prediction_gru = roc_auc_score(y_test,pred_gru)
prediction_gru

0.9484868821660196

### IMPPORTING PRE-TRAINED EMBEDDING LAYER

In [34]:
embeddings_index = dict()
f = open(r"C:\Users\91876\Desktop\Poonam\Python_Project_3\glove.6B.200d.txt" , encoding="utf8")
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 400000 word vectors.


In [35]:
embedding_matrix = np.zeros((vocab_len + 1, 200))
for word, index in tok.word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[index] = embedding_vector

In [36]:
embedding_matrix.shape

(181604, 200)

### DEFINING MODEL ARCHITECTURE WITH EMBEDDING LAYER

In [37]:
model_lstm_emb = Sequential()
model_lstm_emb.add(Embedding(vocab_len+1, 200, input_length=max_len , weights=[embedding_matrix], trainable=False))
model_lstm_emb.add(Dropout(0.2))
model_lstm_emb.add(LSTM(100 , activation='relu'))
model_lstm_emb.add(Dropout(0.2))
model_lstm_emb.add(Dense(1, activation="sigmoid"))
model_lstm_emb.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])

model_lstm_emb.summary()

model_lstm_emb.fit(train_matrix,y_train.values , batch_size=1000 ,verbose=2, epochs = 2 , validation_split=0.3)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 29, 200)           36320800  
_________________________________________________________________
dropout_8 (Dropout)          (None, 29, 200)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               120400    
_________________________________________________________________
dropout_9 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 101       
Total params: 36,441,301
Trainable params: 120,501
Non-trainable params: 36,320,800
_________________________________________________________________
Train on 685713 samples, validate on 293878 samples
Epoch 1/2
 - 462s - loss: 0.1599 - accuracy: 0.942

<keras.callbacks.callbacks.History at 0x1e55163ef08>

In [38]:
pred_lstm_emb= model_lstm_emb.predict(test_matrix)
prediction_lstm_emb = roc_auc_score(y_test,pred_lstm_emb)
prediction_lstm_emb

0.9437698911666703

In [39]:
model_cnn_lstm_emb = Sequential()
model_cnn_lstm_emb.add(Embedding(vocab_len+1, 200, input_length=max_len , weights=[embedding_matrix], trainable=False))
model_cnn_lstm_emb.add(Dropout(0.2))
model_cnn_lstm_emb.add(Conv1D(64,5,activation="relu"))
model_cnn_lstm_emb.add(MaxPooling1D(pool_size=4))
model_cnn_lstm_emb.add(Dropout(0.2))
model_cnn_lstm_emb.add(LSTM(100 , activation='relu'))
model_cnn_lstm_emb.add(Dropout(0.2))
model_cnn_lstm_emb.add(Dense(1, activation="sigmoid"))
model_cnn_lstm_emb.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])
model_cnn_lstm_emb.summary()
model_cnn_lstm_emb.fit(train_matrix,y_train.values , batch_size=1000 ,verbose=2, epochs = 2 , validation_split=0.3)

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 29, 200)           36320800  
_________________________________________________________________
dropout_10 (Dropout)         (None, 29, 200)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 25, 64)            64064     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 6, 64)             0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 6, 64)             0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 100)               66000     
_________________________________________________________________
dropout_12 (Dropout)         (None, 100)              

<keras.callbacks.callbacks.History at 0x1e558b65d48>

In [40]:
pred_model_cnn_lstm_emb = model_cnn_lstm_emb.predict(test_matrix)
prediction_model_cnn_lstm_emb = roc_auc_score(y_test,pred_model_cnn_lstm_emb )
prediction_model_cnn_lstm_emb

0.9270292115193229

In [41]:
model_gru_emb = Sequential()
model_gru_emb.add(Embedding(vocab_len+1, 200, input_length=max_len , weights=[embedding_matrix], trainable=False))
model_gru_emb.add(Dropout(0.2))
model_gru_emb.add(GRU(100 , activation='relu'))
model_gru_emb.add(Dropout(0.2))
model_gru_emb.add(Dense(1, activation="sigmoid"))
model_gru_emb.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])

model_gru_emb.summary()

model_gru_emb.fit(train_matrix,y_train.values , batch_size=1000 ,verbose=2, epochs = 2 , validation_split=0.3)

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 29, 200)           36320800  
_________________________________________________________________
dropout_13 (Dropout)         (None, 29, 200)           0         
_________________________________________________________________
gru_2 (GRU)                  (None, 100)               90300     
_________________________________________________________________
dropout_14 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 101       
Total params: 36,411,201
Trainable params: 90,401
Non-trainable params: 36,320,800
_________________________________________________________________
Train on 685713 samples, validate on 293878 samples
Epoch 1/2
 - 316s - loss: 0.1580 - accuracy: 0.9422

<keras.callbacks.callbacks.History at 0x1e55905edc8>

In [42]:
pred_gru_emb = model_gru_emb.predict(test_matrix)
prediction_gru_emb = roc_auc_score(y_test,pred_gru_emb)
prediction_gru_emb

0.9447059996813079

#### MODEL PERFORMANCE

In [43]:
predictions={"MODELS":["LSTM","EMB_LSTM","CNN_LSTM","EMB_CNN_LSTM","GRU","EMB_GRU"], "AUC_SCORE":[prediction_lstm, prediction_lstm_emb,prediction_cnn_lstm,prediction_model_cnn_lstm_emb,prediction_gru,prediction_gru_emb]}
Predictions = pd.DataFrame(predictions)
Predictions

Unnamed: 0,MODELS,AUC_SCORE
0,LSTM,0.948737
1,EMB_LSTM,0.94377
2,CNN_LSTM,0.935757
3,EMB_CNN_LSTM,0.927029
4,GRU,0.948487
5,EMB_GRU,0.944706


###### EMBEDDING LAYER REDUCED THE TRAINING TIME WITH SIMILAR ACCURACY

### SAVING THE MODEL AND WEIGHT FILE

In [44]:
model_lstm_json = model_lstm.to_json()
with open ("model_lstm_json.json","w")as json_file:
    json_file.write(model_lstm_json)
model_lstm.save_weights("model_lstm.h5")

In [45]:
model_lstm_emb_json = model_lstm_emb.to_json()
with open ("model_lstm_emb_json.json","w")as json_file:
    json_file.write(model_lstm_emb_json)
model_lstm_emb.save_weights("model_lstm_emb.h5")

In [46]:
model_cnn_lstm_json = model_cnn_lstm.to_json()
with open ("model_cnn_lstm_json.json","w")as json_file:
    json_file.write(model_cnn_lstm_json)
model_cnn_lstm.save_weights("model_cnn_lstm.h5")

In [47]:
model_cnn_lstm_json = model_cnn_lstm.to_json()
with open ("model_cnn_lstm_json.json","w")as json_file:
    json_file.write(model_cnn_lstm_json)
model_cnn_lstm.save_weights("model_cnn_lstm.h5")

In [48]:
model_cnn_lstm_emb_json = model_cnn_lstm_emb.to_json()
with open ("model_cnn_lstm_emb_json.json","w")as json_file:
    json_file.write(model_cnn_lstm_emb_json)
model_cnn_lstm_emb.save_weights("model_cnn_lstm_emb.h5")

In [49]:
model_gru_json = model_gru.to_json()
with open ("model_gru_json.json","w")as json_file:
    json_file.write(model_gru_json)
model_gru.save_weights("model_gru.h5")

In [50]:
model_gru_emb_json = model_gru_emb.to_json()
with open ("model_gru_emb_json.json","w")as json_file:
    json_file.write(model_gru_emb_json)
model_gru_emb.save_weights("model_gru_emb.h5")
