## Classification of SMS data

### Import Packages

In [1]:
# Import required packages
import pandas as pd
import numpy as np
import re
from nltk import word_tokenize 
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier 
from sklearn.metrics import accuracy_score

### Load Data 

Load HDFC SMS data

In [2]:
df = pd.read_csv("./Sample SMS Data/sms_hdfc.csv")
df.shape

(137189, 6)

Check first few lines of the data

In [3]:
df.head()

Unnamed: 0,Rowid,Cluster,_id,smses_senderId,f0_,smses_smsText
0,1,1.0,5d802fa5f0771c951e7ef8af84b44d7a,DM-HDFCBK,19-09-2017,Balance in A/c XXXXXXXXXX6005 as of 18-SEP-17 ...
1,2,1.0,5d802fa5f0771c951e7ef8af84b44d7a,DM-HDFCBK,15-09-2017,"INR 24,000.00 Dr to A/c No XX6005 towards ECS ..."
2,3,1.0,e830ca63b5d2c92288b1875712b6d934,BZ-HDFCBK,12-09-2017,INR 125.56 deposited to A/C No XXXXXXXXXX5309 ...
3,4,3.0,fe8366890c0556b6f0e2fc49c15c8973,AM-HDFCBK,04-09-2017,"Dear Customer, Update your Aadhar number, Just..."
4,5,3.0,fe8366890c0556b6f0e2fc49c15c8973,AM-HDFCBK,04-09-2017,Excellent Job Opportunity with HDFC Bank for S...


Check if there are any missing values in any of the columns

In [4]:
df.isnull().sum()

Rowid               0
Cluster           751
_id                 0
smses_senderId      0
f0_                 0
smses_smsText       0
dtype: int64

There are missing values in Cluster. <br>
Let us explore Cluster

In [5]:
df.Cluster.value_counts()

3.0    62948
2.0    34313
1.0    23255
4.0    15922
Name: Cluster, dtype: int64

Check sample records for each Cluster

In [6]:
df[df['Cluster'] == 4].sample(10)['smses_smsText']

86150     OTP is 051200 for txn of INR 99.00 at FREECHAR...
114636    OTP is 732354 for txn of INR 50.00 at FREECHAR...
63528     Your OTP for IPIN regeneration is 166771. OTP ...
9383      OTP is 536119 for txn of INR 238.00 at KARNATA...
110462    OTP is 346865 for txn of INR 5000.00 at Wibmo ...
103249    Your OTP for Fund Transfer of Rs.  5,500.00 fr...
8238      Your OTP for IPIN regeneration is 257487. OTP ...
98376     OTP for your Online Purchase of Rs. 17.00 from...
42939     One time password is 846295 for card ending 18...
35171     OTP is 139680 for txn of INR 349.00 at airtel1...
Name: smses_smsText, dtype: object

In [7]:
df['smses_smsText'][98001]

'OTP is 742662 for txn of INR 50000.00 at DBSBANK on card ending 4471. Valid till 13:13:14. Do not share OTP for security reasons.'

In [8]:
df[df['Cluster'] == 3].sample(10)['smses_smsText']

124835    Dear Customer, Now shop with just a few clicks...
16734     Your Customer ID has been enabled for NetBanki...
28068     An amount of Rs.250.00 has been debited from y...
25111     Dear  Customer, Govt. had mandated linking of ...
64232     Your Bank a/c ****3405 will be debited for Rs....
86811     Give missed call on 18002703333 to check HDFC ...
133831    Dear Customer, Govt. mandates linking of Aadha...
136680    An amount of Rs.300.00 has been debited from y...
101639    An amount of Rs.4,000.00 has been debited from...
48448     Dear Customer, enjoy instant fund transfer! Ev...
Name: smses_smsText, dtype: object

In [9]:
df['smses_smsText'][90654]

'HDFC Bank Cr Crd \nending 0003 - Total Out. amt - Rs 21,279.69;Avl.Bal-43,720.31;RwdPnt-5562'

In [10]:
df[df['Cluster'] == 2].sample(10)['smses_smsText']

115510    Thank you for using Debit Card ending 8774 for...
54164     Thank you for using Debit Card ending 2249 for...
78139     Rs.10000.00 was withdrawn using your HDFC Bank...
11199     Rs.2000.00 was withdrawn using your HDFC Bank ...
130142    DEAR CARDMEMBER, PAYMENT OF Rs. 3143.55 RECEIV...
55464     Thank you for using Debit Card ending 8126 for...
96834     Rs.2000.00 was spent on ur HDFCBank CREDIT Car...
118281    Rs.5000.00 was withdrawn using your HDFC Bank ...
92164     Rs.5000.00 was withdrawn using your HDFC Bank ...
16665     Rs.276.00 was spent on ur HDFCBank CREDIT Card...
Name: smses_smsText, dtype: object

In [11]:
df['smses_smsText'][26758]

'Thank you for using Debit Card ending 4761 for Rs.176.00 in PUNE at CLOUDNINE. on 2017-08-30:14:21:51  Avl bal: Rs.42634.37'

In [12]:
df[df['Cluster'] == 1].sample(10)['smses_smsText']

59164     Balance in A/c XXXXXXXXXX4402 as of 25-AUG-17 ...
82681     INR 3,00,000.00 deposited to A/c No XX9049 tow...
18576     INR 5,000.00 deposited to A/c No XX8078 toward...
124652    INR 1,99,999.00 deposited to A/c No XX6562 tow...
68124     INR 1,99,600.00 Dr to A/c No XX5339 towards IM...
124240    Balance in A/c XXXXXXXXXX1336 as of 28-AUG-17 ...
54140     Balance in A/c XXXXXXXXXX5444 as of 21-AUG-17 ...
75823     INR 10,000.00 Dr to A/c No XX8116 towards KISA...
1961      Bal in A/c No XX3287 has gone below your speci...
104248    INR 18,815.74 deposited to A/c No XX8587 towar...
Name: smses_smsText, dtype: object

In [13]:
df['smses_smsText'][29314]

'INR 5,000.00 Dr to A/c No XX8250 towards 50400096559079- RD Installment-Sep 2017 Val 04-SEP-17. Clr Bal INR 53,825.00.'

In [14]:
df[df['Cluster'].isnull()].sample(10)['smses_smsText']

61952     SHG DIRECT LINKAGE of NAGAVENI  V, LOS No. 505...
128801    We are glad to extend 5% Cashback(maxRs.150 p....
62048     SHG DIRECT LINKAGE of SURYA KUMARI R, LOS No. ...
98138                      Your verification code is 169784
99464     प्रिय ग्राहक, हमे आपके मौजूदा एचडीएफसी बैंक टू...
122106    પ્રિય ગ્રાહક, તમારી મોજૂદ એચડીએફસી બેન્ક ટુ વ્...
68772     प्रिय ग्राहक, अपने बिजली व मोबाइल बिलों, बिमा ...
79620                                          5810773416).
119965                     Your verification code is 064214
62513     प्रिय ग्राहक, अपने बिजली व मोबाइल बिलों, बिमा ...
Name: smses_smsText, dtype: object

Inference: Cluster  <br> 1 - Account,<br>  2 - creditcard/debitcard transactions, <br> 3 - creditcard/debitcard/others,<br>  4 - Others

### Data Preprocessing

In [15]:
stopwords_set = set(stopwords.words('english'))

def preprocess(sent):
    sent = sent.lower() # Convert into lower case
    sent = re.sub(" a/c ", " account ", sent) # substitute common abbreviation with correct words
    sent = re.sub("\'ve", " have ", sent)
    sent = re.sub("can't", "can not", sent)
    sent = re.sub("n't", " not ", sent)
    sent = re.sub("i'm", "i am", sent)
    sent = re.sub("\'re", " are ", sent)
    sent = re.sub("\'d", " would ", sent)
    sent = re.sub("\'ll", " will ", sent)
    sent = re.sub('[^A-Za-z]+', ' ', sent) # Remove all digits
    sent = sent.replace('x', '')
    sent = re.sub("inr "," ",sent)
    sent = re.sub("bal "," ",sent)
    sent = re.sub("clr "," ",sent)
    sent = re.sub("hdfc "," ",sent)
    sent = re.sub("jan "," ",
                  re.sub("feb "," ",
                  re.sub("mar "," ",
                  re.sub("apr "," ",
                  re.sub("may "," ", 
                  re.sub("jun "," ", 
                  re.sub("jul "," ", 
                  re.sub("aug "," ", 
                  re.sub("sep "," ", 
                  re.sub("oct "," ", 
                  re.sub("nov "," ", 
                  re.sub("dec "," ", sent))))))))))))
    sent = ' '.join(word for word in sent.split() if word not in stopwords_set) # remove stopwords from sent
    return sent

In [16]:
df['processed_sms'] = df['smses_smsText'].apply(preprocess)
df.head()

Unnamed: 0,Rowid,Cluster,_id,smses_senderId,f0_,smses_smsText,processed_sms
0,1,1.0,5d802fa5f0771c951e7ef8af84b44d7a,DM-HDFCBK,19-09-2017,Balance in A/c XXXXXXXXXX6005 as of 18-SEP-17 ...,balance account eod check account current bala...
1,2,1.0,5d802fa5f0771c951e7ef8af84b44d7a,DM-HDFCBK,15-09-2017,"INR 24,000.00 Dr to A/c No XX6005 towards ECS ...",dr account towards ecs tp fin homes ltd val
2,3,1.0,e830ca63b5d2c92288b1875712b6d934,BZ-HDFCBK,12-09-2017,INR 125.56 deposited to A/C No XXXXXXXXXX5309 ...,deposited account towards dbl subsidy val
3,4,3.0,fe8366890c0556b6f0e2fc49c15c8973,AM-HDFCBK,04-09-2017,"Dear Customer, Update your Aadhar number, Just...",dear customer update aadhar number sms aadhaar...
4,5,3.0,fe8366890c0556b6f0e2fc49c15c8973,AM-HDFCBK,04-09-2017,Excellent Job Opportunity with HDFC Bank for S...,ecellent job opportunity bank sales officer ag...


Bi-grams

In [17]:
#Creating N-Grams
def ngram(text,grams):  
    model=[]
    count=0
    for token in text[:len(text)-grams+1]:  
        model.append(text[count:count+grams])  
        count=count+1  
    return model

In [18]:
#Created Bigrams
df['bigrams'] = df["processed_sms"].apply(lambda x: ngram(x.split(" "), 2))
df['bigrams'][61092]

[['ur', 'transaction'],
 ['transaction', 'bank'],
 ['bank', 'credit'],
 ['credit', 'card'],
 ['card', 'ending'],
 ['ending', 'rs'],
 ['rs', 'credited'],
 ['credited', 'reversed'],
 ['reversed', 'amazon']]

### Deriving Target variable  

From processed data, identify high frequency words for each cluster 

In [19]:
# Others cluster
freq_5 = pd.Series(' '.join(df[df['Cluster'].isnull()]["processed_sms"]).split()).value_counts()
freq_5[:2]

verification    341
code            338
dtype: int64

In [20]:
# Others cluster
freq_4 = pd.Series(' '.join(df[df['Cluster'] == 4]["processed_sms"]).split()).value_counts()
freq_4[:2]

otp      32628
share    15716
dtype: int64

In [21]:
# Others/Debit/Credit
freq_3 = pd.Series(' '.join(df[df['Cluster'] == 3]["processed_sms"]).split()).value_counts()
freq_3[:2]

account    43992
bank       39662
dtype: int64

In [22]:
# Debit/Credit
freq_2 = pd.Series(' '.join(df[df['Cluster'] == 2]["processed_sms"]).split()).value_counts()
freq_2[:2]

rs      65136
card    36493
dtype: int64

In [23]:
# Account
freq_1 = pd.Series(' '.join(df[df['Cluster'] == 1]["processed_sms"]).split()).value_counts()
freq_1[:2]

account    42963
balance    17851
dtype: int64

From each of the identified high frequency words, consider only most repeated words 

In [24]:
freq_1 = freq_1[freq_1 > 100]
freq_2 = freq_2[freq_2 > 500]
freq_3 = freq_3[freq_3 > 1000]
freq_4 = freq_4[freq_4 > 100]
freq_5 = freq_5[freq_5 > 10]

From each cluster of repeated words, identify words which belong to only that particular cluster. <br>
For ex: 'account' may be part of both Cluster 1 and 2. Remove these kind of words from both the clusters and identify only unique words

In [25]:
others_1 = list(set(freq_5.index) - set(list(freq_1.index) + (list(freq_2.index)) + (list(freq_3.index)) + (list(freq_4.index))))
others_2 = list(set(freq_4.index) - set(list(freq_1.index) + (list(freq_2.index)) + (list(freq_3.index)) + (list(freq_5.index))))
drill = list(set(freq_3.index) - set(list(freq_1.index) + (list(freq_2.index)) + (list(freq_5.index)) + (list(freq_4.index))))
cr_db = list(set(freq_2.index) - set(list(freq_1.index) + (list(freq_3.index)) + (list(freq_5.index)) + (list(freq_4.index))))
account = list(set(freq_1.index) - set(list(freq_3.index) + (list(freq_2.index)) + (list(freq_5.index)) + (list(freq_4.index))))

In [26]:
drill.remove("debited")
drill.remove("creditcard")

other_words = others_1 + others_2 + drill
credit = ['credit card','cr card', 'crcard','creditcard','credit']
debit = list(set(cr_db) - set(credit))

In [27]:
len(account)

52

Create target column from the identified words

In [28]:
other_words_re = re.compile("|".join(other_words))
account_words_re = re.compile("|".join(account))
credit_re = re.compile("|".join(credit))
debit_re = re.compile("|".join(debit))

def other(sent):
    if other_words_re.search(sent):
        return "others"
    return np.nan
    
def cr_dr(sent):
    if credit_re.search(sent):
        return "credit"
    
    if debit_re.search(sent):
        return "debit"
    return np.nan
    
def account(sent):
    if account_words_re.search(sent):
        return "account"
    return np.nan

In [29]:
df['target'] = np.where((df['Cluster'] == 4) | (df['Cluster'].isnull()) | (df['Cluster'] == 3), df['processed_sms'].apply(other),
                        np.where(df['Cluster'] == 2, df['processed_sms'].apply(cr_dr), df['processed_sms'].apply(account)))
df['target'] = df['target'].fillna('others')

In [30]:
df.target.value_counts()

others     79767
account    23255
credit     17558
debit      16609
Name: target, dtype: int64

In [31]:
pd.crosstab(df['target'],df['Cluster'], dropna=False, margins=True)

Cluster,1.0,2.0,3.0,4.0,All
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
account,23255,0,0,0,23255
credit,0,17558,0,0,17558
debit,0,16609,0,0,16609
others,0,146,62948,15922,79767
All,23255,34313,62948,15922,137189


Get data ready for model building

In [32]:
df.drop(['Rowid', 'Cluster', '_id', 'smses_senderId', 'f0_', 'smses_smsText'], axis=1, inplace=True)
df.head()

Unnamed: 0,processed_sms,bigrams,target
0,balance account eod check account current bala...,"[[balance, account], [account, eod], [eod, che...",account
1,dr account towards ecs tp fin homes ltd val,"[[dr, account], [account, towards], [towards, ...",account
2,deposited account towards dbl subsidy val,"[[deposited, account], [account, towards], [to...",account
3,dear customer update aadhar number sms aadhaar...,"[[dear, customer], [customer, update], [update...",others
4,ecellent job opportunity bank sales officer ag...,"[[ecellent, job], [job, opportunity], [opportu...",others


### Model building

##### We shall build 3 models - bow, bigram, tfidf

In [33]:
def bow_cv(data):
    vectorizer = CountVectorizer()
    vectorizer = vectorizer.fit(data)
    return vectorizer

def bigram_process(data):
    vectorizer = CountVectorizer(ngram_range=(1,2))
    vectorizer = vectorizer.fit(data)
    return vectorizer

def tfidf_process(data):
    transformer = TfidfTransformer()
    transformer = transformer.fit(data)
    return transformer

In [34]:
X = df['processed_sms']
y = df['target']

# Bag of words approach
bow = bow_cv(X)
X_bow = bow.transform(X)

# Bigram approach
bigram = bigram_process(X)
X_bigram = bigram.transform(X)

# TF-IDF approach
tfidf = tfidf_process(X_bow)
X_tfidf = tfidf.transform(X_bow)

Divide data into train and test

In [35]:
print("Bag of words:")
train_x_bow, test_x_bow, train_y_bow, test_y_bow = train_test_split(X_bow,y, test_size=0.25, random_state=10)
print(train_x_bow.shape, test_x_bow.shape, train_y_bow.shape, test_y_bow.shape)

print("\nBigrams:")
train_x_bigram, test_x_bigram, train_y_bigram, test_y_bigram = train_test_split(X_bigram,y, test_size=0.25, random_state=10)
print(train_x_bigram.shape, test_x_bigram.shape, train_y_bigram.shape, test_y_bigram.shape)

print("\nTFIDF:")
train_x_tfidf, test_x_tfidf, train_y_tfidf, test_y_tfidf = train_test_split(X_tfidf,y, test_size=0.25, random_state=10)
print(train_x_tfidf.shape, test_x_tfidf.shape, train_y_tfidf.shape, test_y_tfidf.shape)

Bag of words:
(102891, 20133) (34298, 20133) (102891,) (34298,)

Bigrams:
(102891, 86887) (34298, 86887) (102891,) (34298,)

TFIDF:
(102891, 20133) (34298, 20133) (102891,) (34298,)


### SGD Classifier models for all three approaches

In [36]:
print("Training BOW model")
clf_bow = SGDClassifier(loss="hinge", penalty="l1", n_iter=20)
clf_bow.fit(train_x_bow, train_y_bow)

print("Training bigrams model")
clf_bigram = SGDClassifier(loss="hinge", penalty="l1", n_iter=20)
clf_bigram.fit(train_x_bigram, train_y_bigram)

print("Training TFIDF model")
clf_tfidf = SGDClassifier(loss="hinge", penalty="l1", n_iter=20)
clf_tfidf.fit(train_x_tfidf, train_y_tfidf)

Training BOW model




Training bigrams model




Training TFIDF model




SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=None,
       n_iter=20, n_iter_no_change=5, n_jobs=None, penalty='l1',
       power_t=0.5, random_state=None, shuffle=True, tol=None,
       validation_fraction=0.1, verbose=0, warm_start=False)

### Model Validation

In [37]:
train_preds_bow = clf_bow.predict(train_x_bow)
test_preds_bow = clf_bow.predict(test_x_bow)
print("BOW train accuracy: ", accuracy_score(train_y_bow, train_preds_bow))
print("BOW test accuracy: ", accuracy_score(test_y_bow, test_preds_bow))

train_preds_bigram = clf_bigram.predict(train_x_bigram)
test_preds_bigram = clf_bigram.predict(test_x_bigram)
print("\nBigram train accuracy: ", accuracy_score(train_y_bigram, train_preds_bigram))
print("Bigram test accuracy: ", accuracy_score(test_y_bigram, test_preds_bigram))

train_preds_tfidf = clf_tfidf.predict(train_x_tfidf)
test_preds_tfidf = clf_tfidf.predict(test_x_tfidf)
print("\nTFIDF train accuracy: ", accuracy_score(train_y_tfidf, train_preds_tfidf))
print("TFIDF test accuracy: ", accuracy_score(test_y_tfidf, test_preds_tfidf))

BOW train accuracy:  0.9983477660825534
BOW test accuracy:  0.9979590646684938

Bigram train accuracy:  0.9986296177508237
Bigram test accuracy:  0.9979882208875153

TFIDF train accuracy:  0.9973661447551292
TFIDF test accuracy:  0.9970552218788268


### Test data set

In [38]:
df_test = pd.read_csv("./Sample SMS Data/sms_kotakb.csv")
df_test.head()

Unnamed: 0,_id,smses_senderId,f0_,smses_smsText
0,0db8e1c4dda063fb70ce414128bfb028,DM-KOTAKB,2017-08-31,"Dear Valued Customer, you recently used a Kota..."
1,0db8e1c4dda063fb70ce414128bfb028,DM-KOTAKB,2017-08-30,"Dear Valued Customer, you recently used a Kota..."
2,0db8e1c4dda063fb70ce414128bfb028,TX-KOTAKB,2017-08-30,INR 17500.00 is debited from your A/c XXXX9823...
3,0db8e1c4dda063fb70ce414128bfb028,TX-KOTAKB,2017-08-29,INR 22000.00 is credited to your A/c XXXX9823 ...
4,4efdf9fc5a26dd28d50f6e214522d044,DM-KOTAKB,2017-08-29,"Dear Customer, your welcome kit sent to your c..."


In [39]:
df_test['processed_sms'] = df_test['smses_smsText'].apply(preprocess)

In [40]:
df_test_bow = bow.transform(df_test['processed_sms'])
X_bigram = bigram.transform(df_test['processed_sms'])
X_tfidf = tfidf.transform(df_test_bow)

In [41]:
df_test['preds_bow'] = clf_bow.predict(df_test_bow)
df_test['preds_bigram'] = clf_bigram.predict(X_bigram)
df_test['preds_tfidf'] = clf_tfidf.predict(X_tfidf)

In [42]:
df_test['preds_tfidf'].value_counts()

others    41756
credit     2627
debit      1624
Name: preds_tfidf, dtype: int64

In [43]:
df_test['preds_bow'].value_counts()

others     39710
debit       3870
credit      2423
account        4
Name: preds_bow, dtype: int64

In [44]:
df_test['preds_bigram'].value_counts()

others     43208
credit      2508
debit        235
account       56
Name: preds_bigram, dtype: int64

## LSTM

In [46]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import Dropout

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [45]:
df.head()

Unnamed: 0,processed_sms,bigrams,target
0,balance account eod check account current bala...,"[[balance, account], [account, eod], [eod, che...",account
1,dr account towards ecs tp fin homes ltd val,"[[dr, account], [account, towards], [towards, ...",account
2,deposited account towards dbl subsidy val,"[[deposited, account], [account, towards], [to...",account
3,dear customer update aadhar number sms aadhaar...,"[[dear, customer], [customer, update], [update...",others
4,ecellent job opportunity bank sales officer ag...,"[[ecellent, job], [job, opportunity], [opportu...",others


In [47]:
MAX_NB_WORDS = 50000
MAX_SEQUENCE_LENGTH = 100
EMBEDDING_DIM = 100

tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(df['processed_sms'].values)

In [49]:
X = tokenizer.texts_to_sequences(df['processed_sms'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
Y = pd.get_dummies(df['target']).values
X.shape, Y.shape

((137189, 100), (137189, 4))

In [55]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.5, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(68594, 100) (68594, 4)
(68595, 100) (68595, 4)


In [56]:
model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(4, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 100, 100)          5000000   
_________________________________________________________________
spatial_dropout1d_3 (Spatial (None, 100, 100)          0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 404       
Total params: 5,080,804
Trainable params: 5,080,804
Non-trainable params: 0
_________________________________________________________________
None


In [58]:
epochs = 1
batch_size = 32

history = model.fit(X_train, Y_train, 
                    epochs=epochs, 
                    batch_size=batch_size,
                    validation_split=0.1,
                    callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)])

Train on 61734 samples, validate on 6860 samples
Epoch 1/1


In [59]:
accr = model.evaluate(X_test,Y_test)
print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],accr[1]))

Test set
  Loss: 0.005
  Accuracy: 0.999
