## APPROACH:
### 1. Loading custom dataset
### 2. NLP pipeline for text preprocessing
### 3. Loading Glove embedding matrix
### 4. Converting sentences to vectors (creating the o/p of embedding layer)
### 5. Applying ML algorithms like SVM, Random Forest, Bernoulli Naive Bayes
### 6. Data visualization and comparison

In [1]:
import re
import numpy as np 
import pandas as pd
# import seaborn as sns
# sns.set_style('whitegrid')
import matplotlib.pyplot as plt
%matplotlib inline

# NLP
from nltk.tokenize.regexp import RegexpTokenizer
#from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer

# import gensim
import spacy
import en_core_web_sm
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Flatten
from keras.layers import Embedding
from keras.layers import Dense, Bidirectional, LSTM, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_recall_curve

Using TensorFlow backend.


## Step 1: Load dataset

In [2]:
# from google.colab import files
# uploaded = files.upload()

In [3]:
# emails_df = pd.read_csv('./emails.csv', nrows=20000)
df = pd.read_csv('../emaildataset.csv')
print(df.shape)
df.head()

(951, 7)


Unnamed: 0,From,To,Subject,Body,Class,TransactionID,Payment Date
0,Spike@DEUTSCHEBANK.com,Shaniece@CitiBankPune.com,Transaction no. 072558 is unresolved.,Sorry to inform that there has been only a par...,Pending,72558,04-02-2020
1,Stacy@HDFC.com,Rohan@CitiBankNewYork.com,Order for new Cheque book,"Good morning, I want to place an order for an ...",General,212096,29-05-2020
2,Zachary@HDFC.com,Rishabh@CitiBankPune.com,Required money acquired. Transaction 847047 is...,Hello! This is to inform you that I have recei...,Processing,847047,26-01-2020
3,Stacy@SBI.com,Shai@CitiBankHongKong.com,Asking for the details for transaction 746078,I request you to kindly send the status of my ...,Request,746078,17-06-2019
4,Angela@HDFC.com,Dipesh@CitiBankSingapore.com,Partial payment for transaction 535918,Hello!! Greetings for the day. Status of trans...,Pending,535918,18-02-2020


In [4]:
df.Class.unique()

array(['Pending', 'General', 'Processing', 'Request', 'Complete',
       'Failed'], dtype=object)

## Step 2: NLP Pipeline for text preprocessing

In [5]:
nlp_ = en_core_web_sm.load()

In [6]:
nlp = spacy.load('en')

In [7]:
def clean(text):
    
    text=text.rstrip()
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    text = " ".join([i for i in text.lower().split()])
    print(text)
    
    customize_stop_words = ["cc","subject","http", "gbp", "usd", "eur", "inr", "cad","thanks", "acc", "id", "account", "regards", "hi", "hello", "thank you", "greetings"]
    rem_stop = ["not", "wasn't", "hadn't", "won't", "can't", "didn't"]
    
    for w in customize_stop_words:
        nlp.vocab[w].is_stop = True
    
    for w in rem_stop:
        nlp.vocab[w].is_stop = False
    
    doc = nlp(text)
    
    normalized = " ".join(token.lemma_ for token in doc if not token.is_stop)
    
    doc = " ".join(token.orth_ for token in nlp(normalized) if not token.is_punct | token.is_space)

#     exclude = set(string.punctuation) 
#     lemma = WordNetLemmatizer()
#     porter= PorterStemmer()

    # AMOUNT EXTRACTION  NOT NEEDED
    #amount = "".join([i for i in text.lower().split() if i.isdigit()])
    
#     stop_free = " ".join([i for i in text.lower().split() if((i not in stop) and (not i.isdigit()))])
#     punc_free = ''.join(ch for ch in stop_free if ch not in exclude)
#     doc = lemma(punc_free)
    
#     normalized = " ".join(lemma(word) for token in punc_free.split())
#     stem = " ".join(porter.stem(token) for token in normalized.split())
#     return normalized, amount

#     return normalized
    return doc

In [8]:
print(clean("payment in id 1234 for amount 14859 GBP has been freezed"))

payment in id for amount gbp has been freezed
payment would freeze


In [9]:
for i in range(df.shape[0]):
    # merge subject and body strings
    df['Text_Data'] = (df['Subject'] + " " + df['Body'])

In [10]:
def converter(x):
    try:
        return ' '.join([x.lower() for x in str(x).split()])
    except AttributeError:
        return None  # or some other value

df['Text_Data'] = df['Text_Data'].apply(converter)

In [11]:
df.head()

Unnamed: 0,From,To,Subject,Body,Class,TransactionID,Payment Date,Text_Data
0,Spike@DEUTSCHEBANK.com,Shaniece@CitiBankPune.com,Transaction no. 072558 is unresolved.,Sorry to inform that there has been only a par...,Pending,72558,04-02-2020,transaction no. 072558 is unresolved. sorry to...
1,Stacy@HDFC.com,Rohan@CitiBankNewYork.com,Order for new Cheque book,"Good morning, I want to place an order for an ...",General,212096,29-05-2020,"order for new cheque book good morning, i want..."
2,Zachary@HDFC.com,Rishabh@CitiBankPune.com,Required money acquired. Transaction 847047 is...,Hello! This is to inform you that I have recei...,Processing,847047,26-01-2020,required money acquired. transaction 847047 is...
3,Stacy@SBI.com,Shai@CitiBankHongKong.com,Asking for the details for transaction 746078,I request you to kindly send the status of my ...,Request,746078,17-06-2019,asking for the details for transaction 746078 ...
4,Angela@HDFC.com,Dipesh@CitiBankSingapore.com,Partial payment for transaction 535918,Hello!! Greetings for the day. Status of trans...,Pending,535918,18-02-2020,partial payment for transaction 535918 hello!!...


In [12]:
# sub_df["content"]=sub_df["content"].map(clean)
for i in range(df.shape[0]):
    df.loc[i]['Text_Data'] = clean(df.loc[i]['Text_Data'])

transaction no is unresolved sorry to inform that there has been only a partial payment of amount gbp you are definitely going to receive the rest at a later point of time warm regards
order for new cheque book good morning i want to place an order for an multicity cheque book for account no having atleat leaves kindly send the same to the address mentioned in my account records
required money acquired transaction is in process hello this is to inform you that i have received the amount you transferred to my account and now it is currently in process
asking for the details for transaction i request you to kindly send the status of my transaction with id thanks and regards
partial payment for transaction hello greetings for the day status of transaction usd for account is pending i would be grateful if you could tell me the cause thanks a lot


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


payment done and transaction settled greetings i wanted to let you know that i have acknowledged the payment for transaction in response to your email which confirmed the finalized status of my transaction thanks and regards
failure of transaction this is to notify you that my transaction has failed please reply to me with the cause as soon as possible
send steps to activate online banking hey i have to transfer funds to a different bank account urgently and now due to lockdown i only have the option to do online banking i would appreciate if you send me the instructions for the same thanks in advance
incomplete transaction i regret to inform you that i could only pay the partial amount of eur i will definitely pay the remaining amount as soon as possible thank you for you supreme patience and understanding regards
failure of transaction hey i see my transaction with id has failed i think i did everything right please look into this and reply with the reason urgently thank you and rega

fulfilled transaction having id to whom it may concern i have successfully received payment for the transaction i am grateful for your cooperation thank you so much and regards
incomplete transaction the transaction is taking too long to complete i would request you to kindly guide me through the further steps to be taken in order to complete the transaction
mobile banking steps i would like to know the instructions to be followed to install and setup mobile banking please reply to me with the same thanks in advance
send me transactional details for kindly reply to me at the earliest with the last transactions made with the account no thanks in advance
asking for the details for transaction i request you to kindly send the status of my transaction with id thanks and regards
payment received for transaction and now processing sincere greetings i am glad to tell you that i have finally been granted the pre approved amount through transaction it is now being processed i am grateful for yo

how to create a bank account with citi i have heard a lot about your bank and would like to open an account with citibank for that i would like to know all the documents that are required kindly reply with the same and also the further steps to be taken eagerly waiting for your reply
add one more contact no to account add to my account thanks
transaction has failed this is in response to your email stating that my transaction having id has failed but no reason was mentioned can you please tell me what did i do wrong so that i can create a new one without any errors waiting for your reply thank you in advance
pending payment for transaction having id hello greetings for the day i see that transaction gbp for account is pending i would be grateful if you could tell me the cause thanks a lot
payment done and transaction settled hey i am writing in reference to the transaction i was granted the aforementioned amount within the deadline sincere gratitude for such a quick response kind regar

concluded the transaction greetings i wanted to let you know that i have acknowledged the payment for transaction in response to your email which confirmed the finalized status of my transaction thanks and regards
want to block account hey can you please block my account i no longer want to avail your services
soliciting information for id hey i would be really grateful if you could tell me the details of account no thanks
received full payment for transaction no hello sincere greetings for the day i would like to inform you that my transaction has completed thank you so much for your support looking forward to working more with you in the future regards
urgent transaction ceased i have been your regular client and have followed the procedure for creating a transaction correctly still i received an email saying that the transaction has failed i would like to know the reasoning my transaction id is regards
request to send details of transaction i need details of urgently please provide 

transaction is now being processed this is in response to your email notifying about the transferred amount i am acknowledging it and informing you that it is now in process
partial payment for transaction since my transaction is still pending i wanted to know if there is a problem with the paperwork from my side please let me know at the earliest thanks and regards
soliciting information for id kindly reply to me at the earliest with the last transactions made with the account no thanks in advance
the pending amount for transaction will reach you soon i regret to inform you the i could only pay the partial amount of gbp i will definitely pay the remaining amount as soon as possible thank you for understanding regards
handling the transaction after payment this is in response to your email notifying about the transferred amount i am acknowledging it and informing you that it is now in process
incomplete transaction hey sincere apologies for transferring a fraction of the amount we agre

send steps to activate online banking this is to inform you that i am not able to withdraw money from my account please help
finalized transaction of id glad to let you know that i got the desired payment in reference to transaction i am very happy with your services and will definitely recommend your company to my friends and acquaintances warm regards
dealing with the transaction hello this is to inform you that i have received the amount you transferred to my account and now it is currently in process
payment done and transaction settled greetings i wanted to let you know that i have acknowledged the payment for transaction in response to your email which confirmed the finalized status of my transaction thanks and regards
urgently required update on transaction i want to know status of urgently please provide the same on priority basis
transaction having id has stopped help hey i see my transaction with id has failed i think i did everything right please look into this and reply wit

impetrating details for id i request you to kindly send the status of my transaction with id thanks and regards
add one more contact no to account i have heard a lot about your bank and would like to open an account with citibank for that i would like to know all the documents that are required kindly reply with the same and also the further steps to be taken eagerly waiting for your reply
handling the transaction after payment thank you for transferring the payment to my account yesterday i appreciate the quick response i would like to inform you that is is currently in process thanks and regards
accepted payment transaction currently processing hello this is to inform you that i have received the amount you transferred to my account and now it is currently in process
transaction is complete i deeply appreciate your quick service as i have received the pre approved loan amount of inr
why has my transaction stopped i have been your regular client and have followed the procedure for cre

transaction is now being processed this is in response to your email notifying about the transferred amount i am acknowledging it and informing you that it is now in process
partially paid the required amount for transaction since my transaction is still pending i wanted to know if there is a problem with the paperwork from my side please let me know at the earliest thanks and regards
asking for the details for transaction kindly reply to me at the earliest with the last transactions made with the account no thanks in advance
transaction stalled and payment not received greetings for the day i checked my inbox and found your email stating the failure of my transaction please help me understand why has failed need funds urgently patiently waiting for you reply thanks a lot
incomplete transaction i regret to inform you the i could only pay the partial amount of usd i will definitely pay the remaining amount as soon as possible thank you for understanding regards
pending payment for trans

sorted out the transaction with id greetings i wanted to let you know that i have acknowledged the payment for transaction in response to your email which confirmed the finalized status of my transaction thanks and regards
payment received for transaction and now processing acknowledging the received payment for transaction it is finally in process kind regards
processing transaction having id hello this is to inform you that i have received the amount you transferred to my account and now it is currently in process
why has my transaction stopped greetings for the day i checked my inbox and found your email stating the failure of my transaction please help me understand why has failed need funds urgently patiently waiting for you reply thanks a lot
seeking update on the status of transaction i request you to kindly send the status of my transaction with id thanks and regards
transaction stalled and payment not received greetings for the day i checked my inbox and found your email stati

sorted out the transaction with id hey i am writing in reference to the transaction i was granted the aforementioned amount within the deadline sincere gratitude for such a quick response kind regards
processing transaction having id thank you for transferring the payment to my account yesterday i appreciate the quick response i would like to inform you that is is currently in process thanks and regards
processing transaction having id thank you for transferring the payment to my account yesterday i appreciate the quick response i would like to inform you that is is currently in process thanks and regards
handling the transaction after payment sincere greetings i am glad to tell you that i have finally been granted the pre approved amount through transaction it is now being processed i am grateful for your support warm regards
handling the transaction after payment thank you for transferring the payment to my account yesterday i appreciate the quick response i would like to inform you 

seeking update on the status of transaction please provide a status update on at the earliest
failure of transaction i have been your regular client and have followed the procedure for creating a transaction correctly still i received an email saying that the transaction has failed i would like to know the reasoning my transaction id is regards
processing transaction having id hello this is to inform you that i have received the amount you transferred to my account and now it is currently in process
seeking update on the status of transaction urgently require details of acc reply asap
failure of transaction hey i see my transaction with id has failed i think i did everything right please look into this and reply with the reason urgently thank you and regards
processing transaction having id acknowledging the received payment for transaction it is finally in process kind regards
how to change pin no of atm card hey i think i lost my atm card today so can you please block my account or t

accepted payment transaction currently processing thank you for transferring the payment to my account yesterday i appreciate the quick response i would like to inform you that is is currently in process thanks and regards
payment outstanding for transaction i regret to inform you the i could only pay the partial amount of inr i will definitely pay the remaining amount as soon as possible thank you for understanding regards
add one more contact no to account i need a new cheque book of leaves kindly send the same to my current address which is present in the details of my account
failure of transaction hey i see my transaction with id has failed i think i did everything right please look into this and reply with the reason urgently thank you and regards
payment done and transaction settled greetings i wanted to let you know that i have acknowledged the payment for transaction in response to your email which confirmed the finalized status of my transaction thanks and regards
transaction

why has my transaction stopped this is in response to your email stating that my transaction having id has failed but no reason was mentioned can you please tell me what did i do wrong so that i can create a new one without any errors waiting for your reply thank you in advance
failure of transaction greetings for the day i checked my inbox and found your email stating the failure of my transaction please help me understand why has failed need funds urgently patiently waiting for you reply thanks a lot
urgent transaction ceased this is in response to your email stating that my transaction having id has failed but no reason was mentioned can you please tell me what did i do wrong so that i can create a new one without any errors waiting for your reply thank you in advance
change address for account no kindly add the contact no to my bank account
fulfilled transaction having id to whom it may concern i have successfully received payment for the transaction i am grateful for your cooperat

transaction is now being processed acknowledging the received payment for transaction it is finally in process kind regards
abrupt closure of transaction with id i have been your regular client and have followed the procedure for creating a transaction correctly still i received an email saying that the transaction has failed i would like to know the reasoning my transaction id is regards
payment done and transaction settled to whom it may concern i have successfully received payment for the transaction i am grateful for your cooperation thank you so much and regards
fulfilled transaction having id glad to let you know that i got the desired payment in reference to transaction i am very happy with your services and will definitely recommend your company to my friends and acquaintances warm regards
received full payment for transaction no hello sincere greetings for the day i would like to inform you that my transaction has completed thank you so much for your support looking forward to

processing transaction having id thank you for transferring the payment to my account yesterday i appreciate the quick response i would like to inform you that is is currently in process thanks and regards
failure of transaction greetings for the day i checked my inbox and found your email stating the failure of my transaction please help me understand why has failed need funds urgently patiently waiting for you reply thanks a lot
transaction no is unresolved there has been only a partial payment of amount gbp assuring you that the rest will be paid later warm regards
new cheque book this is to inform you that i am not able to withdraw money from my account please help
request to send details of transaction i need details of urgently please provide the same on priority basis
asking for the details for transaction can you please tell me the amount transferred through transaction id thanks
completed transaction no glad to let you know that i got the desired payment in reference to transa

payment outstanding for transaction hey sincere apologies for transferring a fraction of the amount we agreed on please be assured that the rest is being transferred as of now thanks a lot for understanding
asking for the details for transaction hey i would be really grateful if you could tell me the details of account no thanks
remaining amount for transaction to be paid later hello greetings for the day status of transaction inr for account is pending i would be grateful if you could tell me the cause thanks a lot
the pending amount for transaction will reach you soon hello greetings for the day status of transaction eur for account is pending i would be grateful if you could tell me the cause thanks a lot
accepted payment transaction currently processing thank you for transferring the payment to my account yesterday i appreciate the quick response i would like to inform you that is is currently in process thanks and regards
incomplete transaction the transaction is taking too long t

transaction is complete hey i am writing in reference to the transaction i was granted the aforementioned amount within the deadline sincere gratitude for such a quick response kind regards
urgent transaction ceased this is to notify you that my transaction has failed please reply to me with the cause as soon as possible
seeking update on the status of transaction urgently require details of acc reply asap
want to block account i would like to know the instructions to be followed to install and setup mobile banking please reply to me with the same thanks in advance
upgrade to an account with more benefits i need a new cheque book of leaves kindly send the same to my current address which is present in the details of my account
partially paid the required amount for transaction since my transaction is still pending i wanted to know if there is a problem with the paperwork from my side please let me know at the earliest thanks and regards
required money acquired transaction is in process

want to block account i would like to know the instructions to be followed to install and setup mobile banking please reply to me with the same thanks in advance
the pending amount for transaction will reach you soon i regret to inform you the i could only pay the partial amount of eur i will definitely pay the remaining amount as soon as possible thank you for understanding regards
sorted out the transaction with id hello sincere greetings for the day i would like to inform you that my transaction has completed thank you so much for your support looking forward to working more with you in the future regards
change address for account no i need a new cheque book of leaves kindly send the same to my current address which is present in the details of my account
add one more contact no to account this is to inform you that i am not able to withdraw money from my account please help
urgent transaction ceased greetings for the day i checked my inbox and found your email stating the failure 

upgrade to an account with more benefits kindly add the contact no to my bank account
soliciting information for id can you please tell me the amount transferred through transaction id thanks
payment outstanding for transaction there has been only a partial payment of amount eur assuring you that the rest will be paid later warm regards
change address for account no i need a new cheque book of leaves kindly send the same to my current address which is present in the details of my account
why cant i withdraw money with my atm card this is to inform you that i am not able to withdraw money from my account please help
payment is pending for transaction the transaction is taking too long to complete i would request you to kindly guide me through the further steps to be taken in order to complete the transaction
transaction no is unresolved hello greetings for the day status of transaction inr for account is pending i would be grateful if you could tell me the cause thanks a lot
want to blo

fulfilled transaction having id hey i am writing in reference to the transaction i was granted the aforementioned amount within the deadline sincere gratitude for such a quick response kind regards
abrupt closure of transaction with id i have been your regular client and have followed the procedure for creating a transaction correctly still i received an email saying that the transaction has failed i would like to know the reasoning my transaction id is regards
upgrade to an account with more benefits kindly add the contact no to my bank account
imploring update on transaction hey i would be really grateful if you could tell me the details of account no thanks
why has my transaction stopped i have been your regular client and have followed the procedure for creating a transaction correctly still i received an email saying that the transaction has failed i would like to know the reasoning my transaction id is regards
handling the transaction after payment this is in response to your ema

In [13]:
df.head()

Unnamed: 0,From,To,Subject,Body,Class,TransactionID,Payment Date,Text_Data
0,Spike@DEUTSCHEBANK.com,Shaniece@CitiBankPune.com,Transaction no. 072558 is unresolved.,Sorry to inform that there has been only a par...,Pending,72558,04-02-2020,transaction no. 072558 is unresolved. sorry to...
1,Stacy@HDFC.com,Rohan@CitiBankNewYork.com,Order for new Cheque book,"Good morning, I want to place an order for an ...",General,212096,29-05-2020,"order for new cheque book good morning, i want..."
2,Zachary@HDFC.com,Rishabh@CitiBankPune.com,Required money acquired. Transaction 847047 is...,Hello! This is to inform you that I have recei...,Processing,847047,26-01-2020,required money acquired. transaction 847047 is...
3,Stacy@SBI.com,Shai@CitiBankHongKong.com,Asking for the details for transaction 746078,I request you to kindly send the status of my ...,Request,746078,17-06-2019,asking for the details for transaction 746078 ...
4,Angela@HDFC.com,Dipesh@CitiBankSingapore.com,Partial payment for transaction 535918,Hello!! Greetings for the day. Status of trans...,Pending,535918,18-02-2020,partial payment for transaction 535918 hello!!...


In [14]:
df['Text_Data'][0]

'transaction no. 072558 is unresolved. sorry to inform that there has been only a partial payment of amount gbp 908225. you are definitely going to receive the rest at a later point of time. warm regards.'

In [15]:
# Calculating optimal sentence length according to Gaussian distribution

lengths = []

for i in range(df.shape[0]):
    words = df.values[i][7].split()
    lengths.append(len(words))
    
avg = int(np.mean(lengths) + 2*np.std(lengths))

In [16]:
# df['Subject'][1005] + " " + df['Body'][1005]

## Step 3: Loading word2vec

In [17]:
# !wget http://nlp.stanford.edu/data/glove.6B.zip 

--2020-06-14 12:24:02--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2020-06-14 12:24:04--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2020-06-14 12:24:06--  http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’

glove.6

In [18]:
# !unzip glove*.zip

Archive:  glove.6B.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of glove.6B.zip or
        glove.6B.zip.zip, and cannot find glove.6B.zip.ZIP, period.


In [19]:
# !ls
# !pwd

'Basic UI'			       glove.6B.zip
'Deploying the ML model on web'        Markov_Chain.ipynb
'Directory Watcher'		       model.json
 emaildataset.csv		       ModelValidation.ipynb
 EmailDataSet.ipynb		       msgbody.ipynb
 Email_Preprocessor_BiLSTM.ipynb      'paymentdata (1).csv'
 Email_Preprocessor_RNNLSTM.ipynb      ReadClassify.ipynb
 Email_Preprocessor_Supervised.ipynb   README.md
 emails.txt			       sample_text.txt
 FLASK_connect
/home/aheli/SmartEmailTracker


In [None]:
embeddings_index = {}
with open('../glove.6B.300d.txt',encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coeffs = np.asarray(values[1:],dtype='float32')
        
#         print(word)
#         print(coeffs)
        embeddings_index[word] = coeffs
    f.close()
print(len(embeddings_index))

# embeddings_index = dict()
# f = open('./glove.6B.300d.txt')
# for line in f:
#     values = line.split()
#     word = values[0]
#     coefs = np.asarray(values[1:], dtype='float32')
#     embeddings_index[word] = coefs
# f.close()

In [None]:
print(len(embeddings_index))
print(len(embeddings_index['girl']))

## Step 4: Converting sentences to vectors (creating the o/p of embedding layer)

In [None]:
t = Tokenizer()
l = list(df.Text_Data)
t.fit_on_texts(l)
vocab_size = len(t.word_index) + 1
print(vocab_size)

# integer encode the mails
encoded_mails = t.texts_to_sequences(l)

#post padding
padded_inputs = pad_sequences(encoded_mails, maxlen=avg, padding='post')
# print(padded_inputs)

In [None]:

padded_inputs.shape

In [None]:
# create a weight matrix for words in training docs

print(encoded_mails[0])
embedding_matrix = np.zeros((vocab_size, 300))
for word, i in t.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
#         print(i)
#         print(word)
#         print()
embedding_matrix[0] 

In [None]:
embedding_matrix.shape

## Embedding matrix train

In [None]:
def getOutputEmbeddings(X):
    embedding_matrix_output = np.zeros((X.shape[0],avg,300))
    
    for ix in range(X.shape[0]):
#         X[ix] = X[ix].split()
        for jx in range(len(X[ix])):
#             if X[ix][jx] in embeddings:
            embedding_matrix_output[ix][jx] = embedding_matrix[X[ix][jx]]
            
    return embedding_matrix_output

In [None]:
embedding_matrix_output = getOutputEmbeddings(padded_inputs)
embedding_matrix_output.shape

In [None]:
embedding_matrix_output = embedding_matrix_output.reshape(df.shape[0], -1)

In [None]:
# import preprocessing from sklearn
# from sklearn.preprocessing import OneHotEncoder 
# from sklearn.preprocessing import LabelEncoder 

In [None]:
# le = OneHotEncoder()
# classes = df['Class'].values

# classes = classes.reshape(-1, 1)
# Y = le.fit_transform(classes)

In [0]:
# Y = Y.toarray()

In [0]:
Y = list(df['Class'])

In [0]:
# X_train, X_test, Y_train, Y_test = train_test_split(embedding_matrix_output, Y, test_size=0.2, stratify=Y)
X_train, X_test, Y_train, Y_test = train_test_split(embedding_matrix_output, Y, test_size=0.2, stratify=Y)

In [0]:
# Y_test.unique()

## Step 5: Applying ML algorithms

### Support Vector Machine

In [0]:
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.model_selection import cross_val_score

In [0]:
import multiprocessing

In [37]:
cpus = multiprocessing.cpu_count()
print(cpus)

2


In [0]:
params = [
    {
        'kernel':['linear','rbf','poly','sigmoid'],
        'C':[0.1,0.2,0.5,1.0,2.0,5.0]
    }   
]

In [39]:
svc = svm.SVC()
svc.fit(X_train,Y_train)
svc.score(X_test, Y_test)

0.9947643979057592

In [40]:
cross_val_score(svc,embedding_matrix_output, Y, scoring="accuracy",cv=5).mean()

0.9853292918159273

In [0]:
# gs = GridSearchCV(estimator=svm.SVC(),param_grid=params,scoring="accuracy",cv=5,n_jobs = cpus)

# gs.fit(embedding_matrix_output, Y)

In [0]:
# gs.best_estimator_
# gs.best_score_

In [0]:
y_pred = svc.predict(X_test)

In [44]:
classification_report(Y_test, y_pred, labels=None, target_names=None, sample_weight=None, digits=2, output_dict=False, zero_division='warn')

'              precision    recall  f1-score   support\n\n    Complete       1.00      0.97      0.98        32\n      Failed       1.00      1.00      1.00        36\n     General       1.00      1.00      1.00        24\n     Pending       0.97      1.00      0.99        34\n  Processing       1.00      1.00      1.00        31\n     Request       1.00      1.00      1.00        34\n\n    accuracy                           0.99       191\n   macro avg       1.00      0.99      0.99       191\nweighted avg       0.99      0.99      0.99       191\n'

### Bernoulli Naive Bayes algorithm

In [0]:
from sklearn.naive_bayes import BernoulliNB

In [46]:
BNBmodel = BernoulliNB(alpha = 2)
BNBmodel.fit(X_train, Y_train)
BNBmodel.score(X_test, Y_test)

0.7958115183246073

In [0]:
y_pred = svc.predict(X_test)

In [48]:
classification_report(Y_test, y_pred, labels=None, target_names=None, sample_weight=None, digits=2, output_dict=False, zero_division='warn')

'              precision    recall  f1-score   support\n\n    Complete       1.00      0.97      0.98        32\n      Failed       1.00      1.00      1.00        36\n     General       1.00      1.00      1.00        24\n     Pending       0.97      1.00      0.99        34\n  Processing       1.00      1.00      1.00        31\n     Request       1.00      1.00      1.00        34\n\n    accuracy                           0.99       191\n   macro avg       1.00      0.99      0.99       191\nweighted avg       0.99      0.99      0.99       191\n'

### Random forest algorithm

In [0]:
from sklearn.ensemble import RandomForestClassifier

In [50]:
clf = RandomForestClassifier()
clf.fit(X_train, Y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [51]:
clf.score(X_test, Y_test)

0.9947643979057592

In [0]:
y_pred = clf.predict(X_test)

In [53]:
classification_report(Y_test, y_pred, labels=None, target_names=None, sample_weight=None, digits=2, output_dict=False, zero_division='warn')

'              precision    recall  f1-score   support\n\n    Complete       1.00      1.00      1.00        32\n      Failed       0.97      1.00      0.99        36\n     General       1.00      0.96      0.98        24\n     Pending       1.00      1.00      1.00        34\n  Processing       1.00      1.00      1.00        31\n     Request       1.00      1.00      1.00        34\n\n    accuracy                           0.99       191\n   macro avg       1.00      0.99      0.99       191\nweighted avg       0.99      0.99      0.99       191\n'

## Step 6: Visualise our results

In [0]:
# import matplotlib.pyplot as plt