In [67]:
def ifSpam(EmailText):
    """
    Classify an email as spam or ham and record the result in the database.

    Reads every row of the "application_data_set" table from the SQLite file
    'db.sqlite3' (relative to the current working directory), fits a
    Multinomial Naive Bayes model on a fixed-seed 80% split of those rows,
    predicts the label of EmailText, and rewrites the table with EmailText
    and its predicted label appended as a new row.

    Argument:
        EmailText: text of the email to classify.

    Returns:
        True if EmailText is predicted to be 'spam', else False.

    Note:
        Every call retrains the model and adds a row to the table, so calling
        it twice with the same text stores that text twice.
    """

    # Imports are kept local so the function stays self-contained.
    import sqlite3

    import numpy as np
    import pandas as pd

    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import MultinomialNB

    cnx = sqlite3.connect('db.sqlite3')
    try:
        # Load the labelled emails stored in the table.
        df = pd.read_sql_query("SELECT * FROM application_data_set", cnx)

        # Train on the same fixed-seed 80% split as before; the remaining 20%
        # is left out of training.
        train_set, _ = train_test_split(df, test_size=0.2, random_state=42)

        # Bag-of-words counts (English stop words dropped) are the features
        # Multinomial Naive Bayes is fitted on.
        vectorizer = CountVectorizer(stop_words="english")
        counts = vectorizer.fit_transform(train_set['EmailText'].values)

        spam_nb = MultinomialNB()
        spam_nb.fit(counts, train_set['Label'].values)

        # Predict the label of the text passed in as the argument.
        email_vect = vectorizer.transform(np.array([EmailText]))
        predicted_label = spam_nb.predict(email_vect)[0]

        # DataFrame.append no longer exists in pandas 2.x; concatenate a
        # one-row frame instead.
        new_row = pd.DataFrame(
            [{'Label': predicted_label, 'EmailText': EmailText, 'id': df.shape[0] + 1}]
        )
        df = pd.concat([df, new_row], ignore_index=True)

        # Replace the previous table with the extended frame.
        df.to_sql(name='application_data_set', con=cnx, if_exists='replace', index=False)
    finally:
        # Close the connection even when reading, training or writing fails.
        cnx.close()

    # bool() turns the numpy comparison result into a plain True/False.
    return bool(predicted_label == 'spam')

In [68]:
# Try the classifier on a sample message.
# Note: besides returning the prediction, each ifSpam call writes the message
# and its predicted label into the application_data_set table of db.sqlite3,
# so re-running this cell adds another row.
text = 'Trying to spam'
ifSpam(text)

False