In [1]:
import email
import pandas as pd
import numpy as np
import re  # Regular expressions
from sklearn.ensemble import StackingClassifier
from flask_cors import CORS
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import joblib

In [2]:

class ReceivedParser(object):
    """Parse the routing part of an e-mail ``Received`` header.

    ``regexes`` is an ordered list of ``(pattern, server_name[, extra])``
    tuples.  ``parse`` tries them in order with ``re.match`` (anchored at the
    start, case-insensitive) and the first hit wins, so the order of the list
    is significant.

    The patterns are raw strings: they contain many regex escapes (``\\s``,
    ``\\w``, ``\\(`` ...) that are invalid *Python* string escapes and trigger a
    SyntaxWarning on recent Python versions when written in plain literals.
    """

    regexes = [
        (r"from\s+(mail\s+pickup\s+service|(?P<from_name>[\[\]\w\.\-]*))\s*(\(\s*\[?(?P<from_ip>[a-f\d\.\:]+)(\%\d+|)\]?\s*\)|)\s*by\s*(?P<by_hostname>[\w\.\-]+)\s*(\(\s*\[?(?P<by_ip>[\d\.\:a-f]+)(\%\d+|)\]?\)|)\s*(over\s+TLS\s+secured\s+channel|)\s*with\s*(mapi|Microsoft\s+SMTP\s+Server|Microsoft\s+SMTPSVC(\((?P<server_version>[\d\.]+)\)|))\s*(\((TLS|version=(?P<tls>[\w\.]+)|)\,?\s*(cipher=(?P<cipher>[\w\_]+)|)\)|)\s*(id\s+(?P<id>[\d\.]+)|)", "MS SMTP Server"),  # exchange
        (r"(from\s+(?P<from_name>[\[\S\]]+)\s+\(((?P<from_hostname>[\S]*)|)\s*\[(IPv6\:(?P<from_ipv6>[a-f\d\:]+)\:|)((?P<from_ip>[\d\.\:]+)|)\]\s*(\(may\s+be\s+forged\)|)\)\s*(\(using\s+(?P<tls>[\w\.]+)\s+with\s+cipher\s+(?P<cipher>[\w\-]+)\s+\([\w\/\s]+\)\)\s+(\(No\s+client\s+certificate\s+requested\)|)|)|)\s*(\(Authenticated\s+sender\:\s+(?P<authenticated_sender>[\w\.\-\@]+)\)|)\s*by\s+(?P<by_hostname>[\S]+)\s*(\((?P<by_hostname2>[\S]*)\s*\[((?P<by_ipv6>[a-f\:\d]+)|)(?P<by_ip>[\d\.]+)\]\)|)\s*(\([^\)]*\)|)\s*(\(Postfix\)|)\s*(with\s+(?P<protocol>\w*)|)\s*id\s+(?P<id>[\w\-]+)\s*(for\s+\<(?P<envelope_for>[\w\.\@]+)\>|)", "postfix"),  # postfix
        (r"(from\s+(?P<from_name>[\[\S\]]+)\s+\(((?P<from_hostname>[\S]*)|)\s*\[(IPv6\:(?P<from_ipv6>[a-f\d\:]+)|)\]\)\s*(\(using\s+(?P<tls>[\w\.]+)\s+with\s+cipher\s+(?P<cipher>[\w\-]+)\s+\([\w\/\s]+\)\)\s+(\(No\s+client\s+certificate\s+requested\)|)|)|)\s*(\(Authenticated\s+sender\:\s+(?P<authenticated_sender>[\w\.\-\@]+)\)|)\s*by\s+(?P<by_hostname>[\S]+)\s*(\((?P<by_hostname2>[\S]*)\s*\[((?P<by_ipv6>[a-f\:\d]+)|)(?P<by_ip>[\d\.]+)\]\)|)\s*(\([^\)]*\)|)\s*(\(Postfix\)|)\s*(with\s+(?P<protocol>\w+)|)\s*id\s+(?P<id>[\w\-]+)\s*(for\s+\<(?P<envelope_for>[\w\.\@]+)\>|)", "postfix"),  # postfix (IPv6-only sender)
        (r"\s*from\s+\[?(?P<from_ip>[\d\.\:]+)\]?\s*(\((port=\d+|)\s*helo=(?P<from_name>[\[\]\w\.\:\-]+)\)|)\s+by\s+(?P<by_hostname>[\w\-\.]+)\s+with\s+(?P<protocol>\w+)\s*(\((?P<cipher>[\w\.\:\_\-]+)\)|)\s*(\(Exim\s+(?P<exim_version>[\d\.\_]+)\)|)\s*\(envelope-from\s+<?(?P<envelope_from>[\w\@\-\.]*)>?\s*\)\s*id\s+(?P<id>[\w\-]+)\s*\s*(for\s+<?(?P<envelope_for>[\w\.\@]+)>?|)", "exim"),  # exim
        (r"\s*from\s+(?P<from_hostname>[\w\.]+)\s+\(\[?(?P<from_ip>[\d\.\:a-f]+)\]?(\:\d+|)\s*(helo\=\[?(?P<from_name>[\w\.\:\-]+)|)\]?\)\s+by\s+(?P<by_hostname>[\w\-\.]+)\s+with\s+(?P<protocol>\w+)\s+(\((?P<cipher>[\w\.\:\_]+)\)|)\s*\(Exim\s+(?P<exim_version>[\d\.\_]+)\)\s*\(envelope-from\s+\<(?P<envelope_from>[\w\@\-\.]+)\>\s*\)\s*id\s+(?P<id>[\w\-]+)\s*(for\s+(?P<envelope_for>[\w\.\@]+)|)", "exim"),  # exim
        (r"from\s+(?P<from_name>[\w\.\-]+)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+with\s+(?P<protocol>\w+)\s+\(Exim\s+(?P<version>[\d\.]+)\)\s+\(envelope-from\s+<*(?P<envelope_from>[\w\.\-\@]+)>*\)\s+id\s+(?P<id>[\w\.\-]+)\s+for\s+<?(?P<envelope_for>[\w\.\-\@]+)>?", "exim"),  # exim
        (r"from\s+(?P<from_name>[\[\]\w\-\.]+)\s+\(((?P<from_hostname>[\w\.\-]+)|)\s*\[(?P<from_ip>[\da-f\.\:]+)\]\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+\(Oracle\s+Communications\s+Messaging\s+Server\s+(?P<oracle_version>[\w\.\-]+)(\([\d\.]+\)|)\s+(32bit|64bit|)\s*(\([^\)]+\)|)\)\s*with\s+(?P<protocol>\w+)\s+id\s+\<?(?P<id>[\w\@\.\-]+)\>?", "Oracle Communication Messaging Server"),  # Oracle
        (r"from\s+(?P<from_hostname>[\w\-\.]+)\s+\(\[(?P<from_ip>[\d\.\:a-f]+)\]\s+helo=(?P<from_name>[\w\.\-]+)\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+with\s+(?P<protocol>\w+)\s+\(ASSP\s+(?P<assp_version>[\d\.]+)\s*\)", "ASSP"),  # ASSP
        (r"from\s+(?P<from_hostname>[\[\]\d\w\.\-]+)\s+\(\[\[?(?P<from_ip>[\d\.]+)(\:\d+|)\]\s*(helo=(?P<from_name>[\w\.\-]+)|)\s*\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+\(envelope-from\s+\<?(?P<envelope_from>[^>]+)\>?\)\s+\(ecelerity\s+(?P<version>[\d\.]+)\s+r\([\w\-\:\.]+\)\)\s+with\s+(?P<protocol>\w+)\s*(\(cipher=(?P<cipher>[\w\-\_]+)\)|)\s*id\s+(?P<id>[\.\-\w\/]+)", "ecelerity"),  # ecelerity
        (r"from\s+(?P<from_name>[\[\]\w\.\-]+)\s+\(((?P<from_hostname>[\w\.\-]+)|)\s*(\[(?P<from_ip>[\d\.\:a-f]+)\]|)\)\s*by\s+(?P<by_hostname>[\w\.\-]+)\s+(\([\w\.\-\=]+\)|)\s+with\s+(?P<protocol>\w+)\s+\(Nemesis\)\s+id\s+(?P<id>[\w\.\-]+)\s*(for\s+\<?(?P<envelope_for>[\w\.\@\-]+)\>?|)", "nemesis"),  # nemesis
        (r"\(qmail\s+\d+\s+invoked\s+(from\s+network|)(by\s+uid\s+\d+|)\)", "qmail"),  # qmail (no from/by information at all)
        (r"from\s+\[?(?P<from_ip>[\d\.a-f\:]+)\]?\s+\(account\s+<?(?P<envelope_from>[\w\.\@\-]+)>?\s+HELO\s+(?P<from_name>[\w\.\-]+)\)\s+by\s+(?P<by_hostname>[\w\.\-]*)\s+\(CommuniGate\s+Pro\s+SMTP\s+(?P<version>[\d\.]+)\)\s+with\s+(?P<protocol>\w+)\s+id\s+(?P<id>[\w\-\.]+)\s+for\s+<?(?P<envelope_for>[\w\.\-\@]+)>?", "CommuniGate"),  # CommuniGate
        (r"from\s+(?P<from_ip>[\d\.\:a-f]+)\s+\(SquirrelMail\s+authenticated\s+user\s+(?P<envelope_from>[\w\@\.\-]+)\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+with\s+(?P<protocol>\w+)", "SquirrelMail"),
        (r"by\s+(?P<by_hostname>[\w\.\-]+)\s+\((?P<protocol>\w+)\s+sendmail\s*(emulation|)\)", "sendmail"),  # sendmail
        (r"from\s+(?P<from_name>[\[\]\w\.\-]+)\s+\(\[(?P<from_hostname>[\w\.\-]+)\]\s+\[(?P<from_ip>[\d\.a-f\:]+)\]\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+\(Sun\s+Java\(tm\)\s+System\s+Messaging\s+Server\s+(?P<version>[\w\.\-]+)\s+\d+bit\s+\(built\s+\w+\s+\d+\s+\d+\)\)\s+with\s+(?P<protocol>\w+)\s+id\s+<?(?P<id>[\w\.\-\@]+)>?", "Sun Java System Messaging Server"),  # Sun Java System Messaging Server
        (r"from\s+(?P<from_name>[\w\.\-\[\]]+)\s+\((?P<from_ip>[\d\.a-f\:]+)\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+\(Axigen\)\s+with\s+(?P<protocol>\w+)\s+id\s+(?P<id>[\w\.\-]+)", "Axigen"),  # axigen
        (r"from\s+(?P<from_name>[\w\.\-]+)\s+\((?P<from_hostname>[\w\.\-]+)\s+\[(?P<from_ip>[\d\.a-f\:]+)\]\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+\(Horde\s+MIME\s+library\)\s+with\s+(?P<protocol>\w+)", "Horde MIME library"),  # Horde
        (r"from\s+(?P<from_name>[\w\.\-\[\]]+)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+\(PGP\s+Universal\s+Service\)", "PGP Universal Service", "local"),  # PGP Universal Service
        (r"from\s+(?P<from_name>[\w\.\-]+)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+with\s+(?P<protocol>\w+)\s+\(Sophos\s+PureMessage\s+Version\s+(?P<version>[\d\.\-]+)\)\s+id\s+(?P<id>[\w\.\-]+)\s+for\s+(?P<envelope_for>[\w\.\-\@]+)", "Sophos PureMessage"),  # Sophos PureMessage
        (r"by\s+(?P<by_ip>[\d\.\:a-f]+)\s+with\s+(?P<protocol>\w+)", "unknown"),  # other
        (r"from\s+(?P<from_name>[\w\.\-]+)\s+\#?\s*(\(|\[|\(\[)\s*(?P<from_ip>[\d\.\:a-f]+)\s*(\]|\)|\]\))\s+by\s+(?P<by_hostname>[\w\.\-]+)(\s+\([\w\.\s\/]+\)|)\s*(with\s+(?P<protocol>\w+)|)\s*(id\s+(?P<id>[\w]+)|)(\(\-\)|)\s*(for\s+\<(?P<envelope_for>[\w\@\.]+)\>?|)", "unknown"),  # unknown
        (r"from\s+(?P<from_hostname>[\w\.\-]+)\s*\(HELO\s+(?P<from_name>[\w\.\-]+)\)\s*\(\[?(?P<from_ip>[\d\.\:a-f]+)\]?\)\s+by\s+(?P<by_hostname>[\w\.\-]+)(\s+\([\d\.]+\)|)\s*(with\s+(?P<protocol>\w+)|)\s*(id\s+(?P<id>[\w]+)|)(\(\-\)|)", "unknown"),  # other other
        (r"from\s+([\(\[](?P<from_ip>[\d\.\:a-f]+)[\)\]]|)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+id\s+(?P<id>\w+)\s*(with\s+(?P<protocol>\w+)|)\s*\s*(for\s+\<(?P<envelope_for>[\w\@\.\-]+)\>|)", "unknown"),  # other
        (r"from\s+(?P<from_hostname>[\w\.]+)\s+(\(HELO\s+(?P<from_name>[\w\.\-]+)\)|)\s*(\((?P<from_ip>[\da-f\.\:]+)\)|)\s*by\s+(?P<by_hostname>[\w\.\-]+)\s+with\s+(?P<cipher>[\w\-]+)\s+encrypted\s+SMTP", "unknown"),  # unknown
        (r"from\s+(?P<from_hostname>[\w\.\-]+)\s+(\(HELO\s+(?P<from_name>[\w\.\-]+)\)|)\s+\((?P<envelope_from>[\w\.]+\@[\w\.]+)\@(?P<from_ip>[\da-d\.\:]+)\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+with\s+(?P<protocol>\w+)", "unknown"),  # unknown
        (r"from\s+(?P<from_hostname>[\w\.\-]+)\s+\(HELO\s+(?P<from_name>[\w\.\-\?]+)\)\s+\(\w+\@[\w\.]+\@(?P<from_ip>[\d\.a-f\-]+)_\w+\)\s+by\s+(?P<by_hostname>[\w\.\-\:]+)\s+with\s+(?P<protocol>\w+)", "unknown"),  # unknown
        (r"from\s+(?P<from_name>[\w\.\-\[\]]+)\s+\(\[(?P<from_ip>[\da-f\.\:]+)\]\)\s+by\s+(?P<by_hostname>[\w\.\-]+)\s+\(\[(?P<by_ip>[\d\.a-f\:]+)\]\)\s+with\s+(?P<protocol>\w+)", "unknown"),  # unknown
        ]

    @staticmethod
    def parse(header):
        """Parse one ``Received`` header value.

        Args:
            header: the header value as a string, expected to look like
                ``"<routing information>; <date>"``.

        Returns:
            A dict with key ``'server'`` (the name attached to the first
            matching pattern) plus every named group of that pattern (groups
            that did not participate are ``None``).  Returns ``None`` when
            ``header`` is not a string, when it does not contain exactly one
            ``;``, or when no pattern matches.
        """
        # Non-string input (None, NaN, ...) cannot be a header; report "no
        # parse" instead of failing on ``.split``.
        if not isinstance(header, str):
            return None

        parts = header.split(";")
        if len(parts) != 2:
            return None
        routing = parts[0]

        # Entries are (pattern, server_name) with an optional third element,
        # so index instead of unpacking.
        for entry in ReceivedParser.regexes:
            match = re.match(entry[0], routing, re.IGNORECASE)
            if match:
                return {'server': entry[1], **match.groupdict()}
        return None

In [3]:
def check_if_valid(dict_to_check, str_val):
    """Tell whether ``dict_to_check`` holds a non-None value under ``str_val``.

    Returns False when the mapping itself is None, when the key is absent, or
    when the stored value is None; True otherwise.
    """
    if dict_to_check is None or str_val not in dict_to_check:
        return False
    return dict_to_check[str_val] is not None

In [4]:
def extract_emails(row, col_name):
    """Return the e-mail addresses found in ``row[col_name]``.

    Addresses wrapped in angle brackets (``<user@host.tld>``) take priority:
    if at least one is present, only those are returned.  Otherwise every bare
    address in the text is returned.  The result is an empty list when the
    text contains no address.
    """
    text = row[col_name]

    bracketed = re.findall(r'<([a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)>', text)
    if bracketed:
        return bracketed

    # No bracketed address: fall back to any address-looking token.
    return re.findall(r'([a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)', text)

In [5]:
def email_same_check(row, first_col, second_col):
    """Return 1 if any value of ``row[first_col]`` equals any value of
    ``row[second_col]``, else 0.  Comparison is exact (case-sensitive)."""
    left_values = row[first_col]
    right_values = row[second_col]

    shared = any(left == right for left in left_values for right in right_values)
    return 1 if shared else 0

In [6]:
def extract_domains(row, col_name):
    """Reduce each address in ``row[col_name]`` to its last two domain labels.

    For every address the part after the last ``@`` is taken, its last two
    dot-separated labels are kept, non-word characters are stripped from the
    final label, and the result is lower-cased (``User@Mail.Example.COM`` ->
    ``example.com``).

    Addresses whose domain has fewer than two labels (e.g. ``a.b@localhost``)
    are skipped.  The check is made on the domain, not on the whole address:
    a dot in the local part must not let a single-label domain through,
    which would otherwise fail with an IndexError.

    Returns:
        list of str, one entry per usable address, in input order.
    """
    domains_list = []
    # ``address`` rather than ``email`` so the imported module is not shadowed.
    for address in row[col_name]:
        labels = address.split('@')[-1].split('.')[-2:]
        if len(labels) < 2:
            continue
        main_domain = labels[0] + '.' + re.sub(r'\W+', '', labels[1])
        domains_list.append(main_domain.lower())
    return domains_list

In [7]:
def extract_domain_message_id(row):
    """Return the first entry of ``row['message-id_domains']``, or ``''`` when
    that sequence is empty."""
    domains = row['message-id_domains']
    return domains[0] if len(domains) != 0 else ''

In [8]:

def domain_match_check(row, first_col, second_col):
    """Return 1 when the two domain lists in ``row`` share at least one entry.

    ``row[first_col]`` and ``row[second_col]`` are compared element by element
    with ``==``; the result is 0 when either list is empty or nothing matches.
    """
    left_domains = row[first_col]
    right_domains = row[second_col]

    # Nothing to compare if either side is empty.
    if not (len(left_domains) and len(right_domains)):
        return 0

    has_common = any(left == right
                     for left in left_domains
                     for right in right_domains)
    return int(has_common)


In [9]:


# Header fields copied from the message, in the column order of the frame.
_HEADER_FIELDS = ('From', 'Message-ID', 'Return-Path', 'Reply-To', 'Errors-To',
                  'In-Reply-To', 'References', 'To', 'CC', 'Sender')

# Lower-cased columns for which a ``missing_<name>`` flag is emitted.  The
# order fixes the order of the first feature columns, so do not reorder.
_MISSING_FLAG_FIELDS = ("from", "message-id", 'return-path', 'reply-to', 'errors-to',
                        'in-reply-to', 'references', 'to', 'cc', 'sender',
                        'dkim', 'dmarc', 'spf')

# Columns from which addresses (and then domains) are extracted.
_ADDRESS_COLUMNS = ['from', 'message-id', 'return-path', 'errors-to', 'reply-to',
                    'in-reply-to', 'references', 'to', 'cc', 'sender']

_EMAIL_PATTERN = r'([a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)'

# Pairs of domain-list columns compared with ``domain_match_check``.  Each
# pair yields one feature; the order fixes the feature order.
_DOMAIN_PAIRS = [('message-id_domains', 'from_domains'), ('from_domains', 'return-path_domains'), ('message-id_domains', 'return-path_domains'), ('message-id_domains', 'sender_domains'), ('message-id_domains', 'reply-to_domains'),
                 ('return-path_domains', 'reply-to_domains'), ('reply-to_domains', 'to_domains'), ('to_domains', 'in-reply-to_domains'), ('errors-to_domains', 'message-id_domains'), ('errors-to_domains', 'from_domains'), ('errors-to_domains', 'sender_domains'),
                 ('errors-to_domains', 'reply-to_domains'), ('sender_domains', 'from_domains'), ('references_domains', 'reply-to_domains'), ('references_domains', 'in-reply-to_domains'), ('references_domains', 'to_domains'), ('from_domains', 'reply-to_domains'),
                 ('to_domains', 'from_domains'), ('to_domains', 'message-id_domains')]


def _parse_email_header(email_content):
    """Turn raw message text into a one-row DataFrame of header values.

    Absent headers (and absent DKIM/SPF/DMARC results) are stored as NaN so
    that the caller can tell "missing" apart from "present but empty".
    """
    msg = email.message_from_string(email_content)
    email_data = {}

    for field in _HEADER_FIELDS:
        value = msg.get(field)
        # str() also covers header objects returned for non-ASCII values.
        email_data[field] = [np.nan if value is None else str(value)]

    # Only the first Authentication-Results header is inspected.
    auth_results = msg.get('Authentication-Results')
    auth_text = '' if auth_results is None else str(auth_results)
    for key in ('DKIM', 'SPF', 'DMARC'):
        found = re.search(key.lower() + r'=(\w+)', auth_text)
        email_data[key] = [found.group(1) if found else np.nan]

    # Received headers, in the order they appear in the message.
    received_headers = [str(hop) for hop in (msg.get_all('Received') or [])]
    email_data['num_hops'] = [len(received_headers)]
    email_data['first_received'] = [received_headers[0] if received_headers else '']
    for i, received in enumerate(received_headers[1:], start=2):
        email_data[f'Received{i}'] = [received]
    email_data['last_received'] = [received_headers[-1] if received_headers else '']

    return pd.DataFrame.from_dict(email_data, orient='index').transpose()


def _received_envelope_domain(received_header):
    """Return the two-label domain of the ``for <address>`` clause of a
    Received header, lower-cased, or ``'NA'`` when it cannot be determined."""
    parsed_val = ReceivedParser.parse(received_header)
    if not check_if_valid(parsed_val, 'envelope_for'):
        return 'NA'

    labels = parsed_val['envelope_for'].split('@')[-1].split('.')[-2:]
    if len(labels) < 2:
        return 'NA'
    return (labels[0] + '.' + re.sub(r'\W+', '', labels[1])).lower()


def preProcess(emaill, scaler_path='scaler_model.joblib'):
    """Build the scaled feature matrix for one raw e-mail.

    Args:
        emaill: full message text (headers, optionally followed by a body).
        scaler_path: path of the joblib-serialised scaler applied to the
            features.  Defaults to the previously hard-coded file name.

    Returns:
        Whatever ``scaler.transform`` returns for the one-row feature matrix.

    The feature columns are, in this exact order: the ``missing_*`` flags,
    ``num_hops``, the three ``num_recipients_*`` counts,
    ``email_match_from_reply-to``, ``domain_val_message-id``, one
    ``domain_match_*`` per entry of ``_DOMAIN_PAIRS`` and the two
    ``domain_match_*_received`` flags.
    NOTE(review): this order presumably has to match the one the scaler and
    the model were fitted with - confirm before changing it.
    """
    df = _parse_email_header(emaill)
    df.columns = df.columns.str.lower()

    final_features_list = []

    # missing_<name> = 1 when the header / auth result is absent.  This must
    # be computed while absent values are still NaN: storing '' for absent
    # headers (as was done before) made every flag a constant 0.
    for name in _MISSING_FLAG_FIELDS:
        flag_name = 'missing_' + name
        df[flag_name] = df[name].isnull().astype(int)
        final_features_list.append(flag_name)

    final_features_list.append('num_hops')

    # From here on every header value is handled as text.
    header_columns = list(_MISSING_FLAG_FIELDS)
    df[header_columns] = df[header_columns].fillna('')

    for field in ('to', 'cc', 'from'):
        count_name = 'num_recipients_' + field
        df[count_name] = df[field].apply(lambda text: len(re.findall(_EMAIL_PATTERN, text)))
        final_features_list.append(count_name)

    # One column of address lists per header.
    emails_df = pd.concat(
        [df.apply(extract_emails, col_name=col, axis=1) for col in _ADDRESS_COLUMNS],
        axis=1, keys=_ADDRESS_COLUMNS)

    emails_to_check = [('from', 'reply-to')]
    for first_field, second_field in emails_to_check:
        new_col_name = 'email_match_' + first_field + '_' + second_field
        df[new_col_name] = emails_df.apply(email_same_check, first_col=first_field,
                                           second_col=second_field, axis=1)
        final_features_list.append(new_col_name)

    # One column of domain lists per header, named '<header>_domains'.
    domains_df = pd.concat(
        [emails_df.apply(extract_domains, col_name=col, axis=1) for col in _ADDRESS_COLUMNS],
        axis=1, keys=[col + '_domains' for col in _ADDRESS_COLUMNS])
    df = pd.concat([df, domains_df], axis=1)

    # 1 when the Message-ID domain contains 'uwaterloo.ca'.  regex=False so
    # the dot is a literal dot and not "any character".
    message_id_domain = domains_df.apply(extract_domain_message_id, axis=1)
    df['domain_val_message-id'] = (message_id_domain.astype(str)
                                   .str.contains('uwaterloo.ca', regex=False)
                                   .astype(int))
    final_features_list.append('domain_val_message-id')

    for first_col, second_col in _DOMAIN_PAIRS:
        new_col_name = ('domain_match_' + first_col.replace('_domains', '')
                        + '_' + second_col.replace('_domains', ''))
        df[new_col_name] = domains_df.apply(domain_match_check, first_col=first_col,
                                            second_col=second_col, axis=1)
        final_features_list.append(new_col_name)

    # Envelope recipient domain of the bottom-most Received header, parsed
    # once per row instead of once per candidate domain.
    received_domain = df['last_received'].apply(_received_envelope_domain)
    for field in ('to', 'reply-to'):
        df['domain_match_' + field + '_received'] = [
            int(domain in candidates)
            for domain, candidates in zip(received_domain, df[field + '_domains'])
        ]
    final_features_list.extend(['domain_match_reply-to_received', 'domain_match_to_received'])

    arr = df[final_features_list].values

    # NOTE: joblib.load unpickles the file; only point this at trusted files.
    loaded_scaler = joblib.load(scaler_path)
    array = loaded_scaler.transform(arr)

    print("array-> ",final_features_list)

    return array

    


  

In [10]:
def loadPredict(predict, model_path='best_model.pkl'):
    """Load the serialised model and return its predictions for ``predict``.

    Args:
        predict: feature matrix passed unchanged to the model's ``predict``.
        model_path: path of the joblib-serialised model.  Defaults to the
            previously hard-coded file name.

    Returns:
        Whatever the loaded model's ``predict`` returns.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files from a trusted source.
    loaded_model = joblib.load(model_path)
    return loaded_model.predict(predict)

In [11]:
email_content = r"""Delivered-To: awaisamin09@gmail.com
Received: by 2002:a05:6022:2330:b0:4c:23a9:60d with SMTP id cr48csp7805581lab;
        Mon, 1 Jan 2024 11:06:22 -0800 (PST)
X-Received: by 2002:a05:620a:28d4:b0:781:5bfd:5a1b with SMTP id l20-20020a05620a28d400b007815bfd5a1bmr13659863qkp.49.1704135982458;
        Mon, 01 Jan 2024 11:06:22 -0800 (PST)
ARC-Seal: i=3; a=rsa-sha256; t=1704135982; cv=pass;
        d=google.com; s=arc-20160816;
        b=EOElQxEbTb2SPsfOlxcizQQnYUNNe0s+sRGRsZmZ+grCUmGzL5MPAVEbedB9kt/DaO
         vHpehXeMHAKOe9c7n/ZbCMN8lC/9qZqcx+hPk/minsHf4LnCS9TWn5Bsx+PDhb/QDPpp
         EwuLmAFkAb2jb8Yiqtzgxt34+b4/EbQXxCCSUacGExETWE8r0IKBN3jl8MrQiN42Af2M
         MZm2a8YrFjoTtjaawwIMtfm7Asb08/m5R4oku3hpJWuwoMC+qgcEzMNatfPq6yt5ulEI
         pSLo152eyVDeELekSyPw3ZPvjk5fXQTdeh0iiwFTyd2rApA4Gmy+WEn2wisJYyEzILH5
         BuVg==
ARC-Message-Signature: i=3; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816;
        h=list-unsubscribe:list-archive:list-help:list-post:list-id
         :mailing-list:precedence:to:subject:message-id:date:from
         :mime-version:sender:dkim-signature:dkim-signature;
        bh=3YXxusAq+0dyijKQ0WXW1qyWRifRPmxfGDwFd1d0oFw=;
        fh=i1TxEz2KRLm4zvzzJ7Mlg6JuMXm8l6H2CfJRxrWjWVE=;
        b=ci1vlc00wmNU8+8OkjYE/GemxLDVgLlKVK68sKAM5zjIT6tbV/b+YnXZEa/pWkU9Uu
         uEVTuqTkWzuk21nmnZeDz7q5ux+/9XW1ZXP14XLPHkprGxrfAO6aytIGFOznSMD1hb0R
         2zdkUoxj5elfKTVzoUxmA2GP5X3+IY3ESpPAWtmrVBDmnLJEjq4TbL825Vv+EyycAKo0
         OFNAmmU85BEFFtB7aBkF3Pz+bfpEpl70m1LFwp2yHONLE0c3m9Z6UlQoJJFYkP6lW4u/
         FVMZ9lIdVZdR0KXWJXMqJBgy4EMggN3OajuOrD1djaFucCnYwLrEsGtA9fVdc2sv4VK8
         cWgQ==
ARC-Authentication-Results: i=3; mx.google.com;
       dkim=pass header.i=@googlegroups.com header.s=20230601 header.b=dJZ+oDLN;
       dkim=pass header.i=@gmail.com header.s=20230601 header.b=JU2dtXdq;
       arc=pass (i=2 spf=pass spfdomain=gmail.com dkim=pass dkdomain=gmail.com dmarc=pass fromdomain=gmail.com);
       spf=pass (google.com: domain of fresh--02--912454564564+bncbcn6za6uuaorblm2zswamgqeek2hdoi@googlegroups.com designates 209.85.220.55 as permitted sender) smtp.mailfrom=fresh--02--912454564564+bncBCN6ZA6UUAORBLM2ZSWAMGQEEK2HDOI@googlegroups.com;
       dmarc=pass (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com
Return-Path: <fresh--02--912454564564+bncBCN6ZA6UUAORBLM2ZSWAMGQEEK2HDOI@googlegroups.com>
Received: from mail-sor-f55.google.com (mail-sor-f55.google.com. [209.85.220.55])
        by mx.google.com with SMTPS id d15-20020a05620a204f00b00781706d5b0asor6846807qka.9.2024.01.01.11.06.22
        for <awaisamin09@gmail.com>
        (Google Transport Security);
        Mon, 01 Jan 2024 11:06:22 -0800 (PST)
Received-SPF: pass (google.com: domain of fresh--02--912454564564+bncbcn6za6uuaorblm2zswamgqeek2hdoi@googlegroups.com designates 209.85.220.55 as permitted sender) client-ip=209.85.220.55;
Authentication-Results: mx.google.com;
       dkim=pass header.i=@googlegroups.com header.s=20230601 header.b=dJZ+oDLN;
       dkim=pass header.i=@gmail.com header.s=20230601 header.b=JU2dtXdq;
       arc=pass (i=2 spf=pass spfdomain=gmail.com dkim=pass dkdomain=gmail.com dmarc=pass fromdomain=gmail.com);
       spf=pass (google.com: domain of fresh--02--912454564564+bncbcn6za6uuaorblm2zswamgqeek2hdoi@googlegroups.com designates 209.85.220.55 as permitted sender) smtp.mailfrom=fresh--02--912454564564+bncBCN6ZA6UUAORBLM2ZSWAMGQEEK2HDOI@googlegroups.com;
       dmarc=pass (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com
ARC-Seal: i=2; a=rsa-sha256; t=1704135982; cv=pass;
        d=google.com; s=arc-20160816;
        b=Ez1HT3w8EL4opJYexetYvKyRBwQO7hVuEm6SOT714VJYotP7gj71JxX5TW1xTB9ET6
         T9WtwJt1T1Q1oo6/JAhXQZOgAQjqOOihQM1nbIPZmACJZLI2/sQitCh0ADsaRKLaMYWg
         FhuC0eR8uaCHZPZx7vJRTJy6wdv0kCQc0fdVkr4P2vKoiu1jRFi4V2aV3YMIhwcxSVrB
         NxccH/M1UJkj8hW6TNNcxbDas51SgXIK0mBMkn1R4VzYfFLq7nqJSF+trvj0H2y7AI/m
         kvPIUD7/rex6RDkdsyvN0YVF3OUSKiz0guvgSXBJmPnJ05t+9w/0yTjvHuyNRBRTh+YD
         4ilQ==
ARC-Message-Signature: i=2; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816;
        h=list-unsubscribe:list-archive:list-help:list-post:list-id
         :mailing-list:precedence:to:subject:message-id:date:from
         :mime-version:sender:dkim-signature:dkim-signature;
        bh=3YXxusAq+0dyijKQ0WXW1qyWRifRPmxfGDwFd1d0oFw=;
        fh=i1TxEz2KRLm4zvzzJ7Mlg6JuMXm8l6H2CfJRxrWjWVE=;
        b=O0pphO0DAMqfQwFy2hQItv5lqM4dH8egr7p3TQpS/LP6gtcbwg5hz2FyqJmW552PF5
         qWqfXvLn8A2dUqdS9LR8tRvT6JSnMVyVDPKiY6ylcoIrUAiEb5eNXhp0SjEkkWm/jpsW
         yg/R2XNqUJFNzeohj4oSgCJSt0F7Wq2yRvi25pnDiE461zXt67kCVxl/KWo/1XchogZI
         A3j9AoML6D606cYM4yFcNUdkU+saMyIkt12x1V4lLWh3741TAg7gS7B+nqCPpq8eUi5e
         5PiFOptej1XRGbYOcJ4lKzBnjSXjYd1FesEPt8oc1westidsL9O96k2fj8zdQpbCHAao
         PtPA==
ARC-Authentication-Results: i=2; gmr-mx.google.com;
       dkim=pass header.i=@gmail.com header.s=20230601 header.b=I2c5nTNj;
       spf=pass (google.com: domain of aminayaya42@gmail.com designates 2607:f8b0:4864:20::829 as permitted sender) smtp.mailfrom=aminayaya42@gmail.com;
       dmarc=pass (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=googlegroups.com; s=20230601; t=1704135982; x=1704740782; dara=google.com;
        h=list-unsubscribe:list-archive:list-help:list-post:list-id
         :mailing-list:precedence:x-original-authentication-results
         :x-original-sender:to:subject:message-id:date:from:mime-version
         :sender:from:to:cc:subject:date:message-id:reply-to;
        bh=3YXxusAq+0dyijKQ0WXW1qyWRifRPmxfGDwFd1d0oFw=;
        b=dJZ+oDLNou5LukcY7nzoqXw8fQRUyXDGp7TFV/FSxITCtVIUCvnH1xjBzg/9lTGHxw
         ePoebr3NH0PWQ3mmo5O3jFJKTwDCRqYrkdZdwaZ6JiBPLvLLHkPpWov/vLsstvL26H0f
         l/xp44HVNSTeCc1dxqxi5Xgf7dS4GF7m2Ir5wkInpPQzNYppSW+bRRENuFXYB2j6aWJq
         YlSHnUI1L/qxDij9uE+DSBljW6OTzBO4NHTJV/kAmZJZDpK/Ni3FasBQo2KsbUW8PXn8
         GzThylkPMlKG5/k7av8SALveEB1U+us6qVhNwGSporuRT8sPYJcMXm7CUZqEjlrx7R69
         Hklw==
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=gmail.com; s=20230601; t=1704135982; x=1704740782; dara=google.com;
        h=list-unsubscribe:list-archive:list-help:list-post:list-id
         :mailing-list:precedence:x-original-authentication-results
         :x-original-sender:to:subject:message-id:date:from:mime-version:from
         :to:cc:subject:date:message-id:reply-to;
        bh=3YXxusAq+0dyijKQ0WXW1qyWRifRPmxfGDwFd1d0oFw=;
        b=JU2dtXdqgI3RhqbBwqLJrHmPFonW3hxtqbWcvQTwFaNdU2hPo7/X2DMpIVtSS4C2Tk
         dwqKlSg8ihQ8/yNrC9uq/cG6Db3M9wLifpwmEOsdAjD6QnnWZfkmrs07973MtKtItj6T
         QhscvPCncUJBQ87zxDaLwas7VSRTG3d/Kob5nfymRcF4OuKeuyRoo1bms7geeGDqyaUw
         6HaMlQWz9x3QOuhJHqEkAoyFdTet8yLj7QbsydwJqBG2xzEuB3wCG5IIjPlUvY9+gMXa
         WgXrK+OYvv3ckb5yfSAKhzQSvdr9cE1oLxSMCC+tBN5dU2pQvLL52oWTuRaVwrk395RW
         i9DQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=1e100.net; s=20230601; t=1704135982; x=1704740782;
        h=list-unsubscribe:list-archive:list-help:list-post
         :x-spam-checked-in-group:list-id:mailing-list:precedence
         :x-original-authentication-results:x-original-sender:to:subject
         :message-id:date:from:mime-version:x-beenthere:x-gm-message-state
         :sender:from:to:cc:subject:date:message-id:reply-to;
        bh=3YXxusAq+0dyijKQ0WXW1qyWRifRPmxfGDwFd1d0oFw=;
        b=tRarqoV3IcZRWcr6khXp4JVtipjXhiqMUfraepYdikX206TDRwyDFkIO5GCG7ueDmm
         ACjUttjggdWlgD6GTEbhBZFP7pPC/OZDKMei1I5AJxCvYI4MSKyh4FP2OkyYPVO0zvXv
         86b3gYU7Kwp1s7yd3Ub1sFPztVkWobggrgl5SXttrE26ifG5bqCT0PWzMuiLIdAh7UbH
         Agii0bm5Hm96OQLdagtFI5AUCo6mNFic+f00PgP4OfgiVDjWHZOMNePPOFIkIOakDFMt
         O1GdDkj8Ky89usvcfAdoqvNTRnHg3HWESvoQoRDwosPB2N0txs5xnLTRzB+BtuUVWHj3
         YP9g==
Sender: fresh--02--912454564564@googlegroups.com
X-Gm-Message-State: AOJu0YzHHdTfhi/ltcJewAixmy/RXtCRr4t7KKy7XRN4uuA933RV+GH1 g3YiCT8RuvqzlJTqP7fddnI=
X-Google-Smtp-Source: AGHT+IGumDtZ3J0UjDuaW0hyznou/hO5JSjKh+ExnPq/D2T2odKTcYCP6TW2o/b4DIYN/2VsTQ/jLg==
X-Received: by 2002:ad4:5b8b:0:b0:67f:2b0d:c62 with SMTP id 11-20020ad45b8b000000b0067f2b0d0c62mr25072937qvp.3.1704135981921;
        Mon, 01 Jan 2024 11:06:21 -0800 (PST)
X-BeenThere: fresh--02--912454564564@googlegroups.com
Received: by 2002:a05:6214:ac3:b0:67a:1a58:78fc with SMTP id g3-20020a0562140ac300b0067a1a5878fcls4888111qvi.1.-pod-prod-07-us; Mon, 01 Jan 2024 11:06:21 -0800 (PST)
X-Received: by 2002:a0c:e844:0:b0:67f:a0a6:4cfa with SMTP id l4-20020a0ce844000000b0067fa0a64cfamr17527907qvo.112.1704135980987;
        Mon, 01 Jan 2024 11:06:20 -0800 (PST)
ARC-Seal: i=1; a=rsa-sha256; t=1704135980; cv=none;
        d=google.com; s=arc-20160816;
        b=0qMSYpODlLXsE5A2rn30C5zl0+OUNPS6C66zRBZqxzH7RlMgByrYnHldJwYhNypYdi
         kn9Epu51GEdLx4Wp/YZ2Ad1/scaQCdEx+luUJYvSHugpoTVsANCyvwCUvJs7uhGozNpG
         sSezfmI0afSKyeHadXVEem8tVGBF95ugifmyGe8JI+6Bas10jcfiyre2FaSqmcOvZg23
         nLtoa0e035XKhFsC+Qozzn6ZqjXLYfknk/8dkaNdccXXkuc86hD3jxve/a72gJ5u96Bg
         yvQ0d6l8UP3RwGCeO/yBNiFxvyy3Vc1IjoMuDAxl416pf3LuVpcelMy+Is+neDNPkexS
         Me8w==
ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816;
        h=to:subject:message-id:date:from:mime-version:dkim-signature;
        bh=BjZDdUuMZ/FZgHtkthxzSELhzKzXuW6bkkRUyesbLcY=;
        fh=i1TxEz2KRLm4zvzzJ7Mlg6JuMXm8l6H2CfJRxrWjWVE=;
        b=GNnmR3CUBNk4WedcRqK7bCarLjemDaLHKCZh06imS73oMGBtdEah/SxbtEEOtJD73J
         Bw8+Vtc9CQR0vryKPxy2HfXf209KaJgVY8Jx9giAF/KyvAZC+K3C1yKaX1I/cq6ab87u
         AZMbCpWxnK1P/RBXYrZjh6SzAsLEIBR5vNSnR0nYtND53TISNwz39ADOmftRRJk8HKdc
         HJA5H+bQb1RQ750l8DfrQQDRnSVbSkdTwDN1QaxRriO2L3aHxUUzo1BuumrX0IpOF5sV
         YhEL1Lhcnii3UNGx/5qS9xfgWISIbAgyiL+Yn4/hsDA679/cRrN+G5SrQ7/6tafAzxeg
         qn5Q==
ARC-Authentication-Results: i=1; gmr-mx.google.com;
       dkim=pass header.i=@gmail.com header.s=20230601 header.b=I2c5nTNj;
       spf=pass (google.com: domain of aminayaya42@gmail.com designates 2607:f8b0:4864:20::829 as permitted sender) smtp.mailfrom=aminayaya42@gmail.com;
       dmarc=pass (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com
Received: from mail-qt1-x829.google.com (mail-qt1-x829.google.com. [2607:f8b0:4864:20::829])
        by gmr-mx.google.com with ESMTPS id gs4-20020a056214226400b0065afd3576a7si1910157qvb.3.2024.01.01.11.06.20
        (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);
        Mon, 01 Jan 2024 11:06:20 -0800 (PST)
Received-SPF: pass (google.com: domain of aminayaya42@gmail.com designates 2607:f8b0:4864:20::829 as permitted sender) client-ip=2607:f8b0:4864:20::829;
Received: by mail-qt1-x829.google.com with SMTP id d75a77b69052e-42786514fe6so78088151cf.0;
        Mon, 01 Jan 2024 11:06:20 -0800 (PST)
X-Received: by 2002:a05:622a:30c:b0:428:2041:18c3 with SMTP id q12-20020a05622a030c00b00428204118c3mr3284720qtw.69.1704135980409; Mon, 01 Jan 2024 11:06:20 -0800 (PST)
MIME-Version: 1.0
From: Amina Yaya <aminayaya42@gmail.com>
Date: Tue, 2 Jan 2024 01:06:09 +0600
Message-ID: <CAFy5gB5PFah7bwvfCxQ_+X9jVj85XTdb9PL5z9pkobTvDHvNLw@mail.gmail.com>
Subject: Come over!! Home alone now
To: fresh--02--912454564654@googlegroups.com, fresh--02--912454564564@googlegroups.com, fresh--02--912456457887@googlegroups.com, fresh--02--912765465465545@googlegroups.com, fresh--02--91248654654@googlegroups.com, fresh--02--9124654565445@googlegroups.com, fresh--02--9124554554665@googlegroups.com, fresh--02--912546945645445@googlegroups.com, fresh--02--912454564556565@googlegroups.com, fresh--02--912476896566@googlegroups.com
Content-Type: multipart/alternative; boundary="000000000000c2e7c3060de713c6"
X-Original-Sender: aminayaya42@gmail.com
X-Original-Authentication-Results: gmr-mx.google.com;
       dkim=pass header.i=@gmail.com header.s=20230601 header.b=I2c5nTNj;
       spf=pass (google.com: domain of aminayaya42@gmail.com designates 2607:f8b0:4864:20::829 as permitted sender) smtp.mailfrom=aminayaya42@gmail.com;
       dmarc=pass (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com
Precedence: list
Mailing-list: list fresh--02--912454564564@googlegroups.com; contact fresh--02--912454564564+owners@googlegroups.com
List-ID: <fresh--02--912454564564.googlegroups.com>
X-Spam-Checked-In-Group: fresh--02--912454564564@googlegroups.com
X-Google-Group-Id: 1010715269527
List-Post: <https://groups.google.com/group/fresh--02--912454564564/post>, <mailto:fresh--02--912454564564@googlegroups.com>
List-Help: <https://groups.google.com/support/>, <mailto:fresh--02--912454564564+help@googlegroups.com>
List-Archive: <https://groups.google.com/group/fresh--02--912454564564
List-Unsubscribe: <mailto:googlegroups-manage+1010715269527+unsubscribe@googlegroups.com>, <https://groups.google.com/group/fresh--02--912454564564/subscribe>

--000000000000c2e7c3060de713c6
Content-Type: text/plain; charset="UTF-8"

Hey, My home is empty now, I feel incredibly lonely. Please come over. I'll
be ready when you call.  Now I'm online.
Pics>>     sites.google.com/view/wtw8e

it's very cool & free to use..... So don't worry about C_C submit.






















------------------------//////////

-- 
You received this message because you are subscribed to the Google Groups "Fresh--02--912454564564" group.
To unsubscribe from this group and stop receiving emails from it, send an email to fresh--02--912454564564+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/fresh--02--912454564564/CAFy5gB5PFah7bwvfCxQ_%2BX9jVj85XTdb9PL5z9pkobTvDHvNLw%40mail.gmail.com.

--000000000000c2e7c3060de713c6
Content-Type: text/html; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable

<div dir=3D"ltr">Hey, My home is empty now, I feel incredibly lonely. Pleas=
e come over. I&#39;ll be ready when you call.=C2=A0 Now I&#39;m online.<br>=
Pics&gt;&gt; =C2=A0 =C2=A0 <a href=3D"http://sites.google.com/view/wtw8e">s=
ites.google.com/view/wtw8e</a><br><br>it&#39;s very cool &amp; free to use.=
.... So don&#39;t worry about C_C submit.<br><br><br><br><br><br><br><br><b=
r><br><br><br><br><br><br><br><br><br><br><br><br><br><br>-----------------=
-------//////////<br></div>

<p></p>

-- <br />
You received this message because you are subscribed to the Google Groups &=
quot;Fresh--02--912454564564&quot; group.<br />
To unsubscribe from this group and stop receiving emails from it, send an e=
mail to <a href=3D"mailto:fresh--02--912454564564+unsubscribe@googlegroups.=
com">fresh--02--912454564564+unsubscribe@googlegroups.com</a>.<br />
To view this discussion on the web visit <a href=3D"https://groups.google.c=
om/d/msgid/fresh--02--912454564564/CAFy5gB5PFah7bwvfCxQ_%2BX9jVj85XTdb9PL5z=
9pkobTvDHvNLw%40mail.gmail.com?utm_medium=3Demail&utm_source=3Dfooter">http=
s://groups.google.com/d/msgid/fresh--02--912454564564/CAFy5gB5PFah7bwvfCxQ_=
%2BX9jVj85XTdb9PL5z9pkobTvDHvNLw%40mail.gmail.com</a>.<br />

--000000000000c2e7c3060de713c6--
"""

In [12]:
def main(email_content):
    """Run the prediction pipeline for a single email.

    The given content is handed to ``preProcess`` and that result is passed
    on to ``loadPredict``. The prediction is echoed to stdout and returned.

    Args:
        email_content: Email text, forwarded unchanged to ``preProcess``.

    Returns:
        The value produced by ``loadPredict`` for the pre-processed input.
    """
    processed = preProcess(email_content)
    prediction = loadPredict(processed)
    print('Prad-> ', prediction)
    return prediction

In [13]:
from flask import Flask,render_template, request
from flask_cors import CORS

# Application object that the route decorators below attach to.
app = Flask(import_name=__name__)
# Enable CORS for all routes
CORS(app=app)

@app.route('/')
def home():
    """Serve the root URL by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/predict', methods=['GET', 'POST'])
def predict():
    """Classify the submitted email and render the result page.

    The email text is read from the ``email`` parameter. It may arrive in the
    query string (GET, as before) or in a form body (POST). POST is accepted
    because a full raw email with headers can easily exceed practical URL
    length limits and would otherwise end up in URLs and access logs.

    Returns:
        The rendered ``result.html`` template, with the value returned by
        ``main`` exposed to the template as ``prediction_result``.
    """
    # request.values combines the query string and the form body; for GET
    # requests only the query string is consulted, so existing callers that
    # use ``/predict?email=...`` see no change. A missing parameter still
    # falls back to the empty string.
    email_content = request.values.get('email', '')

    # Run the pre-processing + model prediction pipeline.
    prediction_result = main(email_content)

    # Render the result template with the prediction result
    return render_template('result.html', prediction_result=prediction_result)


if __name__ == '__main__':
    # Start the server on all interfaces (0.0.0.0) at port 8080, with debug
    # mode and the auto-reloader both switched off.
    app.run(
        host='0.0.0.0',
        port=8080,
        debug=False,
        use_reloader=False,
    )


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8080
 * Running on http://192.168.100.11:8080
Press CTRL+C to quit
