# Imports

In [234]:
import re
import datetime
from dateutil.parser import parse
import ahocorasick as aho

# 1. Email Address Extractor

In [235]:
def email_ext(sentence):
    """Find e-mail addresses in *sentence*.

    An address is any run of non-whitespace characters, an ``@``, and
    another run of non-whitespace characters.

    Returns a list of ``(local_part, domain_part)`` tuples, one per match,
    or the string ``"None"`` when nothing matches.
    """
    matches = re.findall(r"(\S+)@(\S+)", sentence)
    return matches if matches else "None"

# 2. DateTime Extractor

### 2.1 Old DateTime Extractor

In [236]:
def time_date_ext(sentence):
    """Find a date/time in *sentence* and return it as a formatted string.

    The sentence is only parsed when it contains at least one of the
    date/time keywords below (plain, case-sensitive substring matching).
    Parsing is delegated to ``dateutil.parser.parse`` with ``fuzzy=True``.
    If the sentence contains "tomorrow" or "yesterday", the parsed value is
    shifted by one day forwards or backwards respectively.

    Returns:
        A one-element list holding ``"Y-M-D H:M:S"`` (fields are not
        zero-padded), or the string ``"None"`` when no keyword is present or
        the sentence cannot be parsed.
    """
    datetime_keyword_lst = (':', 'today', 'tomorrow', 'yesterday', 'am', 'a.m', 'a.m.', 'pm', 'p.m', 'p.m.',
                            'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august',
                            'september', 'october', 'november', 'december')

    # Cheap gate: skip the parser entirely when no keyword is present.
    if not any(keyword in sentence for keyword in datetime_keyword_lst):
        return "None"

    try:
        dt = parse(sentence, fuzzy=True)
        # Shift with timedelta rather than editing the day-of-month integer:
        # "+1"/"-1" on the raw day produced day 32 or day 0 at month
        # boundaries, which made the function give up and return "None".
        if "tomorrow" in sentence:
            dt += datetime.timedelta(days=1)
        elif "yesterday" in sentence:
            dt -= datetime.timedelta(days=1)
    except (ValueError, OverflowError):
        # The parser (or the one-day shift at the edge of the supported
        # range) could not produce a valid datetime.
        return "None"

    # Format straight from the datetime fields; no round-trip through
    # str(dt), which breaks on tz offsets and fractional seconds.
    return ["{}-{}-{} {}:{}:{}".format(dt.year, dt.month, dt.day,
                                       dt.hour, dt.minute, dt.second)]


### 2.2 New DateTime Extractor

In [237]:
# Keyword string matcher: an automaton from the `ahocorasick` package
# loaded with every date/time keyword.

datetime_keyword_lst = (
    ':', 'today', 'tomorrow', 'tmrw', 'yesterday',
    'am', 'a.m', 'a.m.', 'pm', 'p.m', 'p.m.',
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
)
A = aho.Automaton()

# Register each keyword with itself as the associated value, then finalize
# the automaton once all words have been added.
for i in datetime_keyword_lst:
    A.add_word(i, i)
A.make_automaton()

In [281]:
def time_date_ext_t(sentence):
    """Find dates and times in *sentence* and return them as a list.

    The sentence is inspected only if it contains one of the date/time
    keywords below (plain substring matching).

    Returns:
        * ``["None"]`` when no keyword occurs in the sentence.
        * Otherwise a list that may start with "now plus one day" (sentence
          contains "tomorrow") or "now minus one day" (sentence contains
          "yesterday"), followed by the result of
          ``parse(sentence, fuzzy=True)`` when parsing succeeds.
        * ``["val None"]`` when parsing raises ``ValueError`` and no
          relative-day entry was added.
    """
    keywords = (':','today','tomorrow','yesterday','am','a.m','a.m.','pm','p.m','p.m.','january','february','march','april','may','june','july','august','september','october','november','december')

    # Guard clause: without any keyword there is nothing to extract.
    if not any(kw in sentence for kw in keywords):
        return ["None"]

    found = []
    day = datetime.timedelta(days=1)

    # Relative-day words are resolved against the current local time.
    if "tomorrow" in sentence:
        found.append(datetime.datetime.now() + day)
    elif "yesterday" in sentence:
        found.append(datetime.datetime.now() - day)

    # Let dateutil pull an absolute date/time out of the free text.
    try:
        found.append(parse(sentence, fuzzy=True))
    except ValueError:
        # Keep a relative-day hit if there is one; otherwise flag the failure.
        if not found:
            found = ["val None"]

    return found
    

In [297]:
# Sample sentences passed to both date/time extractors in the cells below.
trial = ["Ill just do it on the 4th of march 3000", "I have a meeting at 5:00pm"]

In [298]:
# Run the old extractor (time_date_ext) on every sample sentence.
# The %%timeit cell magic below is commented out.
# %%timeit
[time_date_ext(i) for i in trial]

[['3000-3-4 0:0:0'], ['2018-9-3 17:0:0']]

In [299]:
# Run the new extractor (time_date_ext_t) on the same sample sentences.
[time_date_ext_t(i) for i in trial]

[[datetime.datetime(3000, 3, 4, 0, 0)], [datetime.datetime(2018, 9, 3, 17, 0)]]

# 3. Phone Number Extractor

In [242]:
def phone_num_ext(sentence):
    """Find phone numbers in *sentence* and return them as digit-only strings.

    A candidate is an optional ``(`` followed by groups of 3, 3 and 4
    digits, where consecutive groups may be separated by up to three
    non-digit characters (spaces, dashes, dots, parentheses, ...).

    Returns:
        A list with one 10-digit string per match, in order of appearance;
        an empty list when nothing matches.
    """
    # Raw string so the regex escapes (\(, \d, \D) are not treated as
    # invalid string escapes. No leading/trailing ".*?" is needed: findall
    # already scans the whole input, and the lazy prefix only caused
    # repeated rescans on text without a match.
    reg = re.compile(r"\(?\d{3}\D{0,3}\d{3}\D{0,3}\d{4}")
    # Strip every non-digit so separators (including letters or underscores
    # that \D{0,3} may have consumed) never leak into the result.
    return [re.sub(r"\D", "", candidate) for candidate in reg.findall(sentence)]
    

In [243]:
# Sample sentence for the phone-number extractor; besides the phone number
# it also contains a dollar amount made of comma-separated digit groups.
ph_trial = "I am earning $70,000,0000 dollars but i can give you some if you call at 1 (800) 424-6888"

In [244]:
# Run the phone-number extractor on the sample sentence.
phone_num_ext(ph_trial)

['8004246888']