### Tokenizer

In [1]:
import spacy


nlp = spacy.load("en_core_web_sm")

### Pretty print token table for debugging

In [2]:
from tabulate import tabulate


def print_token_table(sentence, pos=False, tag=True, dependency=True, lemma=False):
    """Pretty print the linguistics features of each word in a sentence.
    If pos is True, then print the part-of-speech (POS). Defaults to True.
    If tag is True, then print the tag. Defaults to True.
    If dependency is True, then print the dependencies. Defaults to True.

    Given a string, return None.
    Depends on tabulate.
    """

    # Print the sentence
    print(sentence + "\n")

    # Create the table headers
    headers = []
    headers.append("Word")
    if pos:
        headers.append("POS")
        headers.append("POS Definition")
    if tag:
        headers.append("Tag")
        headers.append("Tag Definition")
    if dependency:
        headers.append("Dep.")
        headers.append("Dep. Definition")
    if lemma:
        headers.append("Lemma.")

    # Create the table data
    tagged_words = nlp(sentence)
    data = []
    for word in tagged_words:
        entry = []
        entry.append(word.text)
        if pos:
            entry.append(word.pos_)
            entry.append(spacy.explain(word.pos_))
        if tag:
            entry.append(word.tag_)
            entry.append(spacy.explain(word.tag_))
        if dependency:
            entry.append(word.dep_)
            entry.append(spacy.explain(word.dep_))
        if lemma:
            entry.append(word.lemma_)
        data.append(entry)

    # Print the table
    print(tabulate(data, headers=headers, tablefmt="github") + "\n\n")

### Verb tense patterns

In [3]:
from enum import Enum


class Tense(Enum):
    PRESENT_SIMPLE = "present simple"
    PRESENT_SIMPLE_3 = "present simple 3rd"
    PAST_SIMPLE = "past simple"
    FUTURE_SIMPLE = "future simple"
    FUTURE_SIMPLE_WILL = "future simple will"
    FUTURE_SIMPLE_BE_GOING_TO = "future simple be-going-to"
    FUTURE_SIMPLE_BE_GOING_TO_3 = "future simple be-going-to 3rd"
    PRESENT_CONT = "present cont"
    PRESENT_CONT_3 = "present cont 3rd"
    PAST_CONT = "past cont"
    FUTURE_CONT = "future cont"
    PRESENT_PERF = "present perf"
    PRESENT_PERF_3 = "present perf 3rd"
    PAST_PERF = "past perf"
    FUTURE_PERF = "future perf"
    PRESENT_PERF_CONT = "present perf cont"
    PRESENT_PERF_CONT_3 = "present perf cont 3rd"
    PAST_PERF_CONT = "past perf cont"
    FUTURE_PERF_CONT = "future perf cont"


class Pattern:
    name = "untitled"
    tokens = []

    def __init__(self, name, tokens):
        self.name = name
        self.tokens = tokens
        
        
class PatternSet:
    name = "untitled"
    patterns = None

    def __init__(self, name):
        self.name = name
        self.patterns = {}

    # Always 1:1
    def create(self, name, tokens):
        self.patterns[name] = Pattern(name, tokens)

    def find(self, name):
        return self.patterns[name].tokens

    def find_all(self):
        return [self.patterns[key] for key in self.patterns]

In [4]:
pattern_set = PatternSet("verb tenses")

# Simple
pattern_set.create(Tense.PRESENT_SIMPLE.value, [
    { "TAG": "VBP", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.PRESENT_SIMPLE_3.value, [
    { "TAG": "VBZ", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.PAST_SIMPLE.value, [
    { "TAG": "VBD", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.FUTURE_SIMPLE_WILL.value, [
    { "TAG": "MD", "DEP": "aux", "OP": "+", "LOWER": "will" },
    { "TAG": "VB", "DEP": "ROOT", "OP": "+" }
])
pattern_set.create(Tense.FUTURE_SIMPLE_BE_GOING_TO.value, [
    { "TAG": "VBP", "DEP": "aux", "OP": "+" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+", "LEMMA": "go" },
    { "TAG": "TO", "DEP": "aux", "OP": "+" },
    { "TAG": "VB", "DEP": "xcomp", "OP": "+" },
])
pattern_set.create(Tense.FUTURE_SIMPLE_BE_GOING_TO_3.value, [
    { "TAG": "VBZ", "DEP": "aux", "OP": "+" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+", "LEMMA": "go" },
    { "TAG": "TO", "DEP": "aux", "OP": "+" },
    { "TAG": "VB", "DEP": "xcomp", "OP": "+" },
])

# Continuous/progressive
# TODO fix tokens 3-4
pattern_set.create(Tense.PRESENT_CONT.value, [
    { "TAG": "VBP", "DEP": "aux", "OP": "+" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
    { "TAG": "TO", "DEP": "aux", "OP": "?" },
    { "TAG": "VB", "DEP": "xcomp", "OP": "!" },
])
# TODO fix tokens 3-4
pattern_set.create(Tense.PRESENT_CONT_3.value, [
    { "TAG": "VBZ", "DEP": "aux", "OP": "+" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
    { "TAG": "TO", "DEP": "aux", "OP": "?" },
    { "TAG": "VB", "DEP": "xcomp", "OP": "!" },
])
pattern_set.create(Tense.PAST_CONT.value, [
    { "TAG": "VBD", "DEP": "aux", "OP": "+" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.FUTURE_CONT.value, [
    { "TAG": "MD", "DEP": "aux", "OP": "+", "LEMMA": "will" },        
    { "TAG": "VB", "DEP": "aux", "OP": "+", "LEMMA": "be" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
])

# Perfect
pattern_set.create(Tense.PRESENT_PERF.value, [
    { "TAG": "VBP", "DEP": "aux", "OP": "+", "LEMMA": "have"  },
    { "TAG": "VBN", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.PRESENT_PERF_3.value, [
    { "TAG": "VBZ", "DEP": "aux", "OP": "+", "LEMMA": "have" },
    { "TAG": "VBN", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.PAST_PERF.value, [
    { "TAG": "VBD", "DEP": "aux", "OP": "+", "LEMMA": "have" },
    { "TAG": "VBN", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.FUTURE_PERF.value, [
    { "TAG": "MD", "DEP": "aux", "OP": "+", "LEMMA": "will" },        
    { "TAG": "VB", "DEP": "aux", "OP": "+" },
    { "TAG": "VBN", "DEP": "ROOT", "OP": "+" },
])

# Perfect continuous/progressive
pattern_set.create(Tense.PRESENT_PERF_CONT.value, [
    { "TAG": "VBP", "DEP": "aux", "OP": "+", "LEMMA": "have"  },
    { "TAG": "VBN", "DEP": "aux", "OP": "+" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.PRESENT_PERF_CONT_3.value, [
    { "TAG": "VBZ", "DEP": "aux", "OP": "+", "LEMMA": "have"  },
    { "TAG": "VBN", "DEP": "aux", "OP": "+", "LEMMA": "be"  },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.PAST_PERF_CONT.value, [
    { "TAG": "VBD", "DEP": "aux", "OP": "+", "LEMMA": "have"  },
    { "TAG": "VBN", "DEP": "aux", "OP": "+", "LEMMA": "be"  },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
])
pattern_set.create(Tense.FUTURE_PERF_CONT.value, [
    { "TAG": "MD", "DEP": "aux", "OP": "+", "LEMMA": "will" },        
    { "TAG": "VB", "DEP": "aux", "OP": "+", "LEMMA": "have"  },
    { "TAG": "VBN", "DEP": "aux", "OP": "+", "LEMMA": "be" },
    { "TAG": "VBG", "DEP": "ROOT", "OP": "+" },
])

### Verb tense matcher

In [5]:
from spacy.matcher import Matcher


def create_verb_tense_matcher(nlp):
    matcher = Matcher(nlp.vocab)

    # Simple
    matcher.add(Tense.PRESENT_SIMPLE.value, [
        pattern_set.find(Tense.PRESENT_SIMPLE.value),
        pattern_set.find(Tense.PRESENT_SIMPLE_3.value),
    ])
    matcher.add(Tense.PAST_SIMPLE.value, [
        pattern_set.find(Tense.PAST_SIMPLE.value),
    ])
    matcher.add(Tense.FUTURE_SIMPLE.value, [
        pattern_set.find(Tense.FUTURE_SIMPLE_WILL.value),
        pattern_set.find(Tense.FUTURE_SIMPLE_BE_GOING_TO.value),
        pattern_set.find(Tense.FUTURE_SIMPLE_BE_GOING_TO_3.value),
    ])
    return matcher

#     # Continuous
#     matcher.add(Tense.PRESENT_CONT.value, [
#         pattern_set.find(Tense.PRESENT_CONT.value),
#         pattern_set.find(Tense.PRESENT_CONT_3.value),
#     ])
#     matcher.add(Tense.PAST_CONT.value, [
#         pattern_set.find(Tense.PAST_CONT.value),
#     ])
#     matcher.add(Tense.FUTURE_CONT.value, [
#         pattern_set.find(Tense.FUTURE_CONT.value),
#     ])

#     # Perfect
#     matcher.add(Tense.PRESENT_PERF.value, [
#         pattern_set.find(Tense.PRESENT_PERF.value),
#         pattern_set.find(Tense.PRESENT_PERF_3.value),
#     ])
#     matcher.add(Tense.PAST_PERF.value, [
#         pattern_set.find(Tense.PAST_PERF.value),
#     ])
#     matcher.add(Tense.FUTURE_PERF.value, [
#         pattern_set.find(Tense.FUTURE_PERF.value),
#     ])

#     # Perfect continuous
#     matcher.add(Tense.PRESENT_PERF_CONT.value, [
#         pattern_set.find(Tense.PRESENT_PERF_CONT.value),
#         pattern_set.find(Tense.PRESENT_PERF_CONT_3.value),
#     ])
#     matcher.add(Tense.PAST_PERF_CONT.value, [
#         pattern_set.find(Tense.PAST_PERF_CONT.value),
#     ])
#     matcher.add(Tense.FUTURE_PERF_CONT.value, [
#         pattern_set.find(Tense.FUTURE_PERF_CONT),
#     ])
#     return matcher



# def create_verb_tense_matcher(nlp):
#     matcher = Matcher(nlp.vocab)

#     # Simple
#     matcher.add(Tense.PRESENT_SIMPLE.value, [
#         pattern_set.find(Tense.PRESENT_SIMPLE.value),
#         pattern_set.find(Tense.PRESENT_SIMPLE_3.value),
#     ])
#     matcher.add(Tense.PAST_SIMPLE.value, [
#         pattern_set.find(Tense.PAST_SIMPLE.value),
#     ])
#     matcher.add(Tense.FUTURE_SIMPLE.value, [
#         pattern_set.find(Tense.FUTURE_SIMPLE_WILL.value),
#         pattern_set.find(Tense.FUTURE_SIMPLE_BE_GOING_TO.value),
#         pattern_set.find(Tense.FUTURE_SIMPLE_BE_GOING_TO_3.value),
#     ])

#     # Continuous
#     matcher.add(Tense.PRESENT_CONT, [
#         pattern_set.find(Tense.PRESENT_CONT),
#         pattern_set.find(Tense.PRESENT_CONT_3),
#     ])
#     matcher.add(Tense.PAST_CONT, [
#         pattern_set.find(Tense.PAST_CONT),
#     ])
#     matcher.add(Tense.FUTURE_CONT, [
#         pattern_set.find(Tense.FUTURE_CONT),
#     ])

#     # Perfect
#     matcher.add(Tense.PRESENT_PERF, [
#         pattern_set.find(Tense.PRESENT_PERF),
#         pattern_set.find(Tense.PRESENT_PERF_3),
#     ])
#     matcher.add(Tense.PAST_PERF, [
#         pattern_set.find(Tense.PAST_PERF),
#     ])
#     matcher.add(Tense.FUTURE_PERF, [
#         pattern_set.find(Tense.FUTURE_PERF),
#     ])

#     # Perfect continuous
#     matcher.add(Tense.PRESENT_PERF_CONT, [
#         pattern_set.find(Tense.PRESENT_PERF_CONT),
#         pattern_set.find(Tense.PRESENT_PERF_CONT_3),
#     ])
#     matcher.add(Tense.PAST_PERF_CONT, [
#         pattern_set.find(Tense.PAST_PERF_CONT),
#     ])
#     matcher.add(Tense.FUTURE_PERF_CONT, [
#         pattern_set.find(Tense.FUTURE_PERF_CONT),
#     ])
#     return matcher


# class PatternSetMatcher(Matcher):
#     pattern_set = None

#     def __init__(self, vocab, pattern_set):
#         Matcher.__init__(self, vocab)
#         self.pattern_set = pattern_set

#     def __repr__(self):
#         return self.pattern_set.name

#     def add(self, rulename, rules):
#         hashed_rulename = hash(rulename)
#         super(PatternSetMatcher, self).add(hashed_rulename, rules)


# pattern_set_matcher = PatternSetMatcher(nlp.vocab, pattern_set)
# pattern_set_matcher.add(Tense.PRESENT_SIMPLE, [
#     pattern_set.find(Tense.PRESENT_SIMPLE),
#     pattern_set.find(Tense.PRESENT_SIMPLE_3),
# ])


def get_best_match(doc, matches):
    best_match_rulename = ""
    best_match_span = ""
    for (match_id, start, end) in matches:
        rulename = nlp.vocab.strings[match_id]
        span = doc[start:end]

        if len(best_match_span) < len(span):
            best_match_rulename = rulename
            best_match_span = span

### Detect the verb tense

In [6]:
def detect_verb_tense(sentence):
    """Return a sentence's verb and its tense.

    Given a string, return a tuple of (rulename, match_text)
    """

    if not isinstance(sentence, str):
        raise TypeError("determine_verb_tense arg[0] is not a string")

    if not bool(sentence):
        raise ValueError("determine_verb_tense arg[0] is not truthy")

    doc = nlp(sentence)
    matcher = create_verb_tense_matcher(nlp)
    matches = matcher(doc)

    if not matches:
        return None

    return get_best_match(doc, matches)


def print_sentence_and_tense(sentence):
    match = detect_verb_tense(sentence)
    rulename = "???"
    span = "???"

    if match:
        (rulename, span) = match

    print(f"{rulename:18} => {sentence}")

### Do it

In [7]:
sentences = []

# Simple present
sentences += [
    "I am silly.", 
    "You are silly.",
    "She is silly.",
    "This is silly.",
    "I go there.", 
    "You go there.",
    "She goes there.",
    "This goes there.",
]

# Simple past
sentences += [
    "I was silly.", 
    "You were silly.",
    "She was silly.",
    "This was silly.",
    "I went there.", 
    "You went there.",
    "She went there.",
    "This went there.",
]

# Simple future *will*
sentences += [
    "I will be silly.", 
    "You will be silly.",
    "She will be silly.",
    "This will be silly.",
    "I will go there.",
    "You will go there.",
    "She will go there.",
    "This will go there.",
]

# Simple future *be going to*
sentences += [
    "I am going to be silly.", 
    "You are going to be silly.",
    "She is going to be silly.",
    "This is going to be silly.",
    "I am going to go there.",
    "You are going to go there.",
    "She is going to go there.",
    "This is going to go there.",
]

# # Present continuous
# sentences += [
#     "I am being silly.", 
#     "You are being silly.",
#     "She is being silly.",
#     "This is being silly.",
#     "I am going there.",
#     "You are going there.",
#     "She is going there.",
#     "This is going there.",
# ]

# # Past continuous
# sentences += [
#     "I was being silly.", 
#     "You were being silly.",
#     "She was being silly.",
#     "This was being silly.",
#     "I was going there.",
#     "You were going there.",
#     "She was going there.",
#     "This was going there.",
# ]

# # Future continuous
# sentences += [
#     "I will be being silly.", 
#     "You will be being silly.",
#     "She will be being silly.",
#     "This will be being silly.",
#     "I will be going there.",
#     "You will be going there.",
#     "She will be going there.",
#     "This will be going there.",
# ]

# # Present perfect
# sentences += [
#     "I have been silly.",
#     "You have been silly.", 
#     "She has been silly.", 
#     "This has been silly.", 
#     "I have gone there.",
#     "You have gone there.",
#     "She has gone there.",
#     "This has gone there.",
# ]

# # Past perfect
# sentences += [
#     "I had been silly.",
#     "You had been silly.", 
#     "She had been silly.", 
#     "This had been silly.",
#     "I had gone there.",
#     "You had gone there.",
#     "She had gone there.",
#     "This had gone there.",
# ]

# # Future perfect
# sentences += [
#     "I will have been silly.",
#     "You will have been silly.", 
#     "She will have been silly.",
#     "This will have been silly.",
#     "I will have gone there.",
#     "You will have gone there.",
#     "She will have gone there.",
#     "This will have gone there.",
# ]

# # Present perfect continuous
# sentences += [
#     "I have been being silly.",
#     "You have been being silly.",
#     "She have been being silly.",
#     "This have been being silly.",
#     "I have been going there.",
#     "You have been going there.",
#     "She has been going there.",
#     "This has been going there.",
# ]

# # Past perfect continuous
# sentences += [
#     "I had been being silly.", 
#     "You had been being silly.",
#     "She had been being silly.",
#     "This had been being silly.",
#     "I had been going there.",
#     "You had been going there.",
#     "She had been going there.",
#     "This had been going there.",
# ]

# # Future perfect continuous
# sentences += [
#     "I will have been being silly.", 
#     "You will have been being silly.",
#     "She will have been being silly.",
#     "This will have been being silly.",
#     "I will have been going there.",
#     "You will have been going there.",
#     "She will have been going there.",
#     "This will have been going there.",
# ]


for sentence in sentences:
    print_sentence_and_tense(sentence)

???                => I am silly.
???                => You are silly.
???                => She is silly.
???                => This is silly.
???                => I go there.
???                => You go there.
???                => She goes there.
???                => This goes there.
???                => I was silly.
???                => You were silly.
???                => She was silly.
???                => This was silly.
???                => I went there.
???                => You went there.
???                => She went there.
???                => This went there.
???                => I will be silly.
???                => You will be silly.
???                => She will be silly.
???                => This will be silly.
???                => I will go there.
???                => You will go there.
???                => She will go there.
???                => This will go there.
???                => I am going to be silly.
???                => You are going to