From f00c62c230fe28bb5b498f41264db14c14ee31e5 Mon Sep 17 00:00:00 2001
From: MukundVarmaT <mukundvarmat@gmail.com>
Date: Fri, 16 Jul 2021 13:55:41 +0530
Subject: [PATCH 1/2] add tense tense transform

---
 transformations/tense/README.md         |  53 ++++++++
 transformations/tense/__init__.py       |   1 +
 transformations/tense/requirements.txt  |   1 +
 transformations/tense/test.json         |  75 +++++++++++
 transformations/tense/transformation.py | 167 ++++++++++++++++++++++++
 5 files changed, 297 insertions(+)
 create mode 100644 transformations/tense/README.md
 create mode 100644 transformations/tense/__init__.py
 create mode 100644 transformations/tense/requirements.txt
 create mode 100644 transformations/tense/test.json
 create mode 100644 transformations/tense/transformation.py

diff --git a/transformations/tense/README.md b/transformations/tense/README.md
new file mode 100644
index 000000000..ccd08d489
--- /dev/null
+++ b/transformations/tense/README.md
@@ -0,0 +1,53 @@
+# Tense Transformation 🦎  + ⌨️ → 🐍
+This transformation converts sentences from one tense to another, for example from simple present to simple past.
+
+Author name: Tanay Dixit, Mukund Varma T
+
+## What type of a transformation is this?
+
+In this transformation, we convert a sentence into the target tense by conjugating each verb so that it agrees with its subject.
+This ensures that the context of the given sentence remains the same while the attribute of time changes. 
+
+The following are some representative examples:
+
+    Input: I can come to the party
+    Target Tense: past
+    Transformed Text: I can came to the party
+
+    Input: I went to the park
+    Target Tense: future
+    Transformed Text: I will go to the park
+
+    Input: I will go to the park.
+    Target Tense: present
+    Transformed Text: I go to the park.
+
+## What tasks does it intend to benefit?
+
+The task is designed to measure the capacity of language understanding in language models, specifically to understand the given tense of a sentence. 
+This task is nominally simple for humans, since we have an understanding of time and of sequences of events, but it is difficult for language models, as they do not have any prior information about time.
+There have been a couple of attempts to perform controlled attribute text transformation (Logeswaran et al.), but this has yet to be tested on language models trained in a general setting.
+
+## Citations
+
+```bibtex
+@article{DBLP:journals/corr/abs-1811-01135,
+    author    = {Lajanugen Logeswaran and
+                Honglak Lee and
+                Samy Bengio},
+    title     = {Content preserving text generation with attribute controls},
+    journal   = {CoRR},
+    volume    = {abs/1811.01135},
+    year      = {2018},
+    url       = {http://arxiv.org/abs/1811.01135},
+    archivePrefix = {arXiv},
+    eprint    = {1811.01135},
+    timestamp = {Thu, 22 Nov 2018 17:58:30 +0100},
+    biburl    = {https://dblp.org/rec/journals/corr/abs-1811-01135.bib},
+    bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+```
+
+## What are the limitations of this transformation?
+
+The transformation is not robust to all complex cases and is limited to only simple past/present/future tense conversions.
diff --git a/transformations/tense/__init__.py b/transformations/tense/__init__.py
new file mode 100644
index 000000000..0a79241bb
--- /dev/null
+++ b/transformations/tense/__init__.py
@@ -0,0 +1 @@
+from .transformation import *
\ No newline at end of file
diff --git a/transformations/tense/requirements.txt b/transformations/tense/requirements.txt
new file mode 100644
index 000000000..2a1eb2862
--- /dev/null
+++ b/transformations/tense/requirements.txt
@@ -0,0 +1 @@
+pattern @ git+https://github.com/tanay2001/pattern.git
\ No newline at end of file
diff --git a/transformations/tense/test.json b/transformations/tense/test.json
new file mode 100644
index 000000000..d0cf429b0
--- /dev/null
+++ b/transformations/tense/test.json
@@ -0,0 +1,75 @@
+{
+   "type": "tense_transformation",
+   "test_cases": [
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "I will go to the park."
+         },
+         "outputs": [
+            {
+               "sentence": "I went to the park."
+            }
+         ]
+      },
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "I can come to the party"
+         },
+         "outputs": [
+            {
+               "sentence": "I can came to the party"
+            }
+         ]
+      },
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "I will go to the park"
+         },
+         "outputs": [
+            {
+               "sentence": "I went to the park"
+            }
+         ]
+      },
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "I go to the park."
+         },
+         "outputs": [
+            {
+               "sentence": "I went to the park."
+            }
+         ]
+      },
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "I visit the hospital"
+         },
+         "outputs": [
+            {
+               "sentence": "I visited the hospital"
+            }
+         ]
+      }
+   ]
+}
\ No newline at end of file
diff --git a/transformations/tense/transformation.py b/transformations/tense/transformation.py
new file mode 100644
index 000000000..d11c30268
--- /dev/null
+++ b/transformations/tense/transformation.py
@@ -0,0 +1,167 @@
+from interfaces.SentenceOperation import SentenceOperation
+from tasks.TaskTypes import TaskType
+import string
+from pattern.en import conjugate, PAST, PRESENT, SINGULAR, PLURAL
+import spacy
+from spacy.symbols import NOUN
+import random
+
+SUBJ_DEPS = {'agent', 'csubj', 'csubjpass', 'expl', 'nsubj', 'nsubjpass'}
+
+def _get_conjuncts(tok):
+    """
+    Collect the children to the right of ``tok`` whose dependency label is
+    'conj', e.g. the bracketed names in "Burton, [Dan], and [Josh] ...".
+    """
+    conjuncts = (child for child in tok.rights if child.dep_ == 'conj')
+    return list(conjuncts)
+
+
+def is_plural_noun(token):
+    """
+    Heuristic plural-noun test: POS is NOUN and the lemma differs from the lowercase form.
+    Args:
+        token (``spacy.Token``): parent document must have POS information
+    Raises:
+        ValueError: if the parent document reports ``is_tagged`` as False
+    """
+    if token.doc.is_tagged is False:
+        raise ValueError('token is not POS-tagged')
+    return bool(token.pos == NOUN and token.lemma != token.lower)
+
+
+def get_subjects_of_verb(verb):
+    """Return all subjects of a verb according to the dependency parse."""
+    ancestors = list(verb.ancestors)  # computed once; the first entry is the token we retry with
+    if verb.dep_ == "aux" and ancestors:
+        return get_subjects_of_verb(ancestors[0])  # an auxiliary defers to its ancestor's subjects
+    subjs = [tok for tok in verb.lefts if tok.dep_ in SUBJ_DEPS]
+    # get additional conjunct subjects; NOTE: subjs grows while the generator walks it,
+    # so conjuncts of newly added conjuncts are visited too
+    subjs.extend(tok for subj in subjs for tok in _get_conjuncts(subj))
+    if not subjs and ancestors:
+        return get_subjects_of_verb(ancestors[0])  # no subject here: look further up the tree
+    return subjs
+
+
+def is_plural_verb(token):
+    """Return True when more than half of the verb's subjects are plural nouns."""
+    if token.doc.is_tagged is False:
+        raise ValueError('token is not POS-tagged')
+    subjects = get_subjects_of_verb(token)
+    if not subjects:
+        return False
+    plural_count = sum(1 for subj in subjects if is_plural_noun(subj))
+    return plural_count / len(subjects) > .5
+
+def preserve_caps(word, newWord):
+    """Returns newWord, capitalizing it if word starts with an ASCII capital letter."""
+    if 'A' <= word[:1] <= 'Z':  # word[:1] is '' for an empty word, so no IndexError
+        newWord = newWord.capitalize()
+    return newWord
+
+'''
+change tense function borrowed from https://github.com/bendichter/tenseflow/blob/master/tenseflow/change_tense.py
+'''
+
+class TenseTransformation(SentenceOperation):
+    tasks = [
+        TaskType.TEXT_CLASSIFICATION,
+        TaskType.TEXT_TO_TEXT_GENERATION,
+        TaskType.TEXT_TAGGING,
+    ]
+    languages = ["en"]
+
+    def __init__(self, to_tense):  # to_tense: 'past', 'present', 'future' or 'random' (checked by the assert below)
+        super().__init__()
+        assert to_tense in ['past', 'present', 'future', 'random']
+        self.to_tense = to_tense
+        self.nlp = spacy.load('en_core_web_sm')
+
+    def change_tense(self, text, to_tense):
+        """Change the tense of text.
+        Args:
+            text (str): text to change.
+            to_tense (str): 'present','past', or 'future'
+                ('future' conjugates verbs to the infinitive and inserts 'will').
+        Returns:
+            str: changed text, space-joined with punctuation re-attached.
+        """
+        tense_lookup = {'future': 'inf', 'present': PRESENT, 'past': PAST}  # 'inf': infinitive; 'will' is added separately
+        tense = tense_lookup[to_tense]
+
+        doc = self.nlp(text)
+
+        out = list()
+        out.append(doc[0].text)
+        words = []
+        for word in doc:
+            words.append(word)
+            if len(words) == 1:
+                continue
+            if (words[-2].text == 'will' and words[-2].tag_ == 'MD' and words[-1].tag_ == 'VB') or \
+                words[-1].tag_ in ('VBD', 'VBP', 'VBZ', 'VBN') or \
+                (words[-2].text not in ('to', 'not') and words[-1].tag_ == 'VB'):
+
+                if words[-2].text in ('were', 'am', 'is', 'are', 'was') or \
+                    (words[-2].text == 'be' and len(words) > 2 and words[-3].text == 'will'):
+                    this_tense = tense_lookup['past']  # a verb right after a form of 'be' always uses the 'past' lookup
+                else:
+                    this_tense = tense
+
+                subjects = [x.text for x in get_subjects_of_verb(words[-1])]
+                if ('I' in subjects) or ('we' in subjects) or ('We' in subjects):
+                    person = 1
+                elif ('you' in subjects) or ('You' in subjects):
+                    person = 2
+                else:
+                    person = 3
+                if is_plural_verb(words[-1]):
+                    number = PLURAL
+                else:
+                    number = SINGULAR
+                if (words[-2].text == 'will' and words[-2].tag_ == 'MD') or words[-2].text == 'had':
+                    out.pop(-1)  # drop the auxiliary already emitted; this can leave `out` empty
+                if to_tense == 'future':
+                    if not out or out[-1] not in ('will', 'be'):  # `not out` guards against IndexError after the pop above
+                        out.append('will')
+                    # handle will as a noun in future tense
+                    if words[-2].text == 'will' and words[-2].tag_ == 'NN':
+                        out.append('will')
+                oldWord = words[-1].text  # conjugate() may return None; then the token is kept unchanged
+                out.append(preserve_caps(oldWord, conjugate(oldWord, tense=this_tense, person=person, number=number) or oldWord))
+            else:
+                out.append(words[-1].text)
+
+            # negation
+            if words[-2].text + words[-1].text in ('didnot', 'donot', 'willnot', "didn't", "don't", "won't"):
+                if tense == PAST:
+                    out[-2] = 'did'
+                elif tense == PRESENT:
+                    out[-2] = 'do'
+                else:
+                    out.pop(-2)
+
+            # future perfect
+            if words[-1].text in ('have', 'has') and len(list(words[-1].ancestors)) and words[-1].dep_ == 'aux':
+                out.pop(-1)
+
+        text_out = ' '.join(out)
+
+        # Remove spaces before/after punctuation:
+        for char in string.punctuation:
+            if char in """(<['""":
+                text_out = text_out.replace(char+' ', char)
+            else:
+                text_out = text_out.replace(' '+char, char)
+
+        for char in ["-", "“", "‘"]:
+            text_out = text_out.replace(char+' ', char)
+        for char in ["…", "”", "'s", "n't"]:
+            text_out = text_out.replace(' '+char, char)
+
+        return text_out
+
+    def generate(self, sentence: str): 
+        perturbed_texts = self.change_tense(sentence, to_tense = random.choice(['past', 'present', 'future']) if self.to_tense == 'random' else self.to_tense)
+        return [perturbed_texts]
\ No newline at end of file

From eca93fdfe6ae9b4cf6213fdf7b3b89bafd34d860 Mon Sep 17 00:00:00 2001
From: tanay2001 <dixittanay@gmail.com>
Date: Wed, 28 Jul 2021 21:56:14 +0530
Subject: [PATCH 2/2] addressed issues

---
 transformations/tense/README.md         | 10 ++++--
 transformations/tense/test.json         | 42 +++++++++++++++++++++++++
 transformations/tense/transformation.py | 13 +++++---
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/transformations/tense/README.md b/transformations/tense/README.md
index ccd08d489..ecec749dc 100644
--- a/transformations/tense/README.md
+++ b/transformations/tense/README.md
@@ -10,9 +10,9 @@ This ensures that the context of the given sentence remains the same while the a
 
 The following are some representative examples:
 
-    Input: I can come to the party
+    Input: My father goes to gym every day
     Target Tense: past
-    Transformed Text: I can came to the party
+    Transformed Text: My father went to gym every day
 
     Input: I went to the park
     Target Tense: future
@@ -47,7 +47,13 @@ There have been a couple of attempts to perform controlled attribute text transf
     bibsource = {dblp computer science bibliography, https://dblp.org}
 }
 ```
+### Data and Source Code
+The tense-change and verb-inflection logic is borrowed from https://github.com/bendichter/tenseflow
 
 ## What are the limitations of this transformation?
 
 The transformation is not robust to all complex cases and is limited to only simple past/present/future tense conversions.
+Example where it fails (the participle "done" is wrongly conjugated to "did"): <br>
+Input: I will go for dinner after I am done playing tennis. <br>
+to_tense: past <br>
+Output: I went for dinner after I was did playing tennis.
\ No newline at end of file
diff --git a/transformations/tense/test.json b/transformations/tense/test.json
index d0cf429b0..8651468b8 100644
--- a/transformations/tense/test.json
+++ b/transformations/tense/test.json
@@ -15,6 +15,20 @@
             }
          ]
       },
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "It smells very delicious in the kitchen, what are you cooking?"
+         },
+         "outputs": [
+            {
+               "sentence": "It smelt very delicious in the kitchen, what were you cooking?"
+            }
+         ]
+      },
       {
          "class": "TenseTransformation",
          "args": {
@@ -70,6 +84,34 @@
                "sentence": "I visited the hospital"
             }
          ]
+      },
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "I will go for dinner after I am done playing tennis"
+         },
+         "outputs": [
+            {
+               "sentence": "I went for dinner after I was did playing tennis"
+            }
+         ]
+      },
+      {
+         "class": "TenseTransformation",
+         "args": {
+            "to_tense": "past"
+         },
+         "inputs": {
+            "sentence": "My father goes to gym every day"
+         },
+         "outputs": [
+            {
+               "sentence": "My father went to gym every day"
+            }
+         ]
       }
    ]
 }
\ No newline at end of file
diff --git a/transformations/tense/transformation.py b/transformations/tense/transformation.py
index d11c30268..b22630287 100644
--- a/transformations/tense/transformation.py
+++ b/transformations/tense/transformation.py
@@ -5,6 +5,7 @@
 import spacy
 from spacy.symbols import NOUN
 import random
+from initialize import spacy_nlp
 
 SUBJ_DEPS = {'agent', 'csubj', 'csubjpass', 'expl', 'nsubj', 'nsubjpass'}
 
@@ -67,8 +68,7 @@ def preserve_caps(word, newWord):
 class TenseTransformation(SentenceOperation):
     tasks = [
         TaskType.TEXT_CLASSIFICATION,
-        TaskType.TEXT_TO_TEXT_GENERATION,
-        TaskType.TEXT_TAGGING,
+        TaskType.TEXT_TO_TEXT_GENERATION
     ]
     languages = ["en"]
 
@@ -76,7 +76,7 @@ def __init__(self, to_tense):
         super().__init__()
         assert to_tense in ['past', 'present', 'future', 'random']
         self.to_tense = to_tense
-        self.nlp = spacy.load('en_core_web_sm')
+        self.nlp = spacy_nlp if spacy_nlp else spacy.load("en_core_web_sm")
 
     def change_tense(self, text, to_tense):
         """Change the tense of text.
@@ -91,7 +91,7 @@ def change_tense(self, text, to_tense):
         tense = tense_lookup[to_tense]
 
         doc = self.nlp(text)
-
+        if not len(doc): return text  # empty input: nothing to conjugate, and doc[0] below would raise IndexError
         out = list()
         out.append(doc[0].text)
         words = []
@@ -163,5 +163,8 @@ def change_tense(self, text, to_tense):
         return text_out
 
     def generate(self, sentence: str): 
+        """
+        Return a one-element list holding `sentence` rewritten in the target tense (a random one when to_tense is 'random').
+        """
         perturbed_texts = self.change_tense(sentence, to_tense = random.choice(['past', 'present', 'future']) if self.to_tense == 'random' else self.to_tense)
-        return [perturbed_texts]
\ No newline at end of file
+        return [perturbed_texts]