#### RULE BASED APPROCH TO A BASE CASE PROBLEM - CONVERT PRESENT CONTINOUS TENSE TO SIMPLE PRESENT TENSE AND FURTHER ENHANCING IT TO OTHER TENSES ASWELL

##### Import Libraries

In [31]:
import nltk
from nltk.tokenize import word_tokenize as wt
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
lem = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to /home/nlplab3/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


#### Tokenising the sentences

In [32]:
cont_sent = 'Harry Potter is coming to Hogwarts'
cont_sent_token = wt(cont_sent)

In [33]:
print(cont_sent_token)

['Harry', 'Potter', 'is', 'coming', 'to', 'Hogwarts']


### Checking the POS Tags

In [34]:
cont_sent_tag = nltk.pos_tag(cont_sent_token)

In [35]:
print(cont_sent_tag)

[('Harry', 'NNP'), ('Potter', 'NNP'), ('is', 'VBZ'), ('coming', 'VBG'), ('to', 'TO'), ('Hogwarts', 'VB')]


In [36]:
nltk.download('tagsets')

[nltk_data] Downloading package tagsets to /home/nlplab3/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!


True

In [37]:
nltk.help.upenn_tagset('NNP')

NNP: noun, proper, singular
    Motown Venneboerger Czestochwa Ranzer Conchita Trumplane Christos
    Oceanside Escobar Kreisler Sawyer Cougar Yvette Ervin ODI Darryl CTCA
    Shannon A.K.C. Meltex Liverpool ...


In [38]:
nltk.help.upenn_tagset('VBZ')

VBZ: verb, present tense, 3rd person singular
    bases reconstructs marks mixes displeases seals carps weaves snatches
    slumps stretches authorizes smolders pictures emerges stockpiles
    seduces fizzes uses bolsters slaps speaks pleads ...


In [39]:
nltk.help.upenn_tagset('VBG')

VBG: verb, present participle or gerund
    telegraphing stirring focusing angering judging stalling lactating
    hankerin' alleging veering capping approaching traveling besieging
    encrypting interrupting erasing wincing ...


In [40]:
nltk.help.upenn_tagset('TO')

TO: "to" as preposition or infinitive marker
    to


In [41]:
nltk.help.upenn_tagset('VB')

VB: verb, base form
    ask assemble assess assign assume atone attention avoid bake balkanize
    bank begin behold believe bend benefit bevel beware bless boil bomb
    boost brace break bring broil brush build ...


### Lemmatizing the verbs

In [42]:
lem.lemmatize("are", pos="v")

'be'

In [43]:
def verb_to_wordnet(verb_tag):
    if verb_tag.startswith('V') or verb_tag.startswith('JJ'):
        return 'v'

In [44]:
cont_sent_lem = []
for i in cont_sent_tag:
    wordnet_verb = verb_to_wordnet(i[1])
    
    if wordnet_verb is not None:
        cont_sent_lem.append(lem.lemmatize(i[0], wordnet_verb))
    else:
        cont_sent_lem.append(i[0])



In [45]:
print(cont_sent_lem)

['Harry', 'Potter', 'be', 'come', 'to', 'Hogwarts']


### Dependency Parsing approach - either syntaction parsing or semantic parsing 

In [46]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [47]:
from spacy import displacy
doc = nlp('Harry Potter is coming to Hogwarts')
displacy.render(doc,style="dep")

In [48]:
for token in doc:
    print(token.i, token, token.dep_, token.head.i, token.head)

0 Harry compound 1 Potter
1 Potter nsubj 3 coming
2 is aux 3 coming
3 coming ROOT 3 coming
4 to prep 3 coming
5 Hogwarts pobj 4 to


In [49]:
from spacy import displacy

In [50]:
displacy.render(doc, style='dep', options={'compact': True})

In [51]:
spacy.explain('pobj')

'object of preposition'

In [52]:
from beautifultable import BeautifulTable

In [53]:
table = BeautifulTable()

In [54]:
table.columns.header = ['text','POS','TAG','Explain_tag','Dep','Shape','is_alpha','is_stop']
for token in doc:
    table.rows.append([token.text,token.pos_,token.tag_,spacy.explain(token.tag_),token.dep_,token.shape_,token.is_alpha,token.is_stop])

In [55]:
print(table)

+------+---------+-----+--------------------------+------+-------+------+------+
| text |   POS   | TAG |       Explain_tag        | Dep  | Shape | is_a | is_s |
|      |         |     |                          |      |       | lpha | top  |
+------+---------+-----+--------------------------+------+-------+------+------+
| Harr |  PROPN  | NNP |  noun, proper singular   | comp | Xxxxx |  1   |  0   |
|  y   |         |     |                          | ound |       |      |      |
+------+---------+-----+--------------------------+------+-------+------+------+
| Pott |  PROPN  | NNP |  noun, proper singular   | nsub | Xxxxx |  1   |  0   |
|  er  |         |     |                          |  j   |       |      |      |
+------+---------+-----+--------------------------+------+-------+------+------+
|  is  |   AUX   | VBZ | verb, 3rd person singula | aux  |  xx   |  1   |  1   |
|      |         |     |        r present         |      |       |      |      |
+------+---------+-----+----

#### Correcting singularity and plurality of verbs according to the subject

In [56]:
def correct_singular_plural(cont_sent_tag, cont_sent_lem):
    index = 0
    for i in range(len(cont_sent_lem)):
        if cont_sent_lem[i] == 'be':
            index = i
            if 'not' in cont_sent_lem:
                pos_neg = 'neg'
            else:
                pos_neg = 'pos'
        else:
            pos_neg = 'pos' if cont_sent_tag[index - 1][1] in ['NN', 'NNP', 'JJ'] or cont_sent_tag[index - 1][0].capitalize() in ['He', 'She', 'It'] else 'neg'
            
    if pos_neg == 'pos':
        if cont_sent_lem[index + 1][-1] in ['o', 's', 'z'] or cont_sent_lem[index + 1][-2:] in ['ch', 'sh', 'x']:
            cont_sent_lem[index + 1] += 'es'
        elif cont_sent_lem[index + 1][-1] == 'y':
            if cont_sent_lem[index + 1][-2] not in ['a', 'e', 'i', 'o', 'u']:
                cont_sent_lem[index + 1] = cont_sent_lem[index + 1][:-1] + 'ies'
            else:
                cont_sent_lem[index + 1] += 's'
        elif cont_sent_lem[index + 1] == 'have':
            cont_sent_lem[index + 1] = 'has'
        else:
            cont_sent_lem[index + 1] += 's'
        cont_sent_lem.remove('be')
    else:
        cont_sent_lem = ['does' if word == 'be' else word for word in cont_sent_lem]
        
    return cont_sent_lem


In [57]:
def prescont_to_simpress(prescont):
    
    # Tokenizing
    cont_sent_token = wt(prescont)
    cont_sent_tag = nltk.pos_tag(cont_sent_token)
    
    # Lemmatizing
    cont_sent_lem = []
    for i in cont_sent_tag:
        wordnet_verb = verb_to_wordnet(i[1])
    
        if wordnet_verb is not None:
            cont_sent_lem.append(lem.lemmatize(i[0], wordnet_verb))
        else:
            cont_sent_lem.append(i[0])
       
    # Correcting singular and plural
    cont_sent_lem = correct_singular_plural(cont_sent_tag, cont_sent_lem)
    
    #  tense form
    print(' '.join(cont_sent_lem))


In [102]:
from spacy import displacy
doc = nlp('She is studying for the exam')
displacy.render(doc,style="dep")

In [103]:
prescont_to_simpress("She is studying for the exam")

She studies for the exam


In [106]:
prescont_to_simpress("An Indian is arriving in India")
prescont_to_simpress("South Africans are winning the match")
prescont_to_simpress("He is waiting for the bus")
prescont_to_simpress("The king is kissing the queen gently")
prescont_to_simpress("Bees are buzzing around the honeycomb")
prescont_to_simpress("Parrots are flying around the village peacefully")
prescont_to_simpress("A carpenter is building a bookcase")
prescont_to_simpress("We are enjoying ourselves")
prescont_to_simpress("The pianist is playing Fur Elise softly")
prescont_to_simpress("Flowers are blooming in the morning")
prescont_to_simpress("A tree is falling down in the storm")
prescont_to_simpress("My cousin is going to school on a bicycle")

An Indian arrives in India
South Africans does win the match
He waits for the bus
The king kisses the queen gently
Bees does buzz around the honeycomb
Parrots does fly around the village peacefully
A carpenter builds a bookcase
We does enjoy ourselves
The pianist plays Fur Elise softly
Flowers does bloom in the morning
A tree falls down in the storm
My cousin goes to school on a bicycle


In [104]:
prescont_to_simpress("John is not watching TV")


John nots watch TV


In [124]:
import spacy

# Load the SpaCy English model
nlp = spacy.load("en_core_web_sm")

def identify_tense(sentence):
    # Parse the sentence using SpaCy
    doc = nlp(sentence)
    
    # Extract the tense from the parsed sentence
    tense = None
    for token in doc:
        if token.tag_ == 'VBD':
            tense = 'Past'
        elif token.tag_ == 'VBP':
            tense = 'Present'
        elif token.tag_ == 'VBZ':
            tense = 'Present'
        elif token.tag_ == 'VBG':
            tense = 'Present Continuous'
        elif token.tag_ == 'VBN':
            tense = 'Past Participle'
    
    return tense

# Example usage
sentence = "He eat apples."
print("Tense:", identify_tense(sentence))


Tense: Present


In [131]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Sample dataset with labeled sentences and their corresponding tenses
training_data = [
    ("He eats apples.", "Present"),
    ("He ate apples.", "Past"),
    ("He is eating apples.", "Present Continuous"),
    ("He has eaten apples.", "Past Participle"),
]

# Preparing training data
X_train = [sentence for sentence, _ in training_data]
y_train = [tense for _, tense in training_data]

# Define a simple pipeline
pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),  # Convert text to numerical features
    ('classifier', SVC())  # Classifier for tense prediction
])

# Train the pipeline
pipeline.fit(X_train, y_train)

# Function to predict tense of a new sentence
def predict_tense(sentence):
    predicted_tense = pipeline.predict([sentence])
    return predicted_tense[0]

# Test the model
test_sentence = "i ate orange."
predicted_tense = predict_tense(test_sentence)
print("Predicted tense:", predicted_tense)


Predicted tense: Past


### Identification of the tense - PAST TENSE EXCLUSIVELY

In [69]:
import spacy
nlp = spacy.load('en_core_web_sm')

def detect_past_sentece(sentence):
    sent = list(nlp(sentence).sents)[0]
    return (
        sent.root.tag_ == "VBD" or
        any(w.dep_ == "aux" and w.tag_ == "VBD" for w in sent.root.children))

In [97]:
# Importing Required libraries

import spacy  # Spacy for text preprocessing
import pyinflect  # A python module for word inflections that works as a spaCy extension.

# Load small english model
nlp = spacy.load("en_core_web_sm")

# Parse text through the 'nlp' model
text = "The man asks what I am doing there."
doc = nlp(text)

def past_tensifier(doc, text):
    '''
    function to convert any type of sentence into past tense sentence.
    '''
    for i in range(len(doc)):
        token = doc[i]
        if token.tag_ in ['VBP', 'VBZ']:
            text = text.replace(token.text, token._.inflect("VBD"))
    return text


if __name__ == '__main__':
    past_sentence = past_tensifier(doc, text)
    print(past_sentence)

The man asked what I was doing there.


In [94]:
pip install pyinflect

Defaulting to user installation because normal site-packages is not writeable
Collecting pyinflect
  Downloading pyinflect-0.5.1-py3-none-any.whl (703 kB)
[K     |████████████████████████████████| 703 kB 17.6 MB/s eta 0:00:01
[?25hInstalling collected packages: pyinflect
Successfully installed pyinflect-0.5.1
Note: you may need to restart the kernel to use updated packages.


In [107]:
detect_past_sentece("she was shocked")

True

In [119]:
import nltk
from nltk.tokenize import sent_tokenize
from nltk.tag import pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

# Download required NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Function to change tense using NLTK
def change_tense_nltk(text, to_tense):
    # Tokenize the text into sentences
    sentences = sent_tokenize(text)
    out = []
    for sentence in sentences:
        tokens = nltk.word_tokenize(sentence)
        tagged_tokens = pos_tag(tokens)
        for word, tag in tagged_tokens:
            if tag.startswith('VB'):  # If word is a verb
                lemma = WordNetLemmatizer().lemmatize(word, 'v')
                print("Original word:", word)
                print("Lemma:", lemma)
                if to_tense == 'present':
                    # Check if present tense form exists in WordNet
                    present_forms = wordnet.morphy(lemma, wordnet.VERB)
                    print("Present forms:", present_forms)
                    if present_forms:
                        out.append(present_forms[0])
                    else:
                        out.append(lemma)
                elif to_tense == 'past':
                    # Check if past tense form exists in WordNet
                    past_forms = wordnet.morphy(lemma, wordnet.VERB)
                    print("Past forms:", past_forms)
                    if past_forms:
                        out.append(past_forms[0])
                    else:
                        out.append(lemma)
                elif to_tense == 'future':
                    out.append('will ' + lemma)
            else:
                out.append(word)
    return ' '.join(out)

# Example usage
text = "i ate an apple."
print(change_tense_nltk(text, 'past'))


[nltk_data] Downloading package punkt to /home/nlplab3/nltk_data...


Original word: ate
Lemma: eat
Past forms: eat
i e an apple .


[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/nlplab3/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /home/nlplab3/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [89]:
import nltk
from nltk.tokenize import sent_tokenize
from nltk.tag import pos_tag
from nltk.stem import WordNetLemmatizer

# Download NLTK resources if not already downloaded
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Function to convert verb tense using NLTK
def change_tense_nltk(text, to_tense):
    # Tokenize the text into sentences
    sentences = sent_tokenize(text)
    out = []
    for sentence in sentences:
        tokens = nltk.word_tokenize(sentence)
        tagged_tokens = pos_tag(tokens)
        for word, tag in tagged_tokens:
            if tag.startswith('VB'):  # If word is a verb
                # Lemmatize the verb
                lemma = WordNetLemmatizer().lemmatize(word, 'v')
                # Convert to specified tense
                if to_tense == 'present':
                    out.append(lemma)  # No tense conversion needed for present tense
                elif to_tense == 'past':
                    # Use past tense form if available
                    past_form = nltk.corpus.wordnet.morphy(lemma, nltk.corpus.wordnet.VERB)
                    out.append(past_form[0] if past_form else lemma)
                elif to_tense == 'future':
                    out.append('will ' + lemma)  # Add "will" for future tense
            else:
                out.append(word)  # Append non-verb tokens unchanged
    return ' '.join(out)

# Example usage
text = ""
print(change_tense_nltk(text, 'present'))


Alice be begin to get very tired of sit by her sister on the bank .


[nltk_data] Downloading package punkt to /home/nlplab3/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/nlplab3/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /home/nlplab3/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [118]:
import spacy

# Load the English language model for spaCy
nlp = spacy.load("en_core_web_sm")

# Function to convert verb tense using spaCy
def change_tense_spacy(text, to_tense):
    # Parse the text using spaCy
    doc = nlp(text)
    out = []
    
    for token in doc:
        # If the token is a verb and is not an auxiliary verb (e.g., "will")
        if token.pos_ == 'VERB' and token.dep_ != 'aux':
            # Convert to the specified tense
            if to_tense == 'present':
                # Use present tense form
                out.append(token.lemma_)
            elif to_tense == 'past':
                # Use past tense form
                out.append(token._.inflect('VBD'))
            elif to_tense == 'future':
                # Add "will" and use base form (infinitive) of the verb
                out.append('will')
                out.append(token.lemma_)
        else:
            # Append non-verb tokens unchanged
            out.append(token.text)
    
    return ' '.join(out)

# Example usage
text = "he is bad"
print(change_tense_spacy(text, 'present'))


he is bad
