# In [2]:
import re
import contractions
import spacy
import unidecode
import nltk
from nltk.corpus import stopwords
from spacy.matcher import Matcher
from spacy.attrs import IS_PUNCT, LOWER, POS
# for Word tokenization import
from spacy.lang.en import English
# Module-level spaCy pipeline; the TextProcessing methods below call this instance directly.
nlp = spacy.load("en_core_web_sm")
# Second pipeline loaded with the 'parser' and 'ner' components disabled.
nlp1 = spacy.load('en_core_web_sm', disable=['parser', 'ner']) #for lemmatization purpose

class TextProcessing:

    def __init__(self):
        """
        Loads the spaCy model and prepares the filters and the stop-word set.

        Attributes set:
            nlp: spaCy pipeline loaded from 'en_core_web_sm'
            sentence_filter: URL-like fragments
            sentence_filter_quotes: quotation-mark characters
            email_pattern: regular expression matching an e-mail address
            stoplist: set of lower-case stop words used by remove_stopwords
        """
        self.nlp = spacy.load('en_core_web_sm')
        self.sentence_filter = ["www", "http", "https", ".com", ".de"]
        self.sentence_filter_quotes = ["\"", "“", "”"]
        # Raw string literal: previously the r'...' prefix and quotes were part of
        # the string itself, so the pattern demanded a literal r' before the address.
        self.email_pattern = r'[\w\.-]+@[\w\.-]+'

        # Start from NLTK's English list, add the domain-specific noise words and
        # keep the connectives that the conjunction rules (Case_01..Case_05) need.
        stop_words = set(stopwords.words('english'))
        stop_words |= {"own", "%", "anyone", "want", "period"}
        stop_words -= {"with", "more", "and", "or", "on"}
        self.stoplist = stop_words

    def process_text(self, text):
        """
        Runs the cleaning steps on the text and then both feature extractors.

        Cleaning order: unidecode, remove_extra_whitespaces, expand_contraction,
        remove_stopwords, filter_sentences, lower_case.

        Args:
            text: text to be processed
        Returns:
            tuple of (value returned by Complex_sentence_cases,
            value returned by print_final_features_from_text), both computed
            on the cleaned text
        """
        cleaned = text
        pipeline = (
            self.unidecode,
            self.remove_extra_whitespaces,
            self.expand_contraction,
            self.remove_stopwords,
            self.filter_sentences,
            self.lower_case,
        )
        for step in pipeline:
            cleaned = step(cleaned)

        # The complex-sentence rules print first, then the pattern matcher runs.
        complex_result = self.Complex_sentence_cases(cleaned)
        simple_result = self.print_final_features_from_text(cleaned)
        return complex_result, simple_result

    
    def filter_sentences(self, text):
        """
        Removes double quotation marks and replaces every URL with a single space.

        URLs are dropped because they only point the reader to the publisher.
        Only the URL itself is replaced; the rest of the sentence is kept.

        Args:
            text: text to be processed
        Returns:
            the text without '"' characters and with URLs replaced by " "
        """
        text = text.replace('"', '')  # remove quotation marks

        # The pattern is assembled from adjacent raw literals so that no line break
        # or indentation ends up inside the regular expression (a literal split
        # across two source lines previously broke the nested-parentheses branch).
        url_pattern = (
            r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)"
            r"(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+"
            r"(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)"
            r"""|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))"""
        )
        return re.sub(url_pattern, " ", text)
    
    
    def expand_contraction(self, text):
        """
        Passes the text through contractions.fix and returns the result.
        Args:
            text: text to be processed
        """
        expanded = contractions.fix(text)
        return expanded
    
    def remove_stopwords(self, text):
        
        text_without_sw = [word for word in text.split() if word.lower() not in self.stoplist]
        return " ".join(text_without_sw)
        


    def remove_extra_whitespaces(self, text):
        """
        Trims leading and trailing whitespace from the text.
        Whitespace inside the text is left untouched.
        Args:
            text: text to be processed
        """
        trimmed = text.strip()
        return trimmed
    
   
    def unidecode(self, text):
        """
        Lower-cases the text and then passes it through unidecode.unidecode
        Args:
            text: text to be processed
        """
        lowered = text.lower()
        return unidecode.unidecode(lowered)

    def lower_case(self, text):
        """
        Returns a lower-cased copy of the text
        Args:
            text: text to be processed

        """
        lowered = text.lower()
        return lowered
    
    def print_final_features_from_text(self, text):
        """
        Extracts feature phrases from the text with fine-grained POS-tag patterns.

        The text is parsed with the module-level ``nlp`` pipeline and a spaCy
        Matcher is run over it once. Every match is turned into a span of the doc;
        duplicate spans are dropped while the order of first occurrence is kept.
        A header and the total number of matches (before de-duplication) are printed.

        The Matcher and its patterns are built once per call (they used to be
        rebuilt and re-run for every token), and an empty text now yields an empty
        list instead of failing on an unbound ``matches`` variable.

        Args:
            text: text to be processed
        Returns:
            list of unique matched spans
        """
        doc = nlp(text)

        # Each pattern is a sequence of per-token specs on the fine-grained tag
        # (token.tag_); 'OP': "1" means exactly one token, "?" marks an optional one.
        ptrn_for_POS = [{"TAG": "VB", 'OP':"1"},{"TAG": {"IN": ["NN", "NNS"]}}]
        pattern = [{"TAG": "NNP", 'OP':"1"}, {"TAG": "NN", 'OP':"1"}, {"TAG": "NN", 'OP':"1"}]
        p1= [{"TAG": "VB", 'OP':"1"},{"TAG": "JJ", 'OP':"1"},{"TAG": {"IN": ["NN", "NNS"]}}]
        p2= [{"TAG": "VBP", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        p3= [{"TAG": "JJ", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NN", 'OP':"1"} ,{"TAG": "NN", 'OP':"1"}]
        p4= [{"TAG": "VB", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": {"IN": ["NN", "NNS"]}}]
        p5= [{"TAG": "VBG", 'OP':"1"},{"TAG": "JJ", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        p6= [{"TAG": "NNP", 'OP':"1"}, {"TAG": "NNP", 'OP':"1"}, {"TAG": "NNP", 'OP':"1"}]
        p7= [{"TAG": "JJ", 'OP':"?"},{"TAG": "NN", 'OP':"1"},{"TAG": "NN", 'OP':"1"} ,{"TAG": "NNS", 'OP':"1"}]
        p8= [{"TAG": "JJ", 'OP':"1"},{"TAG": "NNS", 'OP':"1"},{"TAG": "NN", 'OP':"1"} ,{"TAG": "NN", 'OP':"1"}]
        p9= [{"TAG": "VBP", 'OP':"?"},{"TAG": "JJ", 'OP':"1"},{"TAG": "NN", 'OP':"1"} ,{"TAG": "NN", 'OP':"1"}]
        q1= [{"TAG": "VB", 'OP':"1"},{"TAG": "NNP", 'OP':"1"},{"TAG": "IN", 'OP':"1"},{"TAG": "JJ", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        # Discarded candidates (kept for reference):
        #   q2 = NN VBZ, q3 = NN NNP
        q4= [{"TAG": "VB", 'OP':"1"},{"TAG": "IN", 'OP':"1"},{"TAG": "NN", 'OP':"1"} ,{"TAG": "NN", 'OP':"1"}]
        q5= [{"TAG": "NN", 'OP':"1"},{"TAG": "IN", 'OP':"1"},{"TAG": "JJ", 'OP':"1"} ,{"TAG": "NNS", 'OP':"1"}]
        new1=[{"TAG": "VBD", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        new2=[{"TAG": "NNP", 'OP':"1"},{"TAG": "NNS", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        new3=[{"TAG": "VBP", 'OP':"1"},{"TAG": "JJ", 'OP':"1"},{"TAG": "JJ", 'OP':"1"}]
        # new4 (VB JJ NN NNS) is not considered yet
        new5=[{"TAG": "VBG", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        new6=[{"TAG": "VBZ", 'OP':"1"},{"TAG": "JJ", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        new7=[{"TAG": "VBZ", 'OP':"1"},{"TAG": "NNP", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NNS", 'OP':"1"}]
        new8=[{"TAG": "VBZ", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NNS", 'OP':"1"}]
        new9=[{"TAG": "VBZ", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "VBZ", 'OP':"1"}]
        app5= [{"TAG": "VB", 'OP':"1"},{"TAG": "VBN", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "NN", 'OP':"1"}]
        app5_01=[{"TAG": "VBG", 'OP':"1"},{"TAG": "IN", 'OP':"1"},{"TAG": "NNS", 'OP':"1"}]
        app6= [{"TAG": "JJ", 'OP':"1"},{"TAG": "NN", 'OP':"1"},{"TAG": "JJ", 'OP':"?"},{"TAG": "NNS", 'OP':"1"}]
        app6_01= [{"TAG": "JJ", 'OP':"1"},{"TAG": "JJ", 'OP':"1"},{"TAG": "NNP", 'OP':"?"},{"TAG": "NNS", 'OP':"1"}]
        app6_02= [{"TAG": "VB", 'OP':"1"},{"TAG": "NNP", 'OP':"1"},{"TAG": {"IN": ["NN", "NNP","VBG"]}}]
        app6_03= [{"TAG": "NNP", 'OP':"1"}, {"TAG": "NNP", 'OP':"1"}, {"TAG": "NN", 'OP':"1"}]
        app7= [{"TAG": "VB", 'OP':"1"}, {"TAG": "JJ", 'OP':"1"}, {"TAG": "NNP", 'OP':"1"}]

        # Registration order is kept exactly as before.
        feature_patterns = [
            ptrn_for_POS, pattern, p1, p2, p3, p4, p5, p6, p7, p8, p9,
            q1, q4, q5,
            new1, new2, new3, new5, new6, new7, new8, new9,
            app5, app5_01, app6, app6_01, app6_02, app6_03, app7,
        ]

        matcher = Matcher(nlp.vocab)
        # NOTE(review): this is the spaCy v2 calling convention (on_match callback
        # as second argument, patterns as varargs) - confirm the installed version.
        matcher.add('TIME', None, *feature_patterns)

        matches = matcher(doc)

        # dict.fromkeys removes duplicate spans but keeps the first-seen order.
        features = list(dict.fromkeys(doc[start:end] for match_id, start, end in matches))

        print("\n")
        print("---------------> features from Non-complex sentences <---------------")
        print("Total matches found:", len(matches))
        return features
     
    def Case_01(self, text):
        """
        Prints feature phrases found around "and" / "or" tokens (rule set 1).

        The text is parsed with the module-level ``nlp`` pipeline. For every token
        whose text is "and" or "or", neighbouring tokens are fetched by fixed
        offsets and their coarse POS (pos_) and dependency (dep_) labels are
        compared with hand-written rules. Every rule that holds prints one token
        combination per line. Nothing is returned.

        NOTE(review): the neighbour offsets are not range-checked, so a
        conjunction close to either end of the doc presumably raises IndexError
        or (for negative indices) picks up a token from the other end - confirm
        against spaCy's Doc indexing.

        Args:
            text: text to be processed
        """
        doc = nlp(text)
        #displacy.render(doc, style="dep")
        for token in doc:
            #print(token.text,token.pos_,token.dep_)
            # Rule 1: VERB "and" VERB NOUN
            if token.text == "and":
                previous_token = doc[token.i - 1]
                next_token = doc[token.i + 1]
                next_to_next= doc[next_token.i + 1]
                # Both verbs are paired with the noun that follows the second verb
                if previous_token.pos_ == "VERB" and previous_token.dep_ == "nmod" and next_token.pos_ == "VERB" and next_to_next.pos_ == "NOUN":
                    print(previous_token,next_to_next)
                    print(next_token,next_to_next)
            
            # Rule 2: VERB NOUN "and" NOUN
            if token.text == "and":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_to_previous=doc[previous_token.i - 1]
                next_token = doc[token.i + 1]
                # The verb is paired with each of the two coordinated nouns
                if previous_to_previous.pos_ == "VERB" and previous_token.pos_ == "NOUN" and previous_to_previous.dep_ in ["ROOT","conj"] and next_token.pos_ == "NOUN" and next_token.dep_ == "conj":
                    print(previous_to_previous,previous_token)
                    print(previous_to_previous,next_token)
                    
            # Rule 3: VERB <one token> ADJ "and" NOUN NOUN
            if token.text == "and":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                dep_4_previous = previous_token.dep_
                # despite its name this is the token three positions before "and"
                previous_to_previous= doc[token.i - 3]
                dep_4_previous_to_previous=previous_to_previous.dep_
                next_token = doc[token.i + 1]
                next_to_next= doc[next_token.i + 1]

                # Verb, adjective and first noun are each paired with the final noun
                if next_token.pos_ == "NOUN" and next_token.dep_ == "conj" and next_to_next.pos_ == "NOUN" and previous_token.pos_ == "ADJ" and previous_to_previous.pos_ == "VERB" and previous_to_previous.dep_ == "ROOT":
                    print(previous_to_previous,next_to_next)
                    print(previous_token,next_to_next)
                    print(next_token,next_to_next)
            
            # Rule 4: "or" variant; the tokens two and four positions before "or" are used
            if token.text == "or":
                previous_token = doc[token.i - 2]
                previous_to_previous= doc[previous_token.i - 2]
                next_token = doc[token.i + 1]
                next_plus_one= doc[next_token.i + 1]
                # The two earlier tokens and the following verb are each paired with the noun after that verb
                if next_token.pos_ == "VERB" and next_plus_one.pos_ == "NOUN" and previous_token.pos_ =="NOUN" and previous_to_previous.pos_ in ["VERB","NOUN"] and previous_to_previous.dep_ == "dobj":#orig=verb
                    print(previous_to_previous,next_plus_one)
                    print(previous_token,next_plus_one)
                    print(next_token,next_plus_one)
            
            # Rule 5: "and" variants that look two and four tokens back and up to five tokens ahead
            if token.text == "and":
                previous_token = doc[token.i - 2]
                previous_to_previous= doc[previous_token.i - 2]
                next_token = doc[token.i + 1]
                next_plus_one= doc[next_token.i + 1]
                next_plus_two= doc[next_plus_one.i + 1]
                next_plus_three=doc[next_plus_one.i + 2]
                next_plus_four=doc[next_token.i + 4]
                # Check the required next tokens pos
                if next_token.pos_ == "VERB" and next_plus_one.pos_ == "ADP" and next_plus_three.pos_ == "ADP" and next_plus_four.pos_ == "NOUN" and previous_token.pos_ =="NOUN":
                    print(previous_to_previous,next_plus_four)
                    print(previous_token,next_plus_four)
                    print(next_token,next_plus_four)
                # next_plus_three is printed here without being part of the condition
                if next_token.pos_ == "VERB" and next_plus_one.pos_ == "PROPN" and next_plus_two.pos_ in["NOUN","PROPN"] and previous_to_previous.pos_ in["ADJ","VERB"] and previous_token.pos_ =="NOUN":
                    print(previous_to_previous,next_plus_one,next_plus_two,next_plus_three)
                    print(previous_token,next_plus_one,next_plus_two,next_plus_three)
                    print(next_token,next_plus_one,next_plus_two,next_plus_three)
    
    def Case_02(self, text):
        """
        Prints feature phrases found around coordinating conjunctions (rule set 2).

        The text is parsed with the module-level ``nlp`` pipeline. Tokens whose
        text is "and", and tokens whose coarse POS is CCONJ, are inspected
        together with their neighbours; every rule that holds prints one token
        combination per line. Nothing is returned.

        NOTE(review): the neighbour offsets are not range-checked, so a
        conjunction close to either end of the doc presumably raises IndexError
        or wraps around to the other end - confirm against spaCy's Doc indexing.

        Args:
            text: text to be processed
        """
        doc = nlp(text)
        #displacy.render(doc, style="dep")
        # Iterate over the tokens in the doc
        for token in doc:
            #print(token.text,token.pos_,token.dep_)
            
            # Rule 1: NOUN <dobj token> "and" NOUN NOUN
            if token.text == "and":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_to_previous= doc[previous_token.i - 1]
                next_token = doc[token.i + 1]
                next_to_next= doc[next_token.i + 1]
                # The leading noun is combined with each conjunct and the trailing noun
                
                if next_token.pos_ == "NOUN" and next_to_next.pos_ == "NOUN" and previous_token.dep_ == "dobj" and previous_to_previous.pos_ == "NOUN":#orig noun
                    print(previous_to_previous,previous_token,next_to_next)
                    print(previous_to_previous,next_token,next_to_next)
                    
            # Rule 2: VERB <token> CCONJ ADJ ADJ <token>
            if token.pos_ == "CCONJ":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_to_previous= doc[previous_token.i - 1]
                next_token = doc[token.i + 1]
                next_plus_one= doc[next_token.i + 1]
                next_plus_two= doc[next_plus_one.i + 1]
                # previous_token and next_plus_two are printed without being part of the condition
                if next_token.pos_ == "ADJ" and next_plus_one.pos_ == "ADJ" and previous_to_previous.pos_ == "VERB":#orig noun
                    print(previous_to_previous,previous_token,next_plus_one,next_plus_two)
                    print(previous_to_previous,next_token,next_plus_one,next_plus_two)
    
    
    def Case_03(self, text):
        """
        Prints feature phrases found around "and" tokens (rule set 3).

        The text is parsed with the module-level ``nlp`` pipeline. For every token
        whose text is "and", the preceding and following tokens are compared with
        POS / dependency rules; every rule that holds prints one token combination
        per line. Nothing is returned.

        NOTE(review): the neighbour offsets are not range-checked, so an "and"
        close to either end of the doc presumably raises IndexError or wraps
        around to the other end - confirm against spaCy's Doc indexing.

        Args:
            text: text to be processed
        """
        doc = nlp(text)
        #displacy.render(doc, style="dep")
        # Iterate over the tokens in the doc
        for token in doc:
            #print(token.text,token.pos_,token.dep_)
            
            # Rule 1: VERB NOUN "and" NOUN|VERB NOUN|PROPN
            if token.text == "and":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_to_previous= doc[previous_token.i - 1]
                next_token = doc[token.i + 1]
                next_plus_one= doc[next_token.i + 1]
                # The verb is combined with each conjunct and the token after the second conjunct
                
                if previous_token.pos_ == "NOUN" and previous_token.dep_ in ["dobj","nmod"] and previous_to_previous.pos_ == "VERB" and previous_to_previous.dep_ in ["ROOT","conj"] and next_token.pos_ in ["NOUN", "VERB"] and next_plus_one.pos_ in ["NOUN", "PROPN"] and next_plus_one.dep_ in ["dobj","amod"]:
                    print(previous_to_previous,previous_token,next_plus_one)
                    print(previous_to_previous,next_token,next_plus_one)
                    
            # Rules 2-4: <token> "and" VERB followed by a two- or three-token object phrase
            if token.text == "and":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                next_token = doc[token.i + 1]
                next_plus_one= doc[next_token.i + 1]
                next_plus_two= doc[next_plus_one.i + 1]
                next_plus_three=doc[next_plus_two.i + 1]
                # Rule 2: subject NOUN "and" VERB NOUN(amod) NOUN(dobj)
                if previous_token.pos_ == "NOUN" and previous_token.dep_ == "nsubj" and next_token.pos_ == "VERB" and next_plus_one.pos_ == "NOUN" and next_plus_one.dep_ =="amod" and next_plus_two.pos_ == "NOUN" and next_plus_two.dep_ =="dobj":
                    print(previous_token,next_plus_one,next_plus_two)
                    print(next_token,next_plus_one,next_plus_two)
                # Rule 3: VERB "and" VERB PROPN|ADJ(amod) NOUN
                if previous_token.pos_ == "VERB" and previous_token.dep_ in ["dobj","ROOT"] and next_token.pos_ == "VERB" and next_plus_one.pos_ in ["PROPN","ADJ"] and next_plus_one.dep_ =="amod" and next_plus_two.pos_ == "NOUN" and next_plus_two.dep_ in ["dobj","conj"]:
                    print(previous_token,next_plus_one,next_plus_two)
                    print(next_token,next_plus_one,next_plus_two)
                # Rule 4: VERB(conj) "and" VERB PROPN NOUN|PROPN VERB|NOUN
                if previous_token.pos_ == "VERB" and previous_token.dep_== "conj" and next_token.pos_ == "VERB" and next_plus_one.pos_ == "PROPN" and next_plus_two.pos_ in ["NOUN","PROPN"] and next_plus_three.pos_ in ["VERB","NOUN"]:
                    print(previous_token,next_plus_one,next_plus_two,next_plus_three)                        
                    print(next_token,next_plus_one,next_plus_two,next_plus_three)
    
    
    def Case_04(self, text):
        """
        Prints feature phrases found before "and" / "or" tokens (rule set 4).

        The text is parsed with the module-level ``nlp`` pipeline. For every token
        whose text is "and" or "or", several tokens before the conjunction (and
        the one after it) are fetched by fixed offsets and compared with POS /
        dependency rules; every rule that holds prints one token combination per
        line. Nothing is returned.

        NOTE(review): the neighbour offsets are not range-checked, so a
        conjunction close to either end of the doc presumably raises IndexError
        or wraps around to the other end - confirm against spaCy's Doc indexing.

        Args:
            text: text to be processed
        """
        doc = nlp(text)
        #displacy.render(doc, style="dep")
        # Iterate over the tokens in the doc
        for token in doc:
            #print(token.text,token.pos_,token.dep_)
            # First "and" block: the three tokens directly before "and" plus the one after it
            if token.text== "and":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_minus_one= doc[previous_token.i - 1]
                previous_minus_two= doc[previous_minus_one.i - 1]
                previous_minus_three=doc[previous_minus_two.i - 1]
                next_token = doc[token.i + 1]

                # VERB(parataxis) NOUN NOUN "and" NOUN
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "NOUN" and previous_minus_two.pos_ == "VERB" and previous_minus_two.dep_ == "parataxis" and next_token.pos_ == "NOUN":
                    print(previous_minus_two,previous_minus_one,previous_token)
                    print(previous_minus_two,previous_minus_one,next_token)
                # VERB ADP NOUN "and" NOUN
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "ADP" and previous_minus_two.pos_ == "VERB" and next_token.pos_ == "NOUN":
                    print(previous_minus_two,previous_minus_one,previous_token)
                    print(previous_minus_two,previous_minus_one,next_token)
                
            # "or" block: NOUN VERB(advcl) ADJ NOUN "or" NOUN
            if token.text== "or":
                previous_token = doc[token.i - 1]
                previous_minus_one= doc[previous_token.i - 1]
                previous_minus_two= doc[previous_minus_one.i - 1]
                previous_minus_three=doc[previous_minus_two.i - 1]
                next_token = doc[token.i + 1]
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "ADJ" and previous_minus_two.pos_ == "VERB" and previous_minus_two.dep_ == "advcl" and previous_minus_three.pos_ == "NOUN" and next_token.pos_ == "NOUN":
                    print(previous_minus_three,previous_minus_two,previous_minus_one,previous_token)
                    print(previous_minus_three,previous_minus_two,next_token)
            
            # Third "and" block: list items at offsets -1, -3 and -5, head tokens at -6 and -7
            if token.text == "and":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_minus_one= doc[previous_token.i - 2]
                previous_minus_two= doc[previous_minus_one.i - 2]
                previous_minus_three= doc[previous_minus_two.i - 1]
                previous_minus_four= doc[previous_minus_three.i - 1]
                add_for_upload_sent= doc[previous_minus_one.i - 1]  #added for sentence upload
                next_token = doc[token.i + 1]
                # NOUN ADP NOUN <token> NOUN <token> NOUN "and" <token>
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "NOUN" and previous_minus_two.pos_ == "NOUN" and previous_minus_three.pos_ == "ADP" and previous_minus_four.pos_ == "NOUN":
                    print( previous_minus_four, previous_minus_three,previous_minus_two)
                    print( previous_minus_four, previous_minus_three,previous_minus_one)
                    print( previous_minus_four, previous_minus_three,previous_token)
                    print( previous_minus_four, previous_minus_three,next_token)
        
                # ADJ(compound) NOUN <token> NOUN "and" <token>
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "NOUN" and add_for_upload_sent.pos_ == "ADJ" and add_for_upload_sent.dep_ == "compound":
                    print( add_for_upload_sent, previous_minus_one)
                    print( add_for_upload_sent, previous_token)
                    print( add_for_upload_sent, next_token)
            # Fourth "and" block: tokens at offsets -2, -4, -5 and -6 plus the one after "and"
            if token.text == "and":
                previous_token = doc[token.i - 2]
                previous_minus_one= doc[previous_token.i - 2]
                previous_minus_two= doc[previous_minus_one.i - 1]
                previous_minus_three= doc[previous_minus_two.i - 1]
                next_token = doc[token.i + 1]
                #Check for required pos_tags
                if previous_token.pos_ == "PROPN" and previous_minus_one.pos_ == "NOUN" and previous_minus_two.pos_ == "ADP" and previous_minus_three.pos_ == "VERB":
                    print( previous_minus_three,previous_minus_two, previous_minus_one)
                    print( previous_minus_three,previous_minus_two, previous_token)
                    print( previous_minus_three,previous_minus_two, next_token)
                # Check for required pos_tags in (set dates,times,and geofences)
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ in ["NOUN","VERB"] and previous_minus_two.pos_ == "VERB" and next_token.pos_ == "NOUN":
                    print( previous_minus_two, previous_minus_one)
                    print( previous_minus_two, previous_token)
                    print( previous_minus_two, next_token)
            
    
    def Case_05(self, text):
        """
        Prints feature phrases for longer enumerations before a conjunction (rule set 5).

        The text is parsed with the module-level ``nlp`` pipeline. Tokens whose
        coarse POS is CCONJ, and tokens whose fine-grained tag is CC, are
        inspected together with up to nine tokens before them (fetched by fixed
        offsets) and the token(s) after them; every rule that holds prints one
        token combination per line. Nothing is returned.

        NOTE(review): the neighbour offsets are not range-checked, so a
        conjunction close to either end of the doc presumably raises IndexError
        or wraps around to the other end - confirm against spaCy's Doc indexing.

        Args:
            text: text to be processed
        """
        doc = nlp(text)
        #displacy.render(doc, style="dep")
        # Iterate over the tokens in the doc
        for token in doc:
            #print(token.text,token.pos_,token.dep_)
            # Block 1 (CCONJ): tokens at offsets -1, -2, -4, -6 and -7
            if token.pos_ == "CCONJ":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_minus_one= doc[previous_token.i - 1]
                previous_minus_two= doc[previous_minus_one.i - 2]
                previous_minus_three= doc[previous_minus_two.i - 2]
                previous_minus_four= doc[previous_minus_three.i - 1]
                next_token = doc[token.i + 1]
        
                # The head noun (offset -7) is combined with each enumerated item
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "NOUN" and previous_minus_two.pos_ == "NOUN" and previous_minus_three.pos_ in ["NOUN","VERB"] and previous_minus_four.pos_ == "NOUN" and next_token.pos_ in ["ADJ","ADV"]:#orig ADV
                    print( previous_minus_four, previous_minus_three)
                    print( previous_minus_four, previous_minus_two)
                    print( previous_minus_four, previous_minus_one,previous_token)
        
            # Block 2 (tag CC): tokens at offsets -2, -4, -6, -8 and -9
            if token.tag_ == "CC":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 2]
                previous_minus_one= doc[previous_token.i - 2]
                previous_minus_two= doc[previous_minus_one.i - 2]
                previous_minus_three= doc[previous_minus_two.i - 2]
                previous_minus_four= doc[previous_minus_three.i - 1]
                next_token = doc[token.i + 1]
        
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "NOUN" and previous_minus_two.pos_ in ["NOUN","PROPN"] and previous_minus_three.pos_ == "NOUN" and previous_minus_four.pos_ in ["NOUN","PROPN"] and next_token.pos_ == "ADJ": #original of all given above
                    print( previous_minus_four, previous_minus_three)
                    print( previous_minus_four, previous_minus_two)
                    print( previous_minus_four, previous_minus_one)
                    print( previous_minus_four, previous_token)
                    
            
            # Block 3 (CCONJ): two alternative offset chains sharing the items at -1, -3 and -5
            if token.pos_ == "CCONJ":
                # Get the previous and next token in the document
                previous_token = doc[token.i - 1]
                previous_minus_one= doc[previous_token.i - 2]
                previous_minus_two= doc[previous_minus_one.i - 2]
                previous_minus_three= doc[previous_minus_two.i - 2]
                minus_three_for_translation_sent=doc[previous_minus_two.i - 1]#this is previous_minus_three for "send translation" sentence
                previous_minus_four= doc[previous_minus_three.i - 1]
                minus_four_for_translation_sent=doc[minus_three_for_translation_sent.i - 1]
                previous_minus_five= doc[previous_minus_four.i - 1]
                minus_five_for_translation_sent=doc[minus_four_for_translation_sent.i -1]
                next_token = doc[token.i + 1]
                # next_plus_one is fetched but not used by the rules below
                next_plus_one = doc[next_token.i + 1]
        
                # Check the required tokens
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "NOUN" and previous_minus_two.pos_ == "NOUN" and previous_minus_three.pos_ == "VERB" and previous_minus_four.pos_ == "NOUN" and previous_minus_four.dep_ == "nsubj" and previous_minus_five.pos_ == "VERB" and next_token.pos_ in ["VERB","NOUN"]:#orig Verb
                    print( previous_minus_five,previous_minus_four, previous_minus_three)
                    print( previous_minus_five,previous_minus_four, previous_minus_two)
                    print( previous_minus_five,previous_minus_four, previous_minus_one)
                    print( previous_minus_five,previous_minus_four, previous_token)
                    print( previous_minus_five,previous_minus_four, next_token)
                #check the required pos tags for send translation sent
                if previous_token.pos_ in ["NOUN","PROPN"] and previous_minus_one.pos_ in ["NOUN","PROPN"] and previous_minus_two.pos_ in ["NOUN","PROPN"] and minus_three_for_translation_sent.pos_ == "ADP" and minus_four_for_translation_sent.pos_ == "NOUN" and minus_five_for_translation_sent.pos_ == "VERB" and next_token.pos_ == "PROPN":
                    print( minus_five_for_translation_sent,minus_four_for_translation_sent,minus_three_for_translation_sent,previous_minus_two)
                    print( minus_five_for_translation_sent,minus_four_for_translation_sent,minus_three_for_translation_sent,previous_minus_one)
                    print( minus_five_for_translation_sent,minus_four_for_translation_sent,minus_three_for_translation_sent,previous_token)
                    print( minus_five_for_translation_sent,minus_four_for_translation_sent,minus_three_for_translation_sent,next_token)
            
            # Block 4 (tag CC): VERB ADV VERB NOUN <token> NOUN CC NOUN
            if token.tag_ == "CC":
                previous_token = doc[token.i - 1]
                previous_minus_one= doc[previous_token.i - 2]
                previous_minus_two= doc[previous_minus_one.i - 1]
                previous_minus_three= doc[previous_minus_two.i - 1]
                previous_minus_four= doc[previous_minus_three.i - 1]
                next_token = doc[token.i + 1]
                #check required pos tags
                if previous_token.pos_ == "NOUN" and previous_minus_one.pos_ == "NOUN" and previous_minus_two.pos_ == "VERB" and previous_minus_three.pos_ == "ADV" and previous_minus_four.pos_ == "VERB" and next_token.pos_ =="NOUN":
                    print( previous_minus_four,previous_minus_three,previous_minus_two,previous_minus_one)
                    print( previous_minus_four,previous_minus_three,previous_minus_two,previous_token)
                    print( previous_minus_four,previous_minus_three,previous_minus_two,next_token)  
    
    
    def Complex_sentence_cases(self, text):
        """
        Prints a header and then runs the rule sets Case_01 to Case_05 on the
        text, in that order. The rule sets print their findings; nothing is
        returned.
        Args:
            text: text to be processed
        """
        print("---------------> features from complex sentences <---------------")
        rule_sets = (
            self.Case_01,
            self.Case_02,
            self.Case_03,
            self.Case_04,
            self.Case_05,
        )
        for run_rules in rule_sets:
            run_rules(text)
    
    def list_To_String(self,text):
        """
        Joins the string form of every item with commas, for callers that want
        the output as a single string rather than a list.
        Args:
            text: iterable of items to be joined
        """
        return ','.join(map(str, text))
    
if __name__ == "__main__":

    processor = TextProcessing()

    text = """The best cure for phone addiction.\n\n## Featured by Apple as \"Best New Apps\" and \"Amazing Apps\" !\n## Top 5 Productivity apps in over 85 countries !\n\nHave you ever been addicted to your phone and just cannot put it down? Forest provides an interesting solution that beats your phone addiction. You can plant a seed in Forest. In the following time, this seed will gradually grow into a tree. With this interesting mechanism, the sense of achievement and responsibility will drive our users to stay away from their phone with no pain.\n\nFeatures:\n\u2022 A self-motivated and interesting way to beat phone addiction\n\u2022 Stay focused and get more things done\n\u2022 Turn your focused time into a lovely forest\n\u2022 Manage your own tags and view the detailed statistics about how you allocate your time\n\u2022 Compete with friends and users around the world\n\u2022 Earn reward and unlock more tree species\n\u2022 Track your focused time\n\u2022 Plant real trees on the Earth and protect our environment\n\nWe also provide browser extensions. Find out more on www.forestapp.cc!\n\nCredits\n*Sound Design : ShiKuang Lee\n*Some icons appearing in the app were made by Freepik from www.flaticon.com and are licensed under CC BY 3.0 .
With its beautiful design, easy-to-use interface, and lightning fast speed, this app makes reading, organizing, and sending emails across your inboxes (Gmail, Outlook, AOL and more) easier than ever. Yahoo Mail offers 1000GB of free cloud storage, so you never have to delete a message again. \n\nWith this app you can:\n\n- SEND MULTIPLE ATTACHMENTS - Easily add photos, videos, and documents to an email, all at once. \n- QUICKLY SEARCH - Search across all of your messages, contacts, files and photos, faster than ever. \n- SWITCH BETWEEN ACCOUNTS - Add your Gmail, Outlook, or second Yahoo account right in the app. \n- NEVER LOSE AN EMAIL - Keep all of your emails with 1000GB of free cloud space. \n- QUICKLY DELETE MULTIPLE EMAILS \u2013 Press and hold an email to select and delete multiple messages.\n- CHOOSE A THEME - Select different backgrounds and text colors for your Yahoo inbox.\n- DITCH PASSWORDS - Enable Account Key for secure password-free sign in.\n- SAY IT WITH A GIF - Can\u2019t find the right words? Send an animated GIF.\n- SEND STATIONERY - Send emails with artist-designed stationery from Paperless Post. \n- CONNECT GOOGLE DRIVE AND DROPBOX - Easily access and send photos and documents you\u2019ve stored in the cloud. \n- BE IN THE KNOW - Get news from Yahoo and the Yahoo Magazines right in the app.\n\nNotes: \n- To see all the new features, upgrade your device to iOS. \n- Optimized for use with VoiceOver.
Print attachments, documents, web pages and more right from your iPhone and iPad to any Wi-Fi or USB printer.\n\nTRY BEFORE YOU BUY - just download Printer Pro Lite to check how our application works with your printer.\n\nPrinter Pro lets you wirelessly print from the iPhone. It can print directly to many Wi-Fi printers or any printer attached to your Mac or PC via helper application installed on your computer.\n\nOnce installed, Printer Pro appears in the \"Open In...\" list on your device. This lets you print documents from Mail, PDF Expert and many other applications on your iPhone or iPad that support this function.\n\nUsing \"Open In...\" approach you can print files from many popular online storages: Dropbox and Google Drive. It just a matter of many taps to download your file via free Dropbox or may be Google Drive application and send it to printer.\n\nTo print a web page, just change \"http\" to \"phttp\" in Safari address bar and tap Go. The page will immediately be opened in the Printer Pro with print button just above your finger. You can print web based documents as well using this approach.\n\nWith Printer Pro you can print:\n\n- Email Attachments\n- iWork documents\n- Web pages\n- Files from other applications\n- Clipboard content\n- Photos\n- Documents on Dropbox and Google Drive\n- Contacts\n\n\u25c6 Printer Pro Desktop\nGet the free application for your computer to print more document types and with better quality. You can download it at www.readdle.com/printerpro\n\n\u25c6 List of supported document formats\nPDF, Word, Excel, Powerpoint, Pages, Numbers, Keynote, TXT, HTML, JPG, Safari webarchive\n\nFeel free to contact us If you have any suggestions, questions or issues at http://readdle.com/contact .
The official Gmail brings the best of Gmail to your iPhone or iPad with real-time notifications, multiple account support and search that works across all your mail.\n\nWith the Gmail app, you can:\n- Undo Send, to prevent embarrassing mistakes\n- Switch between multiple accounts\n- Get notified of new mail fast, with notification center, badge and lock screen options\n- Search your mail faster with instant results, predictions as you type & spelling suggestions \n- Swipe to archive/delete, to quickly clear out your inbox\n- Read your mail with threaded conversations\n- Auto-complete contact names as you type from your Google contacts or your phone\n- Respond to Google Calendar invites from the app\n- Organize your mail by archiving, labeling, starring, deleting and reporting spam\n- Send and receive attachments\n- See profile pictures as part of the conversation.
Get started with Google Drive for free and have all your files within reach from any smartphone, tablet, or computer. All your files in Drive \u2013 like your videos, photos, and documents \u2013 are backed up safely so you can\u2019t lose them. Easily invite others to view, edit, or leave comments on any of your files or folders.\n\nWith Drive, you can:\n\u2022 View documents, PDFs, photos, videos, and more\n\u2022 Search for files by name and content\n\u2022 Easily share files and folders with others\n\u2022 Set sharing permissions to view, comment, or edit\n\u2022 Quickly access recent files\n\u2022 See file details and activity \n\u2022 Enable viewing of files offline.
CloudApp makes image and file sharing on iOS devices a more enjoyable experience. CloudApp finds the balance between convenience, functionality, and charm \u2014 traits that are not normally associated with file sharing.\n\n*BEST FEATURE*\nCloudApp supports iOS Share extensions, so you\u2019ll see Send to CloudApp links when working with Contacts, Maps, etc. and opening from Mail Attachments to PDFs.\n\nOTHER FEATURES:\n- Upload your images, videos and text effortlessly. \n-Select one photo or several. \n-Upload as a grouped ZIP archive, or individually with optional compressing prior to upload to keep file sizes small.\n- Manage files more efficiently or filter by type. \n-CloudApp sorts your files by date uploaded, showing your latest files (and their views) at a glance. \n-Tap through, to preview files. \n-CloudApp will even open the contents of ZIP files. \n-Contextually-relevant statistics (file size, image dimensions, word count, duration and more) are displayed based on the document to eliminate clutter.\n-Customized thumbnail view, for images, that makes it easy to scan and flick through your photos.\n Sharing via email, Facebook, and Twitter is straightforward and seamless.\n-Restore items from previously deleted drops. \n-Add file from apps that support document interaction (\"open in..\").\n-Even mundane operations like renaming files are made enjoyable on the iPhone with CloudApp.\n-And much much more!.
Create, edit, and collaborate with others on documents from your iPod, iPhone, or iPad with the free Google Docs. With Googl
e Docs you can:\n\n* Create new documents or edit any that were created on the web or on another device \n* Share documents and work together with others in the same document at the same time\n* Open, edit, and save Microsoft Word documents\n* Get stuff done anytime -- even without an internet connection\n* Add and respond to comments\n* Never worry about losing your work -- everything is automatically saved as you type \n* Protect your documents with a 4-digit passcode.
Dropbox is the place for your photos, docs, videos, and other files. Files you keep in Dropbox are safely backed up and you can access them from all your devices. It\u2019s easy to send large files to anyone, even if they don\u2019t have a Dropbox account.\n\nFeatures:\n Access your files on any device, even if you\u2019re offline\n \u2022 Create and edit Microsoft Office files from your iPhone or iPad\n \u2022 Share links to your largest files without using email attachments\n\nWe offer in-app purchases for Dropbox Pro, which includes 1 TB of storage and additional features. The amount will be charged to your iTunes account and will vary by plan and country. You\u2019ll see the total price before payment.\nDropbox Pro subscriptions purchased in-app renew monthly or yearly depending on your plan. Plans are priced at $9.99USD monthly and $99.99USD yearly (pricing may change in various countries other than the U.S.). To avoid renewal, you must turn off auto-renew at least 24 hours before your subscription ends. Dropbox also offers one-month trials for monthly subscriptions.\n\nTerms of Service: https://www.dropbox.com/pricing_terms\nPrivacy Policy: https://www.dropbox.com/privacy .
** 20% OFF - LIMITED TIME HOLIDAY SALE **\n\nFantastical 2 is the award winner calendar app with features such as natural language parsing, reminders, a beautiful week view, and much more!\n\n\"Great iOS update to my favorite iPhone calendar app. Looks great, works great, and now integrates iOS reminders.\" - John Gruber (Daring Fireball) \n\u201c...if you need a pure replacement for the iPhone's stock calendar-app, Fantastical 2 is your best bet.\u201d - The Verge\n\"Fantastical is an app that just works. Love it.\" - Jim Dalrymple (The Loop)\n\"Fantastical is my new favorite iPhone app. It looks great, it works reliably, and, more importantly, it made managing my schedule better.\" - MacStories\n\u201cFantastical 2 is not only the absolute best calendar app on iOS, but the best reminders app as well.\u201d - iMore\n\nJust enter and Fantastical 2 will schedule it! Or type in \"todo buy milk at 5pm\" and a reminder will alert you at 5PM! If your device supports dictation, you can speak the details for your event or reminder and Fantastical 2 will handle the rest.\n\nONE NEW APP, VARIOUS NEW FEATURES\n\u2022 Reminders!\n\u00a0\u00a0\u00a0- See your events and dated reminders together in the main list\n\u00a0\u00a0\u00a0- Add reminders directly from the Reminders list or new event screen - just flip the switch to toggle between events and reminders\n\u00a0\u00a0\u00a0-\u00a0Set dates, times, and geofences (when I arrive/when I leave)\n\n\u2022 Significant new parser features, including:\n\u00a0\u00a0\u00a0- Create reminders by starting your sentence with \"reminder\", \"todo\", \"task\", or \"remind me to\"\n\u00a0\u00a0\u00a0- Expanded, expressive repeating events such as third Thursday of every month, every weekend, last weekday of the month, and more\n\u00a0\u00a0\u00a0- Create alerts with phrases such as \"remind me tomorrow at 3PM\", \"alert 1 hour before\", or \"alarm 3PM\"\n\n\u2022 All-new event details, map to show your event\u2019s locations and better 
repeating event options\n\u2022 An elegant week view when you rotate your iPhone to landscape\n\u2022 Background app updating allows events, reminders, and alerts to be pushed to Fantastical 2 even if you don't launch the app very often\n\u2022 An extended keyboard when creating new events or reminders, providing instant access to numbers and symbols for dates and times (only for 4\" screens or larger)\n\u2022 Birthday options - tap on a birthday to see contact information or send a quick greeting\n\u2022 TextExpander support\n\u2022 Many other refinements and improvements\n\nADDING NEW EVENTS OR REMINDERS IS FUN\n\u2022 The easiest and fastest way to add new events or reminders\n\u2022 Just type in your details or use dictation and watch your words magically turn into an actual event or reminder!\n\nTHE FUTURE LOOKS BRIGHT \n\u2022 Fantastical 2's DayTicker is the efficient and enjoyable way to see your schedule\n\u2022 The expansive and beautiful event list makes it easy to find your events\n\u2022 Tap an event or reminder to show and edit its details\n\u2022 Tap and hold an event to duplicate, move, or share\n\u2022 Use search to instantly locate specific events or reminders\n\u2022 View your Facebook events\n\nFAST, FRIENDLY, AND FLEXIBLE\n\u2022 Supports the same calendar services as the built-in Calendar app, including iCloud, Google Calendar, Exchange, and more! \n\u2022 Fantastical 2 is fully localized in English, French, German, Italian, and Spanish\n\u2022 Just type or speak your event or reminder in any of these languages and Fantastical will automatically understand\n\u2022 3D Touch, including Peek and Pop\n. Accessibility support.
Instantly speak over 40 languages.\n\nCan you imagine talking into your phone in one language and immediately hearing yourself in another? That\u2019s exactly what iTranslate Voice does. Just speak into your phone and it immediately replies in one of our 42 languages.\nFeatured on the App Store:\n \u2022 \"Editor's Choice\"\n \u2022 \"Best of 2013\"\n \u2022 \"Best of 2012\"\n \u2022 \"10 Apps that WOW\"\n \u2022 \"App Store\"\nWith iTranslate Voice you can:\n \u2022 Instantly speak over 40 languages\n \u2022 No typing needed. Look up definitions and translations for common words & phrases, just using your voice.\n \u2022 Use AirTranslate application to connect devices together and easily engage in conversation with other people.\n \u2022 Simply send translations via Email, SMS, Twitter or Facebook.\nIntroducing Phrasebook:\nWith Phrasebook you finally have a way to save frequently used phrases, sentences or questions. If you are a doctor and need to ask patients with different native languages the same 5 questions over and over again. Or you travel to different countries and want to have a list of the 10 most important phrases always at hand?\n\nWith Phrasebook you can now save those phrases and instantly translate them into whatever language we currently offer. 
Of course your phrases are synced with iCloud, so you\u2019ll never lose them.\n\nWhat the press say:\n \n \"Voice translation executed perfectly\"\n - Kilian Bell, CultofMac\n \n \"Spectacular, real-time language translation by simply speaking into your iPhone\"\n - Brent Dirks, AppAdvice.com\n \n \"iTranslate Voice is impressive\"\n - Federico Viticci, Macstories.net\n \n \"If you travel, iTranslate Voice may be the most important app in the App Store\"\n - Best iPhone apps of 2012, Gizmag\n\n\nSupported languages & dialects:\nArabic (Saudi), Arabic (UAE), Arabic (Egypt), Catalan, Chinese (Mandarin), Chinese (Taiwan), Chinese (Cantonese), Croatian, Czech, Danish, Dutch, English (GB), English (USA), English (Australia), Finnish, French, French (Canada), German, Greek, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Malay, Norwegian, Polish, Portuguese (Portugal), Portuguese (Brazil), Romanian, Russian, Slovak, Spanish (Spain), Spanish (United States), Spanish (Mexico), Swedish, Thai, Turkish, Ukrainian, Vietnamese\n\nAdditional languages with partial support:\nCroatian, Malay, Ukrainian, Vietnamese\n\n\nVisit our Website to find out more: http://itranslatevoice.com/\n\nIf you have any suggestions, questions or need some help, just write us at feedback@sonicomobile.com. We're always happy to provide quick and useful help.\n\n* iTranslate Voice requires an Internet connection.
"""

    text = processor.process_text(text)

    print(text)
        
    

---------------> features from complex sentences <---------------
send photos
send documents
send attachments
receive attachments
view comments
edit comments
leave comments
share files
share folders
create documents
edit documents
collaborate documents
open microsoft word documents
edit microsoft word documents
save microsoft word documents
add comments
respond comments
access numbers dates
access symbols dates
create microsoft office files
edit microsoft office files
compete with friends
compete with users
add photos
add videos
add documents
search across messages
search across contacts
search across files
search across photos
see file details
see file activity
upload images
upload videos
upload text
sharing via email
sharing via facebook
sharing via twitter
set dates
set times
set geofences
allows events
allows reminders
allows alerts
print attachments
print documents
print web pages
organize mail archiving
organize mail labeling
organize mail starring
organize mail deleting
organize