### Important notes

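1. Please install the ```plyplus``` and ```nltk``` libraries using the cell below if you don't have them (the translator also needs NLTK's ```stopwords``` corpus, which can be fetched once with ```nltk.download('stopwords')```):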

In [1]:
!pip install plyplus
!pip install nltk



2. Format of input lines (the ones that go in ```text``` variable):
    1. For **deftemplate** statement:
        1. ___ template has properties ___ , ___ , ..., and ___ .
        2. ___ template has property ___ 
        3. ___ template has properties ___ and ___ .
    2. For **assert** statements:
        1. There exists ___ with ___ ___ , ___ ___ , ... ... , and ___ ___ .
        2. There exists ___ with ___ ___ and ___ ___ .
        3. There exists ___ with ___ ___ .
    3. For **defrule** statement:
        1. If there exists ___ with ___ ___ then there exists a ___ with ___ ___ .
        2. If there exists a ___ with ___ ___ , ___ ___ then there exists a ___ with ___ ___ , ... ... , ___ ___ and ___ ___ .
    4. Also, please put each English-like construction on its own line (separate them with a **new line** character).
    
3. The output of the program was tested on [CLIPS 6.24 online compiler](https://www.codechef.com/ide)

In [2]:
'''
    (1) Removing the stop words is adapted from NLTK usage example:
    
        https://www.geeksforgeeks.org/removing-stop-words-nltk-python/
        
    (2) Grammar usage of plyplus is taken from:
    
        https://github.com/erezsh/plyplus/blob/master/docs/tutorial.md
'''

from nltk.corpus import stopwords
from plyplus import Grammar
from plyplus.strees import STree
    
class EmptyTextError(Exception):
    '''
    Raised when the text handed to the translator contains
    no non-blank lines to work with
    '''

class UnknownConstruction(Exception):
    '''
    Raised when a line of the input text cannot be matched
    to any of the supported keywords
    '''
    
class CLIPSTranslator():
    '''
    Class which is responsible for translating the English-language-like
    constructions into the compilable CLIPS code

    Attributes:
        lines:      non-blank input lines, with ',' replaced by a space and '.' removed
        stop_words: English stop words obtained from nltk
        tokens:     mapping of every line to its words with the stop words filtered out
        rules:      number which the next generated defrule will receive
    '''

    def __init__(self, text: str) -> None:
        '''
        Splitting the text into lines, saving and tokenizing them

        Raises:
            EmptyTextError: when the text has no non-blank line
        '''

        # commas only separate the pairs and periods only end a sentence,
        # so neither of them carries information for the grammar
        cleaned = text.replace(',', ' ').replace('.', '')

        # saving the lines if they are not empty
        self.lines = [line for line in cleaned.split('\n') if line.strip()]

        # letting the user know that something is wrong with the input
        if not self.lines:
            raise EmptyTextError('no text provided')

        # getting the stop words from nltk
        self.stop_words = stopwords.words('english')

        # do the tokenization of obtained lines
        self.tokenize()

        # initializing the amount of current rules
        self.rules = 1

    def tokenize(self) -> None:
        '''
        Function which transforms the initial lines of English language into the tokens
        with removal of stop words ('a', 'with', etc.)

        The result is stored in self.tokens as {line: [token, ...]}
        '''

        # a set gives constant-time membership checks for every token
        stop_words = set(self.stop_words)

        # going through each line, splitting and removing stop words
        # (the comparison is case-insensitive, the kept tokens preserve their case)
        self.tokens = {
            line: [token for token in line.split() if token.lower() not in stop_words]
            for line in self.lines
        }

    @staticmethod
    def _fact(tree) -> str:
        '''
        Build the bare fact "(alias (attribute value) ...)" out of the tail of an assert node
        '''

        # getting the name of entity
        alias = tree[0].tail[0].lower()

        parts = [f'({alias}']
        for pair in tree[1:]:

            # name of the attribute
            att = pair.tail[0].tail[0].lower()

            # only pure integers stay bare, everything else (including
            # alphanumeric values such as CS101) becomes a quoted string
            raw = pair.tail[1].tail[0]
            val = raw if raw.isdigit() else f'"{raw}"'

            # appending the pair into the fact
            parts.append(f' ({att} {val})')

        return ''.join(parts) + ')'

    def _build_parser(self) -> 'Grammar':
        '''
        Create the plyplus parser which recognises the three supported constructions
        '''

        # raw string: \w and \d have to reach plyplus untouched
        rules = r"""
            // Everything has to have a start
            @start: deftemplate | assert | defrule;

            // Main rules
            deftemplate: alias 'template' 'propert(ies|y)' attribute+ ;
            assert: 'exists' alias tuple+ ;
            defrule: assert assert ;

            // Additional helper-types
            @string: '\w+' ;
            @number: '\d+' ;
            tuple: attribute value ;
            value: string | number ;
            alias: string ;
            attribute: string ;
            SPACES: '[ ]+' (%ignore) ;

        """
        return Grammar(rules)

    # NOTE: the STree annotations below are quoted so that they are not
    # evaluated at the moment the class is defined
    def translate_deftemplate(self, tree: 'STree') -> str:
        '''
        Translate tree representation of deftemplate command into CLIPS language

        tree is the tail of a deftemplate node: the alias followed by the attributes
        '''

        # getting the name of template
        alias = tree[0].tail[0].lower()

        # one slot per attribute of the template
        slots = ''.join(f' (slot {t.tail[0].lower()})' for t in tree[1:])

        # gluing together the parts of command
        return f'(deftemplate {alias}\n   {slots})'

    def translate_assert(self, tree: 'STree') -> str:
        '''
        Translate tree representation of assert command into CLIPS language

        tree is the tail of an assert node: the alias followed by (attribute, value) tuples
        '''

        return f'(assert {self._fact(tree)})'

    def translate_defrule(self, tree: 'STree') -> str:
        '''
        Translate tree representation of defrule command into CLIPS language

        tree is the tail of a defrule node: the condition assert followed by the
        action assert. Every call consumes one rule number (self.rules).
        '''

        # left-hand side is a plain fact pattern, right-hand side is a full assert
        condition = self._fact(tree[0].tail)
        action = self.translate_assert(tree[1].tail)
        res = f'(defrule rule{self.rules}\n     {condition} => {action})'

        # incrementing the amount of existing rules
        self.rules += 1

        return res

    def translated_lines(self) -> str:
        '''
        Using the grammar functionality translate the given string input
        into the compilable CLIPS code

        Returns:
            one CLIPS command per input line followed by the closing "(exit)"

        Raises:
            UnknownConstruction: when a line can't be parsed or translated;
                the original error is attached as __cause__
        '''

        parser = self._build_parser()

        # keyword of the parsed tree -> method which knows how to translate it
        handlers = {
            'deftemplate': self.translate_deftemplate,
            'assert': self.translate_assert,
            'defrule': self.translate_defrule,
        }

        # restart the numbering so that repeated calls give the same output
        self.rules = 1

        # string to hold the output
        res = ''

        # going through self.lines (not the tokens dict) so that a line
        # which is repeated in the input is also repeated in the output
        for number, line in enumerate(self.lines, start=1):
            message = f"line {number} doesn't contain a known construction"
            try:

                # parsing the tokens
                tree = parser.parse(" ".join(self.tokens[line]))

                # determining the rule and building the corresponding CLIPS code
                handler = handlers.get(tree.head)
                if handler is None:
                    raise UnknownConstruction(message)
                res += handler(tree.tail)
            except UnknownConstruction:
                raise
            except Exception as exc:
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit through and keeps the real cause in the traceback
                raise UnknownConstruction(message) from exc
            res += '\n'

        res += '(exit)\n;' # to make the code compilable

        return res

In [3]:
# Sample input exercising all three supported constructions:
# three template declarations, one fact and two rules (one sentence per line).
# Blank lines and the leading indentation are fine: blank lines are dropped
# by the translator and every line is split on whitespace.
text = """
    Course template has property of name
    Professor template has properties of name and course
    Student template has properties of name and age
    
    There exists a course with name IR
    
    If there exists a course with name IR then there exists student with name Ruslan and age 20
    If there exists a course with name IR then there exists professor with name Stanislav and course IR
"""

# the constructor splits and tokenizes the text; translated_lines() parses
# every line and returns the whole CLIPS program as a single string
translator = CLIPSTranslator(text)
clips_code = translator.translated_lines()

# print (rather than a bare expression) so the newlines are rendered
print(clips_code)

(deftemplate course
    (slot name))
(deftemplate professor
    (slot name) (slot course))
(deftemplate student
    (slot name) (slot age))
(assert (course (name "IR")))
(defrule rule1
     (course (name "IR")) => (assert (student (name "Ruslan") (age 20))))
(defrule rule2
     (course (name "IR")) => (assert (professor (name "Stanislav") (course "IR"))))
(exit)
;
