Algorithm to work with:
1. Create templates for 3 types
2. Take a sentence and define if it is defrule, deftemplate or assert
3. Take the needed parts from sentence
4. Print sentence in CLIPS form

Libraries:

In [1]:
import spacy
import en_core_web_sm
from spacy.matcher import Matcher
import nltk
from nltk.tokenize import word_tokenize

nlp = en_core_web_sm.load()
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

Functions to print different types of structure in CLIPS:

In [2]:
def deftemplate(arr):
  print("(deftemplate {}".format(arr[0]))
  print("\t", end = '')
  if len(arr) > 1:
    for word in arr[1:]:
      print("(slot {}) ".format(word), end = '')
    print(")")


def assert_(arr):
  if len(arr) > 2:
    print("(assert ({} ({} {})))".format(arr[0], arr[1], arr[2]))
  else:
    print("No translation for this assert")


# For simplicity: If assert then assert
def defrule(arr):
  part1 = arr[0]
  part2 = arr[1]
  print("(defrule someRule")
  print("\t({} (".format(part1[0]), end = '')
  if len(part1) > 1:
    for word in part1[1:]:
      print("{} ".format(word), end = '')
  print(")) => ", end = '')
  assert_(part2)

Patterns for template, assert and rule:

In [3]:
template_patterns = []
for pos in ['PROPN', 'NOUN']:
  for lemma in ['be', 'have']:
    template_pattern = [{'OP': '*'},
                        {'POS': pos},
                        {'OP': '*'},
                        {'LEMMA': lemma},
                        {'OP': '*'},
                        {'POS': 'NOUN', 'OP': '+'}]
    template_patterns.append(template_pattern)


assert_patterns = []
for pos in ['PROPN', 'NOUN']:
  for lemma in ['be', 'exist']:
    assert_pattern = [{'LEMMA': 'there'},
                    {'OP': '*'},
                    {'LEMMA': lemma},
                    {'OP': '*'},
                    {'POS': pos},
                    {'OP': '*'},
                    {'LEMMA': 'with', 'OP': '?'},
                    {'OP': '*'},
                    {'POS': 'NOUN', 'OP': '*'}]
    assert_patterns.append(assert_pattern)


rule_patterns = []
for assert_pattern in assert_patterns:
  rule_pattern = [{'LEMMA': 'if'}]+assert_pattern+[{'LEMMA': 'then'}]+assert_pattern
  rule_patterns.append(rule_pattern)

Function which determines if sentence matches with given pattern:

In [4]:
def isMatch(sent, patterns):
  matcher = Matcher(nlp.vocab)
  if patterns == template_patterns:
    matcher.add("Template_pattern", patterns)
  elif patterns == assert_patterns:
    matcher.add("Assert_pattern", patterns)
  elif patterns == rule_patterns:
    matcher.add("Rule_pattern", patterns)
  doc = nlp(sent)
  matches = matcher(doc)
  if len(matches) != 0:
    return True
  else:
    return False

The function which removes all the unneeded words from sentence:

In [5]:
def simplify(sentence, debug=False):
  doc = nlp(sentence)
  nouns = []
  for chunk in doc.noun_chunks:
    c = chunk.text
    c = c.split(" ")
    nouns+= c

  arr = []
  for token in doc:
    if (token.text in nouns) or (token.head.text in arr):
      arr.append(token.text)
  new = ' '.join(arr)
  doc = nlp(new)
  
  news = " ".join([token.lemma_ for token in doc])
  tokens = word_tokenize(news)
  tagged = nltk.pos_tag(tokens)
  filtered = []
  for i in tagged:
    if i[1] in ['NN', 'NNP', 'NNS', 'VBN', 'JJ', 'VBD']:
      filtered.append(i[0])

  if debug:
    print(new)
    print(tagged)

  return filtered

Main function which determines the sentence structure, simplifies it, then passes to print:

In [6]:
def translate(sentence):
  if isMatch(sentence, rule_patterns):
    sent = sentence.split("then")
    s1, s2 = sent[0], sent[1]
    defrule([simplify(s1), simplify(s2)])
  elif isMatch(sentence, template_patterns):
    s = simplify(sentence)
    deftemplate(s)
  elif isMatch(sentence, assert_patterns):
    s = simplify(sentence)
    assert_(s)
  else:
    print("The sentence doesn't match any pattern")

Examples:

In [7]:
s1 = "Cat template has properties of color, age, and name."
s2 = "There exists a cat with the name Bob."
s3 = "If there exists cat named Bor then there exists a cat named Tom."

Their outputs:

In [8]:
translate(s1)
print("\n")
translate(s2)
print("\n")
translate(s3)

(deftemplate Cat
	(slot template) (slot property) (slot color) (slot age) (slot name) )


(assert (cat (name Bob)))


(defrule someRule
	(cat (name Bor )) => (assert (cat (name Tom)))


More examples:

In [9]:
templates = [
             "A dog has tail, 4 legs and voice",
             "Car is engine, 4 wheels and color"
]
asserts = [
           "There is a car of white color",
           "There is dog named Jo"
]
rules = [
         "If there is car colored white then there is dog named Bob"
]

Outputs:

In [10]:
translate(templates[0])
print("\n")
translate(templates[1])
print("\n")
translate(asserts[0])
print("\n")
translate(asserts[1])
print("\n")
translate(rules[0])

(deftemplate dog
	(slot tail) (slot leg) (slot voice) )


(deftemplate car
	(slot engine) (slot wheel) (slot color) )


(assert (car (white color)))


(assert (dog (name Jo)))


(defrule someRule
	(car (white )) => (assert (dog (name Bob)))


Interaction with user:

In [13]:
# try to use similar constructions
while True:
  s = input("Write your sentence:")
  if s == 'q':
    break
  translate(s)

Write your sentence:q
