# Notebook for CCG Generation

## Defining a lexicon

In [1]:
class Lexicon:
    def __init__(self):
        # Dictionary to store lexical entries, where the key is a word
        # and the value is a list of categories (with optional semantics).
        self.entries = {}


    def add_entry(self, word, category, semantics=None):
        # Add a lexical entry to the lexicon.
        if word not in self.entries:
            self.entries[word] = []
        self.entries[word].append({'category': category, 'semantics': semantics})

    def get_categories(self, word):
        # Retrieve all categories for a given word.
        return self.entries.get(word, [])

    def __str__(self):
        # Display all entries in the lexicon.
        lexicon_str = "Lexicon:\n"
        for word, categories in self.entries.items():
            lexicon_str += f"{word}:\n"
            for entry in categories:
                semantics = entry['semantics'] if entry['semantics'] else 'None'
                lexicon_str += f"  - Category: {entry['category']}, Semantics: {semantics}\n"
        return lexicon_str

In [2]:
# Create an instance of the Lexicon
lexicon = Lexicon()

# Add entries
lexicon.add_entry("John", "NP", semantics="john")
lexicon.add_entry("Mary", "NP", semantics="mary")
lexicon.add_entry("likes", "(S\\NP)/NP", semantics="λx.λy.likes(y, x)")
lexicon.add_entry("runs", "S\\NP", semantics="λx.runs(x)")
lexicon.add_entry("the", "NP/N", semantics="λx.x")
lexicon.add_entry("dog", "N", semantics="dog")
lexicon.add_entry("cat", "N", semantics="cat")

# Handle ambiguous entries (e.g., "saw" as both a verb and noun)
lexicon.add_entry("saw", "(S\\NP)/NP", semantics="λx.λy.saw(y, x)")
lexicon.add_entry("saw", "N", semantics="saw")

In [3]:
# Retrieve categories for a specific word
word = "likes"
print(f"Categories for '{word}':")
for entry in lexicon.get_categories(word):
    print(f"  - Category: {entry['category']}, Semantics: {entry['semantics']}")

# Print the entire lexicon
print(lexicon)

Categories for 'likes':
  - Category: (S\NP)/NP, Semantics: λx.λy.likes(y, x)
Lexicon:
John:
  - Category: NP, Semantics: john
Mary:
  - Category: NP, Semantics: mary
likes:
  - Category: (S\NP)/NP, Semantics: λx.λy.likes(y, x)
runs:
  - Category: S\NP, Semantics: λx.runs(x)
the:
  - Category: NP/N, Semantics: λx.x
dog:
  - Category: N, Semantics: dog
cat:
  - Category: N, Semantics: cat
saw:
  - Category: (S\NP)/NP, Semantics: λx.λy.saw(y, x)
  - Category: N, Semantics: saw



## Generating sentences without semantic constraints

In [4]:
import random

class SentenceGenerator: 
    #A class is a blueprint for creating objects (instances), 
    # which can hold data and have functions (methods) associated with them.
    def __init__(self, lexicon): 
        # Constructor method for the class: special method in Python automatically called when an 
        # object (an instance of the class) is created.
        self.lexicon = lexicon 
        # This line stores the lexicon parameter passed to the constructor as an instance attribute.

    def generate_sentence(self, target_category="S"):
        # Generate a sentence that matches the target category (default is 'S' for a sentence).
        # Defines a method named generate_sentence: 
        # a member function of the SentenceGenerator class, 
        # meaning it operates on instances of that class.
        sentence, remaining_category = self.build_sentence(target_category) 
        # This line calls the build_sentence method: result is expected to be a tuple.
        
        # If we couldn't completely match the category, return None
        if remaining_category is None: #This part handles the outcome of the sentence generation.
            return " ".join(sentence)
        else:
            return None

    def generate_multiple_sentences(self, target_category="S", count=5):
        # Generate multiple unique sentences that match the target category.
        sentences = set() #This is a set that will hold the unique sentences generated.
        attempts = 0 #This counter keeps track of the number of attempts made to generate a sentence.
        max_attempts = count * 5  # Allow multiple attempts to find unique sentences
        
        # Ensures loop continues until desired number of unique sentences has been generated or 
        # max number of attempts is reached.
        while len(sentences) < count and attempts < max_attempts:
            attempts += 1
            sentence = self.generate_sentence(target_category)
            if sentence and sentence not in sentences:
                sentences.add(sentence)
        
        return list(sentences)

    def build_sentence(self, target_category, depth=0, max_depth=10):
        # Recursively build a sentence by matching words to the target category.
        if depth > max_depth: 
            #This block checks if the current recursion depth has exceeded the maximum allowed depth
            print(f"Exceeded max recursion depth: {depth} for target {target_category}")
            return [], target_category #safeguard to avoid an infinite recursion loop
        
        if target_category == "S":
            # Decompose S into NP and S\NP
            print("Decomposing S into NP and S\\NP")
            # The method calls self.build_sentence recursively to generate a noun phrase ("NP"). 
            # The recursion depth is increased by 1 (depth + 1) to track the level of recursion.
            # np_sentence will store the generated noun phrase.
            # np_category will store the resulting category after attempting to build the NP.
            np_sentence, np_category = self.build_sentence("NP", depth + 1, max_depth)
            # This checks if the np_category is None, indicating the NP was successfully generated 
            # (since build_sentence returns None when it successfully generates a category).
            if np_category is None:  # NP found
                print(f"NP successfully resolved: {np_sentence}")
                # The method then tries to build the remaining part of the sentence, 
                # which is the verb phrase (S\NP).
                snp_sentence, snp_category = self.build_sentence("S\\NP", depth + 1, max_depth)
                if snp_category is None:  # S\NP found
                    print(f"Successfully decomposed S: NP='{np_sentence}' and S\\NP='{snp_sentence}'")
                    # The method then returns a tuple consisting of:
                    # The concatenated string of the noun phrase (np_sentence) and verb phrase 
                    # (snp_sentence), forming the complete sentence.
                    # None, indicating that there are no remaining categories to expand
                    return np_sentence + snp_sentence, None
                else:
                    print(f"Failed to resolve S\\NP after matching NP='{np_sentence}'")
            else:
                print(f"Failed to resolve NP for S decomposition")

        # Shuffle lexicon keys for random selection
        lexicon_keys = list(self.lexicon.entries.keys())
        random.shuffle(lexicon_keys)

        for word in lexicon_keys: # iterating through each word in the lexicon keys, 
            entries = self.lexicon.entries[word] #retrieving the associated entries
            for entry in entries:
                category = entry['category']
                print(f"Trying word '{word}' with category '{category}' to match '{target_category}'")

                # Direct match:
                # if the category of the current word exactly matches the target_category, 
                # the word is directly returned as part of the sentence.
                if self.normalize_category(category) == self.normalize_category(target_category):
                    print(f"Direct match found: '{word}' for category '{target_category}'")
                    return [word], None

                # Forward application: (A/B) + B → A
                if self.is_forward_function(category):
                    left_category, right_category = map(self.normalize_category, self.split_category(category)) #the category is split 
                    if self.normalize_category(target_category) == left_category: #checks if the target_category matches the left_category
                        print(f"Attempting forward application: '{word}' as ({left_category}/{right_category})")
                        #it attempts forward application, meaning it tries to recursively generate the right_sentence for the right_category.
                        right_sentence, remaining_category = self.build_sentence(right_category, depth + 1, max_depth)
                        if remaining_category is None:
                            print(f"Forward application successful for '{word}'")
                            return [word] + right_sentence, None

                # Backward application: B + (A\B) → A
                if self.is_backward_function(category):
                    left_category, right_category = map(self.normalize_category, self.split_category(category))
                    if self.normalize_category(target_category) == left_category:
                        print(f"Attempting backward application: '{word}' as ({left_category}\\{right_category})")
                        left_sentence, remaining_category = self.build_sentence(right_category, depth + 1, max_depth)
                        if remaining_category is None:
                            print(f"Backward application successful for '{word}'")
                            return left_sentence + [word], None

        # No match found, return failure case
        print(f"No match found for target '{target_category}' at depth {depth}")
        return [], target_category

    def is_forward_function(self, category):
        # Check if the category is a forward function (A/B).
        return "/" in category

    def is_backward_function(self, category):
        # Check if the category is a backward function (A\B).
        return "\\" in category

    def split_category(self, function_category):
        # Split a function category (A/B or A\B) into its components A and B.
        if "/" in function_category:
            return function_category.split("/", 1)
        elif "\\" in function_category:
            return function_category.split("\\", 1)
        return None, None
    
    def normalize_category(self, category):
        # Remove unnecessary parentheses from a category.
        while category.startswith("(") and category.endswith(")"):
            category = category[1:-1]
        return category

In [5]:
# Create the generator given the lexicon
generator = SentenceGenerator(lexicon)

# Generate a single sentence
#print("Generated Sentence:", generator.generate_sentence("S"))

# Generate multiple unique sentences
print("Generated Sentences:", generator.generate_multiple_sentences("S", count=5))

Decomposing S into NP and S\NP
Trying word 'the' with category 'NP/N' to match 'NP'
Attempting forward application: 'the' as (NP/N)
Trying word 'Mary' with category 'NP' to match 'N'
Trying word 'John' with category 'NP' to match 'N'
Trying word 'runs' with category 'S\NP' to match 'N'
Trying word 'the' with category 'NP/N' to match 'N'
Trying word 'dog' with category 'N' to match 'N'
Direct match found: 'dog' for category 'N'
Forward application successful for 'the'
NP successfully resolved: ['the', 'dog']
Trying word 'John' with category 'NP' to match 'S\NP'
Trying word 'saw' with category '(S\NP)/NP' to match 'S\NP'
Attempting forward application: 'saw' as (S\NP/NP)
Trying word 'cat' with category 'N' to match 'NP'
Trying word 'likes' with category '(S\NP)/NP' to match 'NP'
Trying word 'John' with category 'NP' to match 'NP'
Direct match found: 'John' for category 'NP'
Forward application successful for 'saw'
Successfully decomposed S: NP='['the', 'dog']' and S\NP='['saw', 'John']'


## Generating sentences with semantic constraints

In [7]:
class LexiconSemCon:
    def __init__(self):
        # Dictionary to store lexical entries, where the key is a word
        # and the value is a list of categories (with optional semantics and constraints).
        self.entries = {}

    def add_entry(self, word, category, semantics=None, constraints=None):
        """
        Add a lexical entry to the lexicon.
        :param word: The word being added.
        :param category: The syntactic category (e.g., NP, S\\NP).
        :param semantics: Optional semantics (e.g., lambda expressions).
        :param constraints: Optional semantic constraints (e.g., {'animate': True}).
        """
        if word not in self.entries:
            self.entries[word] = []
        entry = {'category': category, 'semantics': semantics, 'constraints': constraints or {}}
        self.entries[word].append(entry)

    def get_categories(self, word):
        """
        Retrieve all categories for a given word.
        :param word: The word to look up.
        :return: A list of entries (each with category, semantics, and constraints).
        """
        return self.entries.get(word, [])

    def __str__(self):
        """
        Display all entries in the lexicon in a human-readable format.
        :return: A formatted string representation of the lexicon.
        """
        lexicon_str = "Lexicon:\n"
        for word, categories in self.entries.items():
            lexicon_str += f"{word}:\n"
            for entry in categories:
                semantics = entry['semantics'] if entry['semantics'] else 'None'
                constraints = entry['constraints'] if entry['constraints'] else 'None'
                lexicon_str += f"  - Category: {entry['category']}, Semantics: {semantics}, Constraints: {constraints}\n"
        return lexicon_str


In [32]:
# Create an instance of the Lexicon
lexicon2 = LexiconSemCon()

# Add entries
lexicon2.add_entry("John", "NP", semantics="john", constraints={"animate": True})
lexicon2.add_entry("Mary", "NP", semantics="mary", constraints={"animate": True})
lexicon2.add_entry("likes", "(S\\NP)/NP", semantics="λx.λy.likes(y, x)", constraints={"animate": True})
lexicon2.add_entry("runs", "S\\NP", semantics="λx.runs(x)", constraints={"animate": True})
lexicon2.add_entry("the", "NP/N", semantics="λx.x")
lexicon2.add_entry("dog", "N", semantics="dog", constraints={"animate": True})
lexicon2.add_entry("cat", "N", semantics="cat", constraints={"animate": True})
lexicon2.add_entry("rock", "N", semantics="rock", constraints={"animate": False})

# Handle ambiguous entries (e.g., "saw" as both a verb and noun)
lexicon2.add_entry("saw", "(S\\NP)/NP", semantics="λx.λy.saw(y, x)", constraints={"animate": True})
lexicon2.add_entry("saw", "N", semantics="saw", constraints={"animate": False})

In [44]:
import random

class SentenceGeneratorSemCon:
    def __init__(self, lexicon):
        self.lexicon = lexicon

    def generate_sentence(self, target_category="S"):
        sentence, remaining_category = self.build_sentence(target_category)
        if remaining_category is None:
            return " ".join(sentence)
        else:
            return None

    def generate_multiple_sentences(self, target_category="S", count=5):
        sentences = set()
        attempts = 0
        max_attempts = count * 5
        
        while len(sentences) < count and attempts < max_attempts:
            attempts += 1
            sentence = self.generate_sentence(target_category)
            if sentence and sentence not in sentences:
                sentences.add(sentence)
        
        return list(sentences)

    def build_sentence(self, target_category, depth=0, max_depth=10, current_constraints=None):
        if depth > max_depth:
            print(f"Exceeded max recursion depth: {depth} for target {target_category}")
            return [], target_category
        
        if target_category == "S":
            # Decompose S into NP and S\NP
            print("Decomposing S into NP and S\\NP")
            # The method calls self.build_sentence recursively to generate a noun phrase ("NP"). 
            # The recursion depth is increased by 1 (depth + 1) to track the level of recursion.
            # np_sentence will store the generated noun phrase.
            # np_category will store the resulting category after attempting to build the NP.
            np_sentence, np_category = self.build_sentence("NP", depth + 1, max_depth)
            # This checks if the np_category is None, indicating the NP was successfully generated 
            # (since build_sentence returns None when it successfully generates a category).
            if np_category is None:  # NP found
                print(f"NP successfully resolved: {np_sentence}")
                # The method then tries to build the remaining part of the sentence, 
                # which is the verb phrase (S\NP).
                snp_sentence, snp_category = self.build_sentence("S\\NP", depth + 1, max_depth)
                if snp_category is None:  # S\NP found
                    print(f"Successfully decomposed S: NP='{np_sentence}' and S\\NP='{snp_sentence}'")
                    # The method then returns a tuple consisting of:
                    # The concatenated string of the noun phrase (np_sentence) and verb phrase 
                    # (snp_sentence), forming the complete sentence.
                    # None, indicating that there are no remaining categories to expand
                    return np_sentence + snp_sentence, None
                else:
                    print(f"Failed to resolve S\\NP after matching NP='{np_sentence}'")
            else:
                print(f"Failed to resolve NP for S decomposition")
        
        # Shuffle lexicon keys for random selection
        lexicon_keys = list(self.lexicon.entries.keys())
        random.shuffle(lexicon_keys)

        for word in lexicon_keys:
            entries = self.lexicon.entries[word]
            for entry in entries:
                category = entry['category']
                semantics = entry['semantics']
                constraints = entry['constraints']
                
                print(f"Trying word '{word}' with category '{category}' to match '{target_category}'")

                # Check if constraints match
                if current_constraints and not self.constraints_match(current_constraints, constraints):
                    print(f"Semantic constraints mismatch for word '{word}'")
                    continue

                # Direct match
                if self.normalize_category(category) == self.normalize_category(target_category):
                    print(f"Direct match found: '{word}' for category '{target_category}'")
                    return [word], None

                # Forward application: (A/B) + B → A
                if self.is_forward_function(category):
                    left_category, right_category = map(self.normalize_category, self.split_category(category))
                    if self.normalize_category(target_category) == left_category:
                        print(f"Attempting forward application: '{word}' as ({left_category}/{right_category})")
                        right_sentence, remaining_category = self.build_sentence(right_category, depth + 1, max_depth, constraints)
                        if remaining_category is None:
                            print(f"Forward application successful for '{word}'")
                            return [word] + right_sentence, None

                # Backward application: B + (A\B) → A
                if self.is_backward_function(category):
                    left_category, right_category = map(self.normalize_category, self.split_category(category))
                    if self.normalize_category(target_category) == left_category:
                        print(f"Attempting backward application: '{word}' as ({left_category}\\{right_category})")
                        left_sentence, remaining_category = self.build_sentence(right_category, depth + 1, max_depth, constraints)
                        if remaining_category is None:
                            print(f"Backward application successful for '{word}'")
                            return left_sentence + [word], None

        print(f"No match found for target '{target_category}' at depth {depth}")
        return [], target_category

    def constraints_match(self, current_constraints, new_constraints):
        """
        Check if the current constraints are compatible with the new constraints.
        """
        for key, value in current_constraints.items():
            if key in new_constraints and new_constraints[key] != value:
                return False
        return True

    def is_forward_function(self, category):
        return "/" in category

    def is_backward_function(self, category):
        return "\\" in category

    def split_category(self, function_category):
        if "/" in function_category:
            return function_category.split("/", 1)
        elif "\\" in function_category:
            return function_category.split("\\", 1)
        return None, None

    def normalize_category(self, category):
        while category.startswith("(") and category.endswith(")"):
            category = category[1:-1]
        return category


## PROBLEM: Semantic contraints do not work properly yet

In [48]:
# Create the generator given the lexicon
generator2 = SentenceGeneratorSemCon(lexicon2)

# Generate a single sentence
#print("Generated Sentence:", generator.generate_sentence("S"))

# Generate multiple unique sentences
print("Generated Sentences:", generator2.generate_multiple_sentences("S", count=15))

Decomposing S into NP and S\NP
Trying word 'John' with category 'NP' to match 'NP'
Direct match found: 'John' for category 'NP'
NP successfully resolved: ['John']
Trying word 'Mary' with category 'NP' to match 'S\NP'
Trying word 'saw' with category '(S\NP)/NP' to match 'S\NP'
Attempting forward application: 'saw' as (S\NP/NP)
Trying word 'John' with category 'NP' to match 'NP'
Direct match found: 'John' for category 'NP'
Forward application successful for 'saw'
Successfully decomposed S: NP='['John']' and S\NP='['saw', 'John']'
Decomposing S into NP and S\NP
Trying word 'runs' with category 'S\NP' to match 'NP'
Trying word 'dog' with category 'N' to match 'NP'
Trying word 'the' with category 'NP/N' to match 'NP'
Attempting forward application: 'the' as (NP/N)
Trying word 'rock' with category 'N' to match 'N'
Direct match found: 'rock' for category 'N'
Forward application successful for 'the'
NP successfully resolved: ['the', 'rock']
Trying word 'the' with category 'NP/N' to match 'S\NP