# Notebook for CCG Generation

## Defining a lexicon

In [1]:
class Lexicon:
    """Store lexical entries: each word maps to a list of category records.

    Every record is a dict with two keys, 'category' and 'semantics'.
    A word may carry several records (lexical ambiguity).
    """

    def __init__(self):
        # word -> list of {'category': ..., 'semantics': ...} records,
        # kept in insertion order.
        self.entries = {}

    def add_entry(self, word, category, semantics=None):
        """Append one (category, semantics) record to the entries of `word`.

        The word's list is created on first use; `semantics` is optional and
        stored as None when omitted.
        """
        record = {'category': category, 'semantics': semantics}
        self.entries.setdefault(word, []).append(record)

    def get_categories(self, word):
        """Return the list of records stored for `word`, or [] if unknown.

        For a known word this is the stored list itself, not a copy.
        """
        if word in self.entries:
            return self.entries[word]
        return []

    def __str__(self):
        """Render the whole lexicon, one word header plus one line per record."""
        parts = ["Lexicon:\n"]
        for word, word_entries in self.entries.items():
            parts.append(f"{word}:\n")
            for entry in word_entries:
                # Any falsy semantics value is displayed as the text 'None'.
                semantics = entry['semantics'] or 'None'
                parts.append(f"  - Category: {entry['category']}, Semantics: {semantics}\n")
        return "".join(parts)

In [2]:
# Build the demo lexicon from a table of (word, category, semantics) rows.
lexicon = Lexicon()

# Rows are inserted top to bottom, so the lexicon keeps this order.
# "saw" is listed twice on purpose: it is ambiguous between a transitive
# verb and a noun.
lexical_entries = [
    ("John", "NP", "john"),
    ("Mary", "NP", "mary"),
    ("likes", "(S\\NP)/NP", "λx.λy.likes(y, x)"),
    ("runs", "S\\NP", "λx.runs(x)"),
    ("the", "NP/N", "λx.x"),
    ("dog", "N", "dog"),
    ("cat", "N", "cat"),
    ("saw", "(S\\NP)/NP", "λx.λy.saw(y, x)"),
    ("saw", "N", "saw"),
]

for entry_word, entry_category, entry_semantics in lexical_entries:
    lexicon.add_entry(entry_word, entry_category, semantics=entry_semantics)

In [3]:
# Look up one word and list every lexical entry recorded for it.
word = "likes"
word_entries = lexicon.get_categories(word)

print(f"Categories for '{word}':")
for entry in word_entries:
    print(f"  - Category: {entry['category']}, Semantics: {entry['semantics']}")

# Dump the full lexicon through Lexicon.__str__.
print(lexicon)

Categories for 'likes':
  - Category: (S\NP)/NP, Semantics: λx.λy.likes(y, x)
Lexicon:
John:
  - Category: NP, Semantics: john
Mary:
  - Category: NP, Semantics: mary
likes:
  - Category: (S\NP)/NP, Semantics: λx.λy.likes(y, x)
runs:
  - Category: S\NP, Semantics: λx.runs(x)
the:
  - Category: NP/N, Semantics: λx.x
dog:
  - Category: N, Semantics: dog
cat:
  - Category: N, Semantics: cat
saw:
  - Category: (S\NP)/NP, Semantics: λx.λy.saw(y, x)
  - Category: N, Semantics: saw



## Generating sentences without semantic constraints

In [4]:
import random

class SentenceGenerator:
    """Randomly generate word sequences for a CCG category from a lexicon.

    The lexicon object must expose an ``entries`` dict mapping each word to a
    list of dicts that carry a 'category' string. Generation is top-down:
    a target category is satisfied by a word of exactly that category, or by
    a function word combined (forward or backward application) with a
    recursively generated argument.

    Category strings use '/' and '\\' as slashes. When a category contains
    several slashes outside parentheses, the rightmost one is taken as the
    main operator (slashes associate to the left, so ``S\\NP/NP`` is read as
    ``(S\\NP)/NP``).
    """

    def __init__(self, lexicon):
        """Keep a reference to the lexicon used for all later generation."""
        self.lexicon = lexicon

    def generate_sentence(self, target_category="S"):
        """Generate one sentence for `target_category`.

        Returns the words joined by single spaces, or None when no derivation
        was found.
        """
        # build_sentence returns (words, remaining_category, tree); the tree
        # is not needed here but must still be unpacked.
        sentence, remaining_category, _tree = self.build_sentence(target_category)

        # A remaining category of None means the target was fully resolved.
        if remaining_category is None:
            return " ".join(sentence)
        return None

    def generate_multiple_sentences(self, target_category="S", count=5):
        """Collect up to `count` distinct sentences for `target_category`.

        At most ``count * 5`` generation attempts are made, so fewer than
        `count` sentences may be returned. The result is a list built from a
        set, so its order is not meaningful.
        """
        sentences = set()
        attempts = 0
        max_attempts = count * 5  # Allow multiple attempts to find unique sentences

        while len(sentences) < count and attempts < max_attempts:
            attempts += 1
            sentence = self.generate_sentence(target_category)
            # Failed attempts (None) and empty strings are ignored; the set
            # takes care of duplicates.
            if sentence:
                sentences.add(sentence)

        return list(sentences)

    def build_sentence(self, target_category, depth=0, max_depth=10):
        """Recursively build words and a derivation tree for `target_category`.

        Returns a 3-tuple ``(words, remaining_category, tree)``:

        * success: ``(list_of_words, None, tree_dict)`` where the tree is a
          nested dict with 'node' and 'children' keys and words as leaves;
        * failure (no match, or recursion deeper than `max_depth`):
          ``([], target_category, None)``.

        Lexicon words are tried in a freshly shuffled order on every call, so
        results vary between calls. Progress is traced with print().
        """
        if depth > max_depth:
            print(f"Exceeded max recursion depth: {depth} for target {target_category}")
            # Report failure. Callers treat a None remaining category as
            # "resolved", so returning None here would make an aborted branch
            # look like a successful, empty derivation.
            return [], target_category, None

        if target_category == "S":
            # Decompose S into NP and S\NP
            print("Decomposing S into NP and S\\NP")
            np_sentence, np_category, np_tree = self.build_sentence("NP", depth + 1, max_depth)
            if np_category is None:  # NP found
                print(f"NP successfully resolved: {np_sentence}")
                snp_sentence, snp_category, snp_tree = self.build_sentence("S\\NP", depth + 1, max_depth)
                if snp_category is None:  # S\NP found
                    print(f"Successfully decomposed S: NP='{np_sentence}' and S\\NP='{snp_sentence}'")
                    tree = {
                        "node": "S",
                        "children": [np_tree, snp_tree]
                    }
                    return np_sentence + snp_sentence, None, tree
                else:
                    print(f"Failed to resolve S\\NP after matching NP='{np_sentence}'")
            else:
                print(f"Failed to resolve NP for S decomposition")
            # If the decomposition failed, fall through and try the lexicon.

        lexicon_keys = list(self.lexicon.entries.keys())
        random.shuffle(lexicon_keys)

        normalized_target = self.normalize_category(target_category)

        for word in lexicon_keys:
            entries = self.lexicon.entries[word]
            for entry in entries:
                category = entry['category']
                print(f"Trying word '{word}' with category '{category}' to match '{target_category}'")

                # Direct match:
                if self.normalize_category(category) == normalized_target:
                    print(f"Direct match found: '{word}' for category '{target_category}'")
                    tree = {"node": target_category, "children": [word]}
                    return [word], None, tree

                # Forward application: (A/B) + B → A
                if self.is_forward_function(category):
                    left_category, right_category = map(self.normalize_category, self.split_category(category))
                    if normalized_target == left_category:
                        print(f"Attempting forward application: '{word}' as ({left_category}/{right_category})")
                        right_sentence, remaining_category, right_tree = self.build_sentence(right_category, depth + 1, max_depth)
                        if remaining_category is None:
                            print(f"Forward application successful for '{word}'")
                            tree = {
                                "node": left_category,
                                "children": [{"node": f"({left_category}/{right_category})", "children": [word]}, right_tree]
                            }
                            return [word] + right_sentence, None, tree

                # Backward application: B + (A\B) → A
                if self.is_backward_function(category):
                    left_category, right_category = map(self.normalize_category, self.split_category(category))
                    if normalized_target == left_category:
                        print(f"Attempting backward application: '{word}' as ({left_category}\\{right_category})")
                        left_sentence, remaining_category, left_tree = self.build_sentence(right_category, depth + 1, max_depth)
                        if remaining_category is None:
                            print(f"Backward application successful for '{word}'")
                            tree = {
                                "node": left_category,
                                "children": [left_tree, {"node": f"({left_category}\\{right_category})", "children": [word]}]
                            }
                            return left_sentence + [word], None, tree

        print(f"No match found for target '{target_category}' at depth {depth}")
        return [], target_category, None

    def is_forward_function(self, category):
        """Return True when the main (top-level) operator of `category` is '/'."""
        _index, operator = self._main_operator(self.normalize_category(category))
        return operator == "/"

    def is_backward_function(self, category):
        """Return True when the main (top-level) operator of `category` is '\\'."""
        _index, operator = self._main_operator(self.normalize_category(category))
        return operator == "\\"

    def split_category(self, function_category):
        """Split a function category A/B or A\\B at its main operator.

        Returns ``[A, B]`` (components keep their own parentheses), or
        ``(None, None)`` when the category has no slash outside parentheses.
        """
        core = self.normalize_category(function_category)
        index, operator = self._main_operator(core)
        if operator is None:
            return None, None
        return [core[:index], core[index + 1:]]

    def normalize_category(self, category):
        """Strip parentheses that enclose the whole category.

        Only a leading '(' whose matching ')' is the final character is
        removed, so ``((S\\NP))`` becomes ``S\\NP`` while ``(S\\NP)/(NP)`` is
        returned unchanged.
        """
        while self._is_fully_wrapped(category):
            category = category[1:-1]
        return category

    def _is_fully_wrapped(self, category):
        # True when the first character is '(' and its matching ')' is the
        # last character of the string.
        if len(category) < 2 or category[0] != "(" or category[-1] != ")":
            return False
        depth = 0
        for index, char in enumerate(category):
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0:
                    # The opening bracket closes here; it wraps the whole
                    # string only if this is the final character.
                    return index == len(category) - 1
        return False

    def _main_operator(self, category):
        # Locate the rightmost slash that is outside all parentheses.
        # Returns (index, symbol), or (-1, None) when there is none.
        depth = 0
        for index in range(len(category) - 1, -1, -1):
            char = category[index]
            if char == ")":
                depth += 1
            elif char == "(":
                depth -= 1
            elif depth == 0 and char in "/\\":
                return index, char
        return -1, None

In [5]:
import anytree
from anytree import Node, RenderTree

# New code for visualizing the tree
def build_visual_tree(tree, parent=None):
    """
    Turn a nested derivation tree into an anytree.Node hierarchy.

    A dict becomes a Node named after its 'node' value, with each item of
    its optional 'children' list converted recursively beneath it. A string
    becomes a leaf Node. Any other value is ignored and yields None.

    Returns the Node created for `tree` (attached to `parent` if given).
    """
    if isinstance(tree, str):  # Leaf node (word)
        return Node(tree, parent=parent)

    if not isinstance(tree, dict):
        # Neither a subtree nor a word: nothing to build.
        return None

    branch = Node(tree['node'], parent=parent)
    for subtree in tree.get('children', []):
        # Children attach themselves to `branch` through the parent argument.
        build_visual_tree(subtree, parent=branch)
    return branch

In [6]:
import json

generator = SentenceGenerator(lexicon)

# Generate multiple sentences and store them in a matrix
num_sentences = 5  # Adjust the number of sentences as needed
matrix = []  # Each row is [sentence_string, derivation_tree]

# Generate sentences and trees.
# Named throwaway variables are used instead of `_`, which IPython reserves
# for the last cell output.
for _attempt in range(num_sentences):
    sentence, _remaining, tree = generator.build_sentence("S")  # Generate sentence and its tree
    matrix.append([" ".join(sentence), tree])  # Append as a row in the matrix

# Print the matrix with ASCII tree visualizations
print("Generated Sentences and Trees:")
for i, (sentence, tree) in enumerate(matrix):
    print(f"Sentence {i + 1}: {sentence}")
    ascii_tree_root = build_visual_tree(tree)  # Convert tree to anytree format
    print("Tree Visualization:")
    if ascii_tree_root is None:
        # build_sentence returns tree=None when no derivation was found;
        # there is nothing to render in that case.
        print("(no derivation found)")
    else:
        for pre, fill, node in RenderTree(ascii_tree_root):
            print(f"{pre}{node.name}")
    print("-" * 50)

# Save the matrix to a JSON file (failed rows are stored as ["", null])
with open("sentences_and_trees.json", "w") as f:
    json.dump(matrix, f, indent=2)

print("\nMatrix saved to 'sentences_and_trees.json'")


Decomposing S into NP and S\NP
Trying word 'likes' with category '(S\NP)/NP' to match 'NP'
Trying word 'saw' with category '(S\NP)/NP' to match 'NP'
Trying word 'saw' with category 'N' to match 'NP'
Trying word 'John' with category 'NP' to match 'NP'
Direct match found: 'John' for category 'NP'
NP successfully resolved: ['John']
Trying word 'runs' with category 'S\NP' to match 'S\NP'
Direct match found: 'runs' for category 'S\NP'
Successfully decomposed S: NP='['John']' and S\NP='['runs']'
Decomposing S into NP and S\NP
Trying word 'likes' with category '(S\NP)/NP' to match 'NP'
Trying word 'cat' with category 'N' to match 'NP'
Trying word 'Mary' with category 'NP' to match 'NP'
Direct match found: 'Mary' for category 'NP'
NP successfully resolved: ['Mary']
Trying word 'likes' with category '(S\NP)/NP' to match 'S\NP'
Attempting forward application: 'likes' as (S\NP/NP)
Trying word 'runs' with category 'S\NP' to match 'NP'
Trying word 'the' with category 'NP/N' to match 'NP'
Attempting