In [5]:
import torch
from torch import nn
from lambeq import PennyLaneModel, Dataset, BobcatParser, RemoveCupsRewriter
import random
import numpy as np

# Seed every random number generator used here (NumPy, `random`, PyTorch)
# with the same value so repeated runs are reproducible.
SEED = 35
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

# Training hyperparameters.
BATCH_SIZE = 50
EPOCHS = 100

# Fractions used for the train/test/validation split.
test_size = 0.2
val_size = 0.125

class XORSentenceModel(PennyLaneModel):
    """Hybrid model that scores a pair of diagrams with a small classical net.

    Both diagrams of a pair are evaluated with ``get_diagram_output``, the
    two results are concatenated along ``dim=1`` and passed through
    ``xor_net``, whose final ``Sigmoid`` yields one value in (0, 1) per pair.
    """

    def __init__(self, **kwargs):
        """Initialise the base model and create the classical head.

        Args:
            **kwargs: forwarded unchanged to ``PennyLaneModel.__init__``.
        """
        PennyLaneModel.__init__(self, **kwargs)

        # Input width 4 presumably corresponds to two outputs per diagram,
        # concatenated for the pair -- TODO confirm against the circuits used.
        self.xor_net = nn.Sequential(
            nn.Linear(4, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
            nn.Sigmoid(),
        )

    def forward(self, diagram_pairs):
        """Evaluate each pair of diagrams and return the network output.

        Args:
            diagram_pairs: iterable of ``(first_diagram, second_diagram)``
                pairs.

        Returns:
            The output of ``xor_net`` for the batch of pairs.
        """
        first_d, second_d = zip(*diagram_pairs)

        # Evaluate circuits and concatenate results. ``torch.cat`` takes the
        # tensors as ONE sequence argument, followed by ``dim``.
        evaluated_pairs = torch.cat(
            (self.get_diagram_output(first_d),
             self.get_diagram_output(second_d)),
            dim=1,
        )

        # Shift and scale: a value v becomes 2 * (v - 0.5), so inputs in
        # [0, 1] would land in [-1, 1].
        evaluated_pairs = 2 * (evaluated_pairs - 0.5)

        # Pass concatenated results through a simple neural network.
        return self.xor_net(evaluated_pairs)

In [None]:
#input data
from sklearn.model_selection import train_test_split

def split_data(eng_sentences,rus_sentences):
    """Partition parallel sentence lists into train, test and validation sets.

    The two inputs are split together by ``train_test_split`` so that the
    English and Russian items stay paired. The test portion is removed
    first using the module-level ``test_size``; the validation portion is
    then taken from what remains. Both splits use ``SEED`` as
    ``random_state``. The first five training pairs are printed.

    Args:
        eng_sentences: English sentences.
        rus_sentences: Russian sentences, parallel to ``eng_sentences``.

    Returns:
        A tuple ``(eng_train, eng_test, eng_val, rus_train, rus_test,
        rus_val)``.
    """
    # First cut: hold out the test portion.
    eng_rest, eng_test, rus_rest, rus_test = train_test_split(
        eng_sentences,
        rus_sentences,
        test_size=test_size,
        random_state=SEED,
    )

    # val_size is divided by (1 - test_size) so that it is expressed
    # relative to what is left after the test split.
    val_fraction = val_size / (1 - test_size)

    # Second cut: carve the validation portion out of the remainder.
    eng_train, eng_val, rus_train, rus_val = train_test_split(
        eng_rest,
        rus_rest,
        test_size=val_fraction,
        random_state=SEED,
    )

    # Show a few aligned training pairs as a sanity check.
    preview = list(zip(eng_train[:5], rus_train[:5]))
    print(preview)

    return eng_train, eng_test, eng_val, rus_train, rus_test, rus_val

# Split the parallel corpus into train / test / validation subsets.
# NOTE(review): eng_sentences and rus_sentences are not defined in the cells
# visible here -- confirm they are created earlier in the notebook so that
# Restart & Run All still works.
eng_train, eng_test, eng_val, rus_train, rus_test, rus_val = split_data(eng_sentences,rus_sentences)

[("If I were you, I'd talk to Tom", 'На вашем месте я бы поговорил с Томом'), ('Almost three thousand people died', 'Почти три тысячи человек погибло'), ("You wouldn't hit me, would you", 'Ты же ведь меня не ударишь, да'), ("Tom said that he didn't know you", 'Том сказал, что он вас не знает'), ('When did I say that', 'Когда я это сказал')]


In [None]:
import spacy
from lambeq import Diagram, Box, Wire

def spacy_to_diagram(doc):
    """Convert Spacy Parsing to a lambeq diagram

    Creates one ``Box`` per token and records a ``(head_box, subject_box)``
    pair for every token whose dependency label is ``"nsubj"``.

    NOTE(review): this function is unfinished -- the collected boxes and
    wires are never attached to ``diagram`` and nothing is returned, so a
    caller currently receives ``None``.
    NOTE(review): confirm that ``Diagram()`` and ``Box(text, Wire())`` match
    the installed lambeq API before building on this.

    Args:
        doc: iterable of tokens exposing ``text``, ``dep_``, ``i`` and
            ``head``; assumes ``token.i`` is the token's position within
            ``doc`` -- TODO confirm when a slice is passed.
    """
    diagram = Diagram()

    # Build every box up front. A subject's head may come later in the
    # sentence, so indexing ``boxes`` while it is still being filled would
    # raise IndexError for any subject that precedes its verb.
    boxes = [Box(token.text, Wire()) for token in doc]
    wires = []

    for token in doc:
        if token.dep_ == "nsubj": #connect subject to verb
            wires.append((boxes[token.head.i], boxes[token.i]))
def parse_sentences(sentences, parser=None):
    """Parses a list of sentences and returns a list of diagrams, skipping failures.

    Args:
        sentences: iterable of sentence strings.
        parser: optional callable applied to each sentence (e.g. a spaCy
            pipeline). When given, its return value is stored as-is, so the
            result holds whatever that callable produces rather than lambeq
            diagrams. When ``None``, a ``BobcatParser`` is used.

    Returns:
        A list with one entry per successfully parsed sentence, in input
        order. Sentences whose parsing raises are reported with ``print``
        and omitted, so the result may be shorter than ``sentences``.
        NOTE(review): because failures are dropped, results from two
        parallel corpora may no longer line up index-by-index -- verify
        before zipping them into pairs.
    """
    diagrams = []
    # Created lazily on first use and then reused. Constructing a new
    # BobcatParser for every sentence repeats its setup work each time.
    bobcat = None
    for sentence in sentences:
        try:
            if parser is not None:  # Use the supplied parser (spaCy for Russian)
                diagrams.append(parser(sentence))  # Store the parser's output object
            else:  # Use BobcatParser for English
                if bobcat is None:
                    # Kept inside the try block so a construction failure is
                    # reported like any other parsing error.
                    bobcat = BobcatParser(verbose="text")
                diagrams.append(bobcat.sentence2diagram(sentence))
        except Exception as e:  # Catch any parsing error
            print(f" Error: {e}")
    return diagrams

# Process English sets (using BobcatParser)
# parse_sentences omits sentences that fail to parse, so these lists may be
# shorter than their inputs.
eng_diagrams_train = parse_sentences(eng_train)
eng_diagrams_test = parse_sentences(eng_test)
eng_diagrams_val = parse_sentences(eng_val)

# Process Russian sets (using spaCy)
# NOTE(review): `nlp` is not defined in the cells visible here -- confirm a
# spaCy pipeline is loaded earlier in the notebook.
# NOTE(review): these results are the objects returned by `nlp`, not lambeq
# diagrams; verify downstream code expects that.
rus_diagrams_train = parse_sentences(rus_train, nlp)
rus_diagrams_test = parse_sentences(rus_test, nlp)
rus_diagrams_val = parse_sentences(rus_val, nlp)




KeyboardInterrupt: 