In [None]:
import discopy
from discopy.grammar import pregroup

# Read the corpus from 'russian_latin.txt'; each line is treated as one sentence.
# The encoding is given explicitly instead of relying on the platform default,
# because the vocabulary below is keyed by Cyrillic words.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open('russian_latin.txt', 'r', encoding='utf-8') as f:
    sentences = f.readlines()

# Word -> grammatical type lookup (placeholder entries; extend with the real
# vocabulary).
# NOTE(review): `pregroup.noun`, `pregroup.verb`, `pregroup.grammar` and
# `diagram.to_quantum()` are used as-is; none of them is defined in this file —
# confirm they exist in the installed discopy version.
vocabulary = {
    'марш': pregroup.noun,
    'иди': pregroup.verb,
    # ... add all your words
}

# Grammar object used to assemble a diagram from the per-word terms.
grammar = pregroup.grammar

for sentence in sentences:
    sentence = sentence.strip()  # Drop the trailing newline and surrounding whitespace
    words = sentence.split()  # Whitespace tokenisation

    # Look each word up in the vocabulary; unknown words map to None.
    terms = [vocabulary.get(word, None) for word in words]

    # Unknown words are silently dropped rather than treated as errors.
    terms = [term for term in terms if term is not None]

    # Nothing left to parse (blank line or no known words): skip the sentence.
    if not terms:
        continue

    # Build the diagram for the sentence from its known terms.
    diagram = grammar.diagram(terms)

    # Conversion is best-effort: a failure is reported and the loop moves on
    # to the next sentence instead of aborting the whole run.
    try:
        circuit = diagram.to_quantum()
        print(f"Sentence: {sentence}")
        print(f"Circuit: {circuit}")
    except Exception as e:
        print(f"Error converting sentence '{sentence}' to circuit: {e}")

In [None]:
import spacy

# Load the English and Russian spaCy models, downloading any that are missing.
import subprocess
import sys


def _load_spacy_model(model_name, label):
    """Load a spaCy pipeline, downloading it first if loading fails.

    Args:
        model_name: spaCy package name, e.g. "en_core_web_sm".
        label: Human-readable language name used in the progress message.

    Returns:
        The object returned by ``spacy.load(model_name)``.

    Raises:
        subprocess.CalledProcessError: If the download command exits with a
            non-zero status.
        OSError: If the model still cannot be loaded after the download.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        print(f"Downloading {label} spaCy model...")
        # Use the interpreter running this kernel (not whatever "python" is on
        # PATH) so the model is installed into the same environment, and fail
        # loudly if the download itself fails.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,
        )
        return spacy.load(model_name)


nlp_en = _load_spacy_model("en_core_web_sm", "English")
nlp_ru = _load_spacy_model("ru_core_news_sm", "Russian")


def tag_sentence(sentence, language):
    """Part-of-speech tag a sentence with the pipeline for `language`.

    Args:
        sentence: Text to run through the pipeline.
        language: "en" selects `nlp_en`, "ru" selects `nlp_ru`.

    Returns:
        A list of ``(token.text, token.pos_)`` tuples, one per token of the
        processed document, in document order.

    Raises:
        ValueError: If `language` is neither "en" nor "ru".
    """
    # Reject unknown language codes before touching any pipeline.
    if language not in ("en", "ru"):
        raise ValueError("Unsupported language")

    pipeline = nlp_en if language == "en" else nlp_ru

    pairs = []
    for token in pipeline(sentence):
        pairs.append((token.text, token.pos_))
    return pairs

def process_file(filepath, language):
    """Tag every non-blank line of a UTF-8 text file and print the results.

    For each line that is non-empty after stripping whitespace, prints the
    sentence, its tags as returned by `tag_sentence`, and a 20-dash separator.

    Args:
        filepath: Path of the text file to read.
        language: Language code forwarded unchanged to `tag_sentence`.

    Returns:
        None. Errors are reported on stdout rather than raised: a missing file
        prints a "File not found" message, and any other exception prints a
        generic error message and stops processing the file.
    """
    divider = "-" * 20
    try:
        with open(filepath, "r", encoding="utf-8") as handle:
            stripped_lines = (raw.strip() for raw in handle)
            # filter(None, ...) drops lines that are empty after stripping.
            for text in filter(None, stripped_lines):
                tags = tag_sentence(text, language)
                print(f"Sentence: {text}")
                print(f"Tagged: {tags}")
                print(divider)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
    except Exception as err:
        # Covers anything else raised while reading or tagging
        # (e.g. UnicodeDecodeError).
        print(f"An error occurred: {err}")


# Example usage: tag each corpus file with the pipeline for its language.
english_file = "english_sentences.txt"
russian_file = "russian_sentences.txt"

# English file first, then the Russian one.
for _path, _lang in ((english_file, "en"), (russian_file, "ru")):
    process_file(_path, _lang)

SyntaxError: invalid syntax (842801469.py, line 1)