In [5]:
import spacy

# Load the English and Russian spaCy models, downloading any that are missing.
def _load_spacy_model(model_name, language_label):
    """Return the spaCy pipeline `model_name`, installing it first if needed.

    If `spacy.load` raises OSError (the condition this cell treats as "model
    not installed"), the model is downloaded once and the load is retried.

    Args:
        model_name: spaCy model package name, e.g. "en_core_web_sm".
        language_label: Human-readable language name used in the progress
            message printed before downloading.

    Returns:
        Whatever `spacy.load(model_name)` returns.

    Raises:
        subprocess.CalledProcessError: if the download command exits non-zero.
        OSError: if the model still cannot be loaded after the download.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        print(f"Downloading {language_label} spaCy model...")
        import subprocess
        import sys

        # Run the download with the interpreter backing this kernel: a bare
        # "python" can resolve to a different environment, which would install
        # the model somewhere this process cannot import it from.
        # check=True makes a failed download fail here, loudly, instead of
        # resurfacing as a second OSError from the retry below.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,
        )
        return spacy.load(model_name)


nlp_en = _load_spacy_model("en_core_web_sm", "English")
nlp_ru = _load_spacy_model("ru_core_news_sm", "Russian")


def tag_sentence(sentence, language):
    """Tag each token of `sentence` with its part of speech.

    Args:
        sentence: Text to run through the spaCy pipeline.
        language: "en" to use the English pipeline (`nlp_en`) or "ru" to use
            the Russian one (`nlp_ru`).

    Returns:
        A list of `(token.text, token.pos_)` tuples, one per token, in
        document order.

    Raises:
        ValueError: if `language` is neither "en" nor "ru".
    """
    # Reject unknown language codes up front so the rest is a simple choice.
    if language not in ("en", "ru"):
        raise ValueError("Unsupported language")

    pipeline = nlp_en if language == "en" else nlp_ru

    pairs = []
    for tok in pipeline(sentence):
        pairs.append((tok.text, tok.pos_))
    return pairs

def process_file(input_filepath, output_filepath, language):
    """Tag every non-blank line of a text file and write the results to a file.

    Each line of `input_filepath` is stripped of surrounding whitespace and
    blank lines are skipped. Every remaining line is passed to `tag_sentence`,
    and the string form of its result is written as one line of
    `output_filepath`. Both files are handled as UTF-8; the output file is
    opened in "w" mode, so existing content is overwritten.

    Errors are reported on stdout instead of being raised (best-effort), so
    callers should check the return value before relying on the output file.

    Args:
        input_filepath: Path of the text file to read; each line is treated
            as one sentence.
        output_filepath: Path of the file to write the tagged lines to.
        language: Language code forwarded unchanged to `tag_sentence`.

    Returns:
        True if the whole input was processed, False if an error was reported.
    """
    try:
        with open(input_filepath, "r", encoding="utf-8") as infile, \
                open(output_filepath, "w", encoding="utf-8") as outfile:
            for line in infile:
                sentence = line.strip()
                if sentence:
                    outfile.write(f"{tag_sentence(sentence, language)}\n")
    except FileNotFoundError as e:
        # Either open() can raise this: a missing input file, or a missing
        # parent directory for the output file. Report the path that actually
        # failed rather than always blaming the input.
        missing_path = e.filename if e.filename is not None else input_filepath
        print(f"Error: File not found at {missing_path}")
        return False
    except Exception as e:
        # Deliberately broad: keep the best-effort contract, but signal failure.
        print(f"An error occurred: {e}")
        return False
    return True

# Example usage: tag both corpora, then report where each result was written.
# Each pair is (input text file, output file for the tagged sentences).
english_file, english_output_file = "english.txt", "english_tagged.txt"
russian_file, russian_output_file = "russian.txt", "russian_tagged.txt"

# Process English first, then Russian, before printing any summary line.
for _src, _dst, _lang in (
    (english_file, english_output_file, "en"),
    (russian_file, russian_output_file, "ru"),
):
    process_file(_src, _dst, _lang)

print(f"Tagged English sentences saved to: {english_output_file}")
print(f"Tagged Russian sentences saved to: {russian_output_file}")

Tagged English sentences saved to: english_tagged.txt
Tagged Russian sentences saved to: russian_tagged.txt
