### Libs

In [2]:
import re

### Algorithm

In [7]:
class TextStatistics:
    """Collect simple statistics about a text.

    Intended call order: ``get_text`` (or assign ``text`` directly), then
    ``get_words``, then ``get_letters``; after that any of the counting
    methods can be used. Each method works on the state stored by the
    previous steps.
    """

    # Patterns are compiled once instead of on every word.
    _LATIN_RE = re.compile(r'[a-zA-Z]')
    # 'ё' / 'Ё' lie outside the contiguous 'а'-'я' / 'А'-'Я' code point
    # ranges, so they have to be listed explicitly.
    _CYRILLIC_RE = re.compile(r'[а-яА-ЯёЁ]')

    def __init__(self):
        self.text = ""
        self.words = []
        self.letters = ()
        self.frequency = {}
        self.bilingual_words = []

    def get_text(self, file_path: str) -> str:
        """Read the UTF-8 file at *file_path* into ``self.text`` and return it."""
        with open(file_path, 'r', encoding='UTF-8') as file:
            self.text = file.read()
        return self.text

    def get_words(self) -> list:
        """Split ``self.text`` into words and store them in ``self.words``.

        A word is a maximal run of ``\\w`` characters, so digits and
        underscores are part of words as well as letters.
        """
        self.words = re.findall(r'\w+', self.text)
        return self.words

    def words_amount(self) -> int:
        """Return the number of words found by the last ``get_words`` call."""
        return len(self.words)

    def get_letters(self) -> tuple:
        """Store and return every character of every word, in text order."""
        self.letters = tuple("".join(self.words))
        return self.letters

    def letters_frequency(self) -> dict:
        """Return ``{char: (occurrences, words_containing_char)}``.

        Keys are the distinct characters of ``self.letters`` in order of
        first appearance; both counts are taken over ``self.words``.
        The result is rebuilt from scratch on each call so that entries from
        a previously analysed text do not leak into the new result.
        """
        occurrences = {}
        containing_words = {}
        # One pass over the words instead of a full rescan per character.
        for word in self.words:
            for char in word:
                occurrences[char] = occurrences.get(char, 0) + 1
            for char in set(word):
                containing_words[char] = containing_words.get(char, 0) + 1

        # dict.fromkeys de-duplicates while keeping first-appearance order.
        self.frequency = {
            char: (occurrences.get(char, 0), containing_words.get(char, 0))
            for char in dict.fromkeys(self.letters)
        }
        return self.frequency

    def paragraph_amount(self) -> int:
        """Return the number of ``"\\n\\n"`` occurrences in ``self.text``.

        Note that this counts blank-line separators: a text without any
        blank line yields 0.
        """
        return self.text.count("\n\n")

    def bilingual_word_amount(self) -> list:
        """Return the words that mix Latin and Cyrillic letters.

        Despite the name, the list of words is returned (callers take its
        ``len``). The list is rebuilt on every call, so calling the method
        repeatedly does not accumulate duplicates.
        """
        self.bilingual_words = [
            word
            for word in self.words
            if self._LATIN_RE.search(word) and self._CYRILLIC_RE.search(word)
        ]
        return self.bilingual_words



### Test

In [8]:
def text_statistics(file_path: str) -> dict:
    """Compute text statistics for the UTF-8 file at *file_path*.

    Returns a dict holding one ``char: (occurrences, words_containing_char)``
    entry per distinct character, followed by the keys ``"word_amount"``,
    ``"paragraph_amount"`` and ``"bilingual_word_amount"``.
    """
    ts = TextStatistics()
    ts.get_text(file_path)
    ts.get_words()
    ts.get_letters()

    # Copy the per-character table so the summary keys added below do not
    # end up inside the TextStatistics instance's own frequency dict.
    stats = dict(ts.letters_frequency())

    stats["word_amount"] = ts.words_amount()
    stats["paragraph_amount"] = ts.paragraph_amount()
    # bilingual_word_amount() returns the list of words, not a number.
    stats["bilingual_word_amount"] = len(ts.bilingual_word_amount())

    return stats

### Output

In [9]:
# Run the statistics on the sample file; the notebook displays the returned
# dict as the cell output below.
text_statistics("../test_data/statistics.txt")

{'П': (2, 2),
 'р': (50, 42),
 'о': (125, 93),
 'с': (55, 49),
 'т': (72, 60),
 'ы': (21, 20),
 'е': (70, 58),
 'ч': (13, 13),
 'и': (89, 79),
 'л': (46, 44),
 'а': (74, 53),
 'в': (44, 40),
 'з': (25, 25),
 'д': (35, 33),
 'н': (65, 53),
 'м': (31, 30),
 'п': (31, 31),
 'Н': (2, 2),
 'б': (16, 15),
 'х': (10, 9),
 'm': (7, 4),
 'b': (2, 2),
 'ь': (18, 17),
 'ф': (7, 7),
 'у': (23, 22),
 'к': (36, 32),
 'ц': (15, 15),
 'ю': (10, 10),
 'p': (3, 3),
 'r': (6, 3),
 'i': (6, 6),
 'e': (7, 6),
 '_': (5, 3),
 'n': (11, 7),
 'u': (4, 4),
 's': (4, 4),
 'l': (4, 4),
 'o': (6, 5),
 'w': (2, 2),
 'h': (4, 2),
 'g': (2, 2),
 'г': (6, 6),
 'ж': (6, 6),
 'я': (21, 18),
 'й': (13, 13),
 'э': (3, 3),
 'Ф': (2, 2),
 'щ': (7, 7),
 'a': (4, 2),
 't': (8, 4),
 'Р': (1, 1),
 'ш': (4, 4),
 'N': (1, 1),
 'Д': (2, 2),
 'В': (1, 1),
 'y': (1, 1),
 'Г': (1, 1),
 'Э': (1, 1),
 'word_amount': 171,
 'paragraph_amount': 3,
 'bilingual_word_amount': 4}