### Libs

In [2]:
import re
from collections import Counter

### Algorithm

In [4]:
class TextStatistics:
    """Collect simple statistics about a text: words, characters, paragraphs.

    Intended call order: ``get_text()`` to load the text, ``get_words()`` to
    tokenize it, then any of the statistic methods. Each method stores its
    result on the instance and also returns it.

    Attributes:
        text: The raw text being analysed.
        words: Tokens matched by ``\\w+`` in ``text``.
        letters: Every character of every word, in order of appearance.
        frequency: Per-character statistics built by ``letters_frequency()``.
        bilingual_words: Words mixing Latin and Cyrillic letters.
    """

    # Character classes used to detect mixed-alphabet words. The Cyrillic
    # class lists 'ё'/'Ё' explicitly because they lie outside 'а-я'/'А-Я'.
    _LATIN_RE = re.compile(r'[a-zA-Z]')
    _CYRILLIC_RE = re.compile(r'[а-яА-ЯёЁ]')
    # A paragraph break is a blank line (optionally containing whitespace).
    _PARAGRAPH_BREAK_RE = re.compile(r'\n\s*\n')

    def __init__(self):
        self.text = ""
        self.words = []
        self.letters = ()
        self.frequency = {}
        self.bilingual_words = []

    def get_text(self, file_path: str) -> str:
        """Read the UTF-8 file at ``file_path`` into ``self.text`` and return it.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        with open(file_path, 'r', encoding='UTF-8') as file:
            self.text = file.read()
        return self.text

    def get_words(self) -> list:
        """Split ``self.text`` into words and return them.

        A word is a maximal run of ``\\w`` characters, so digits and
        underscores are part of words as well as letters.
        """
        self.words = re.findall(r'\w+', self.text)
        return self.words

    def words_amount(self) -> int:
        """Return the number of words found by ``get_words()``."""
        return len(self.words)

    def get_letters(self) -> tuple:
        """Return every character of every word as a flat tuple.

        Duplicates are kept. Because words are ``\\w+`` matches, the tuple may
        also contain digits and underscores.
        """
        self.letters = tuple("".join(self.words))
        return self.letters

    def letters_frequency(self) -> dict:
        """Compute per-character statistics over ``self.words``.

        Returns:
            A dict mapping each distinct character (in order of first
            appearance) to a tuple ``(occurrences, words_per_hit)`` where
            ``occurrences`` is how many times the character appears in all
            words and ``words_per_hit`` is the total number of words divided
            by the number of words containing the character.

        The statistics are derived directly from ``self.words``, so the result
        does not depend on ``get_letters()`` having been called and cannot
        divide by zero. The dict is rebuilt on every call.
        """
        total_words = len(self.words)

        # Single pass over the text for raw occurrence counts.
        occurrences = Counter("".join(self.words))

        # Single pass over the words; set() counts each word once per character.
        words_containing = Counter()
        for word in self.words:
            words_containing.update(set(word))

        self.frequency = {
            char: (count, total_words / words_containing[char])
            for char, count in occurrences.items()
        }
        return self.frequency

    def paragraph_amount(self) -> int:
        """Return the number of paragraphs in ``self.text``.

        Paragraphs are non-empty blocks of text separated by one or more blank
        lines. Empty or whitespace-only text has zero paragraphs.
        """
        blocks = self._PARAGRAPH_BREAK_RE.split(self.text)
        return sum(1 for block in blocks if block.strip())

    def bilingual_word_amount(self) -> list:
        """Return the words that contain both Latin and Cyrillic letters.

        Despite the name, the list of words is returned (callers take its
        ``len``). The list is rebuilt on every call, so repeated calls do not
        accumulate duplicates.
        """
        self.bilingual_words = [
            word for word in self.words
            if self._LATIN_RE.search(word) and self._CYRILLIC_RE.search(word)
        ]
        return self.bilingual_words



### Test

In [5]:
def text_statistics(file_path: str) -> dict:
    """Compute all text statistics for the file at ``file_path``.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        A dict with one entry per character found in the words of the text
        (as produced by ``TextStatistics.letters_frequency``), followed by:
          - ``"word_amount"``: number of words,
          - ``"paragraph_amount"``: value of ``TextStatistics.paragraph_amount``,
          - ``"bilingual_word_amount"``: number of words mixing alphabets.
    """
    ts = TextStatistics()
    ts.get_text(file_path)
    ts.get_words()
    ts.get_letters()

    # Copy the per-character entries first so they keep their position ahead
    # of the summary keys; character keys are single characters and therefore
    # cannot collide with the summary key names.
    summary = dict(ts.letters_frequency())
    summary["word_amount"] = ts.words_amount()
    summary["paragraph_amount"] = ts.paragraph_amount()
    summary["bilingual_word_amount"] = len(ts.bilingual_word_amount())

    return summary

### Output

In [6]:
# Run the statistics pipeline on the file at this relative path; the returned
# dict is displayed as the cell output.
text_statistics("../test_data/statistics.txt")

{'П': (2, 85.5),
 'р': (50, 4.071428571428571),
 'о': (125, 1.8387096774193548),
 'с': (55, 3.489795918367347),
 'т': (72, 2.85),
 'ы': (21, 8.55),
 'е': (70, 2.9482758620689653),
 'ч': (13, 13.153846153846153),
 'и': (89, 2.1645569620253164),
 'л': (46, 3.8863636363636362),
 'а': (74, 3.2264150943396226),
 'в': (44, 4.275),
 'з': (25, 6.84),
 'д': (35, 5.181818181818182),
 'н': (65, 3.2264150943396226),
 'м': (31, 5.7),
 'п': (31, 5.516129032258065),
 'Н': (2, 85.5),
 'б': (16, 11.4),
 'х': (10, 19.0),
 'm': (7, 42.75),
 'b': (2, 85.5),
 'ь': (18, 10.058823529411764),
 'ф': (7, 24.428571428571427),
 'у': (23, 7.7727272727272725),
 'к': (36, 5.34375),
 'ц': (15, 11.4),
 'ю': (10, 17.1),
 'p': (3, 57.0),
 'r': (6, 57.0),
 'i': (6, 28.5),
 'e': (7, 28.5),
 '_': (5, 57.0),
 'n': (11, 24.428571428571427),
 'u': (4, 42.75),
 's': (4, 42.75),
 'l': (4, 42.75),
 'o': (6, 34.2),
 'w': (2, 85.5),
 'h': (4, 85.5),
 'g': (2, 85.5),
 'г': (6, 28.5),
 'ж': (6, 28.5),
 'я': (21, 9.5),
 'й': (13, 13.