In [None]:
from dataclasses import dataclass


@dataclass
class WordFrequency:
    """A word together with a running count of its occurrences.

    Ordering and equality consider only ``word``; ``frequency`` is ignored.
    The other operand may be another ``WordFrequency`` or a plain ``str``,
    which lets a list of entries be searched directly with a string.
    Any other operand type yields ``NotImplemented``.
    """

    word: str
    frequency: int

    def __lt__(self, other):
        """Return whether this entry's word sorts before ``other``."""
        if isinstance(other, WordFrequency):
            # Reduce the entry to its word so one comparison serves both cases.
            other = other.word
        elif not isinstance(other, str):
            return NotImplemented
        return self.word < other

    def __eq__(self, other):
        """Return whether this entry's word equals ``other``."""
        if isinstance(other, WordFrequency):
            other = other.word
        elif not isinstance(other, str):
            return NotImplemented
        return self.word == other

In [None]:
def count_unique_words(text: list[str]):
    """Find the most frequent word by re-counting the list for every word.

    The distinct words are collected into a set and ``text.count`` is called
    once per distinct word, so the whole list is rescanned each time.

    Args:
        text: Words to count.

    Returns:
        A ``(word, count)`` tuple for the word with the highest count. When
        several words share the highest count, which one is returned depends
        on the iteration order of the set.

    Raises:
        ValueError: If ``text`` is empty (``max`` of an empty iterable).
    """
    vocabulary = set(text)
    # Lazy stream of (word, occurrences); each item costs one pass over text.
    tallies = ((token, text.count(token)) for token in vocabulary)
    return max(tallies, key=lambda pair: pair[1])

In [None]:
import bisect


def unsorted_list(text: list[str]):
    """Find the most frequent word using an unordered list of counters.

    Each word is looked up with ``list.index`` (a linear scan that relies on
    ``WordFrequency`` comparing equal to a plain string). A word that is not
    found is appended as a new entry with a count of one.

    Args:
        text: Words to count.

    Returns:
        The ``WordFrequency`` entry with the largest ``frequency``. Among
        equally frequent words, the one that first appeared earliest in
        ``text`` is returned.

    Raises:
        ValueError: If ``text`` is empty (``max`` of an empty list).
    """
    entries = []
    for token in text:
        try:
            position = entries.index(token)
        except ValueError:
            # First occurrence of this word.
            entries.append(WordFrequency(token, 1))
        else:
            entries[position].frequency += 1

    def by_frequency(entry):
        return entry.frequency

    return max(entries, key=by_frequency)

def sorted_list(text: list[str]):
    """Find the most frequent word using a list kept sorted by word.

    The position of each word is located with ``bisect.bisect_left`` (which
    relies on ``WordFrequency`` being orderable against a plain string). An
    existing entry is incremented; otherwise a new entry is inserted at that
    position so the list stays sorted.

    Args:
        text: Words to count.

    Returns:
        The ``WordFrequency`` entry with the largest ``frequency``. Among
        equally frequent words, the one that sorts first is returned.

    Raises:
        ValueError: If ``text`` is empty (``max`` of an empty list).
    """
    entries = []
    for token in text:
        slot = bisect.bisect_left(entries, token)

        # bisect_left returns an index in [0, len]; len means "past the end".
        already_known = slot < len(entries) and entries[slot].word == token
        if already_known:
            entries[slot].frequency += 1
            continue
        entries.insert(slot, WordFrequency(token, 1))

    def by_frequency(entry):
        return entry.frequency

    return max(entries, key=by_frequency)

In [None]:
from collections import Counter, defaultdict


def dictionary(text: list[str]):
    """Find the most frequent word using a plain ``dict`` of counts.

    A missing key is handled by catching ``KeyError`` rather than testing for
    membership first.

    Args:
        text: Words to count.

    Returns:
        A ``(word, count)`` tuple for the word with the highest count. Among
        equally frequent words, the one that first appeared earliest in
        ``text`` is returned.

    Raises:
        ValueError: If ``text`` is empty (``max`` of an empty iterable).
    """
    counts = {}
    for token in text:
        try:
            seen_so_far = counts[token]
        except KeyError:
            # Word not registered yet.
            seen_so_far = 0
        counts[token] = seen_so_far + 1

    def occurrences(item):
        _, count = item
        return count

    return max(counts.items(), key=occurrences)

def default_dictionary(text: list[str]):
    """Find the most frequent word using a ``defaultdict(int)`` of counts.

    Args:
        text: Words to count.

    Returns:
        A ``(word, count)`` tuple for the word with the highest count. Among
        equally frequent words, the one that first appeared earliest in
        ``text`` is returned.

    Raises:
        ValueError: If ``text`` is empty (``max`` of an empty iterable).
    """
    counts = defaultdict(int)
    for token in text:
        # Missing keys start at int() == 0, so no existence check is needed.
        counts[token] += 1
    top_word = max(counts, key=counts.get)
    return top_word, counts[top_word]
        
def counter(text: list[str]):
    """Find the most frequent word using ``collections.Counter``.

    Args:
        text: Words to count.

    Returns:
        A ``(word, count)`` tuple for the most common word.

    Raises:
        IndexError: If ``text`` is empty, because ``most_common(1)`` then
            returns an empty list.
    """
    tally = Counter(text)
    ranked = tally.most_common(1)
    return ranked[0]

In [None]:
from time import perf_counter


def measure_time(f, *args, **kwargs):
    """Call ``f`` once and report how long the call took.

    Args:
        f: Callable to invoke.
        *args: Positional arguments forwarded to ``f``.
        **kwargs: Keyword arguments forwarded to ``f``.

    Returns:
        A ``(result, elapsed)`` tuple: the value returned by ``f`` and the
        difference between two ``perf_counter`` readings taken around the
        call (seconds).
    """
    started_at = perf_counter()
    outcome = f(*args, **kwargs)
    elapsed = perf_counter() - started_at
    return outcome, elapsed

In [None]:
import re

# Read the whole corpus. The encoding is given explicitly so the result does
# not depend on the platform's locale default.
# NOTE(review): assumes text_mag.txt is stored as UTF-8 — confirm.
with open("text_mag.txt", encoding="utf-8") as f:
    raw_text = f.read()

# Split into runs of word characters and lower-case each token so that words
# differing only in case are counted together. `text` is bound once, to the
# final list of words, instead of being a str first and a list afterwards.
text = [word.lower() for word in re.findall(r"\w+", raw_text)]
print(f"В тексте длинной {len(text)} количество уникальных слов {len(set(text))}")

In [None]:
# Implementations under test, keyed by the label used in the printed output
# and as column names of the timing table.
methods = {
    "count unique words": count_unique_words,
    "sorted list": sorted_list,
    "unsorted list": unsorted_list,
    "dictionary": dictionary,
    "default dictionary": default_dictionary,
    "counter": counter,
}

# Elapsed seconds per method; one entry is appended per benchmarked length.
time = {method_name: [] for method_name in methods}

# Text lengths double from 1_000 up to 1_024_000 words. Lengths larger than
# the corpus are dropped: text[:size] would silently clip to the whole corpus
# and the timing would be recorded under a length that was never measured.
sizes = [size for size in (1_000 * 2**i for i in range(11)) if size <= len(text)]
for size in sizes:
    sample = text[:size]
    for method_name, f in methods.items():
        # The result is a (word, count) tuple or a WordFrequency, depending
        # on the method; it is printed only as a sanity check.
        result, elapsed = measure_time(f, sample)
        print(f"{method_name}: {result}")
        time[method_name].append(elapsed)
    print("_"*80)

In [None]:
import pandas as pd


# One row per text length, one column per method; cells hold elapsed seconds.
df = pd.DataFrame(data=time, index=sizes)
# Persist the raw timings as CSV.
df.to_csv(path_or_buf="hash_map.csv")
df

In [None]:
import plotly.express as px


# Captions for the legend and both axes.
axis_labels = {"variable": "Метод", "value": "Время (с)", "index": "Длина текста"}

# Timings plotted against text length, saved as a standalone HTML page.
fig = px.line(df, labels=axis_labels)
fig.write_html("hash_map.html")
fig

In [None]:
# The same timings with both axes on a logarithmic scale.
px.line(
    df,
    labels={"variable": "Метод", "value": "Время (с)", "index": "Длина текста"},
    log_x=True,
    log_y=True,
)