In [1]:
import re
from io import BytesIO

import ipywidgets as widgets
import pandas as pd
import pymorphy2
from IPython.display import display
from nltk import FreqDist
from nltk import bigrams
from termcolor import colored

# Morphological analyzer shared by all button handlers to lemmatize the query.
morph = pymorphy2.MorphAnalyzer()

# Upload control: accepts a single file, restricted to the .csv extension.
file_upload = widgets.FileUpload(accept='.csv', multiple=False)

# Free-text field holding the phrase to search for.
input_phrase = widgets.Text(
    description='Фраза:',
    placeholder='Введите фразу для поиска',
    value='',
    disabled=False,
)

# One button per action; created in the same order they are displayed.
search_button, merge_button, collocations_button = (
    widgets.Button(description=label)
    for label in ("Найти", "Объединить фразу", "Коллокации")
)

# Area that captures everything the button handlers print.
output = widgets.Output()

# Uploaded table; declared at module scope and filled in by the upload handler.
dataset = None

def on_file_upload_change(change):
    """Load the newly uploaded CSV into the global ``dataset`` DataFrame.

    Meant to be registered as an observer of the upload widget's ``value``
    trait. The file is parsed as a header-less, semicolon-separated table
    and its columns are named ``num``, ``url`` and ``name``.

    Args:
        change: Change notification from the widget; ``change['new']`` is the
            new trait value, a sequence of upload records each carrying the
            raw bytes under the ``'content'`` key.
    """
    global dataset  # rebinding the module-level table, not a local
    uploaded = change['new']
    if not uploaded:
        # The observer also fires when the selection is cleared; there is
        # nothing to parse then, so keep whatever was loaded before.
        return
    content = uploaded[0]['content']
    dataset = pd.read_csv(BytesIO(content), sep=';', header=None)
    dataset.columns = ['num', 'url', 'name']
    print("Файл загружен.")


def on_search_button_clicked(b):
    """Print every dataset name containing at least one lemma of the phrase.

    The phrase from the text field is lower-cased, split on whitespace and
    each word is reduced to its normal form. A row matches when any of those
    lemmas occurs as a substring of its lower-cased ``name``. Matching rows
    are printed with every occurrence of a lemma coloured green.

    Args:
        b: The clicked button (unused; required by the ``on_click`` callback
            signature).
    """
    with output:
        if dataset is None:
            # Clicking before an upload used to fail with AttributeError.
            print("Сначала загрузите файл.")
            return

        lemmas = [
            morph.parse(word)[0].normal_form
            for word in input_phrase.value.lower().split()
        ]
        matching_rows = [
            name
            for name in dataset['name']
            if isinstance(name, str)
            and any(lemma in name.lower() for lemma in lemmas)
        ]
        if not matching_rows:
            print("Подходящие строки не найдены.")
            return

        # Highlight in a single case-insensitive pass: rows are matched on
        # their lower-cased text, so the highlight must ignore case too, and
        # one pass guarantees a later lemma can never be replaced inside the
        # escape codes inserted for an earlier one. Longer alternatives come
        # first so they win over their own prefixes.
        pattern = re.compile(
            '|'.join(
                re.escape(lemma)
                for lemma in sorted(set(lemmas), key=len, reverse=True)
            ),
            re.IGNORECASE,
        )
        print("Найденные строки:")
        for name in matching_rows:
            # Colour the text as it appears in the row to keep its casing.
            print(pattern.sub(lambda m: colored(m.group(0), 'green'), name))


def on_merge_button_clicked(b):
    """Print the lemmatized phrase and the dataset names containing all of it.

    The phrase from the text field is lower-cased, split on whitespace and
    each word is reduced to its normal form; the lemmas joined by spaces are
    printed first. A row matches when every lemma occurs as a substring of
    its lower-cased ``name``.

    Args:
        b: The clicked button (unused; required by the ``on_click`` callback
            signature).
    """
    with output:
        lemmas = [
            morph.parse(word)[0].normal_form
            for word in input_phrase.value.lower().split()
        ]
        print("Объединенная фраза:", ' '.join(lemmas))

        if dataset is None:
            # Clicking before an upload used to fail with a TypeError.
            print("Сначала загрузите файл.")
            return

        matching_names = []
        # all() over zero lemmas is vacuously true and would list the whole
        # dataset; an empty phrase is treated as "nothing found" instead,
        # which is what the plain search reports for the same input.
        if lemmas:
            for name in dataset['name']:
                if not isinstance(name, str):
                    continue
                lowered = name.lower()
                if all(lemma in lowered for lemma in lemmas):
                    matching_names.append(name)

        if matching_names:
            print("\nНайденные строки:")
            for name in matching_names:
                print(name)
        else:
            print("Подходящие строки не найдены.")


def on_collocations_button_clicked(b):
    """Print bigram frequencies for rows that contain the entered phrase.

    The phrase from the text field is lower-cased, split on whitespace and
    each word is reduced to its normal form. Rows whose lower-cased ``name``
    contains every lemma as a substring are split on whitespace into tokens
    and their adjacent token pairs are counted. A pair is printed, together
    with its count, when every lemma of the phrase equals one of the pair's
    tokens or that token's normal form.

    Args:
        b: The clicked button (unused; required by the ``on_click`` callback
            signature).
    """
    with output:
        if dataset is None:
            # Clicking before an upload used to fail with AttributeError.
            print("Сначала загрузите файл.")
            return

        lemmas = [
            morph.parse(word)[0].normal_form
            for word in input_phrase.value.lower().split()
        ]

        collocations = []
        # all() over zero lemmas is vacuously true; without this guard an
        # empty phrase would dump every bigram in the dataset.
        if lemmas:
            for name in dataset['name']:
                if not isinstance(name, str):
                    continue
                lowered = name.lower()
                if all(lemma in lowered for lemma in lemmas):
                    collocations.extend(bigrams(lowered.split()))

        fdist = FreqDist(collocations)

        # Tokens come from raw text while the query is lemmatized, so compare
        # against both the token itself and its normal form; otherwise pairs
        # holding an inflected form of the query word are silently dropped.
        forms_by_token = {}

        def token_forms(token):
            """Return the token together with its normal form (memoized)."""
            if token not in forms_by_token:
                forms_by_token[token] = {
                    token,
                    morph.parse(token)[0].normal_form,
                }
            return forms_by_token[token]

        print("Коллокации:")
        for collocation, freq in fdist.items():
            pair_forms = set().union(*(token_forms(t) for t in collocation))
            if all(lemma in pair_forms for lemma in lemmas):
                print(collocation, ":", freq)


# Attach each button to its click handler; the registrations are independent.
collocations_button.on_click(on_collocations_button_clicked)
merge_button.on_click(on_merge_button_clicked)
search_button.on_click(on_search_button_clicked)

# Re-load the dataset whenever the upload widget's value changes.
file_upload.observe(on_file_upload_change, names='value')

# Render the controls top to bottom, with the shared output area last.
display(
    file_upload,
    input_phrase,
    search_button,
    merge_button,
    collocations_button,
    output,
)

FileUpload(value=(), accept='.csv', description='Upload')

Text(value='', description='Фраза:', placeholder='Введите фразу для поиска')

Button(description='Найти', style=ButtonStyle())

Button(description='Объединить фразу', style=ButtonStyle())

Button(description='Коллокации', style=ButtonStyle())

Output()

Файл загружен.
