In [77]:
from pystempel import Stemmer
import csv
import ipywidgets as widgets
from IPython.display import display
stemmer = Stemmer.polimorf()
import random
import spacy

Loading: 100%|██████████| 11368252/11368252 [00:11<00:00, 1000599.11bytes/s]


In [2]:
# Seed the dialog database: each row is (prompt pattern, chatbot reply).
# newline='' is what the csv module requires for files handed to csv.writer;
# without it, platforms that translate '\n' end up with '\r\r\n' row endings,
# which read back as spurious empty rows.
with open('interactions.csv', 'w', newline='') as file:
    dialog = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    dialog.writerows([
        ['Witaj', 'Witaj! W czym mogę ci poomóc?'],
        ['Jak się masz?', 'Dobrze. Dziękuję, że pytasz. A ty?'],
        ['Czym jesteś?', 'Jestem chatbotem.'],
        ['Jakie masz zainteresowania?', 'Lubię rozmawiać z ludźmi.'],
        ['Dziękuję', "Proszę bardzo. Cieszę się, że mogłem pomóc."],
        ['Do widzenia', 'Do widzenia! Miłego dnia!'],
    ])

In [None]:
#PyStempel
class Chatbot():
    """Keyword-overlap chatbot whose tokens are stemmed with the module-level ``stemmer``.

    The dialog database is a two-column CSV file: column 0 holds a prompt
    pattern, column 1 the reply. A user prompt is answered with the reply whose
    pattern shares the largest number of stems with the prompt.
    """

    # Reply returned when no stored pattern shares a single token with the prompt.
    FALLBACK_ANSWER = 'Nie posiadam na to odpowiedzi.'

    # Punctuation that separates or decorates words. The apostrophe is handled
    # separately: it is kept inside a word and stripped only from word edges.
    PUNCTUATION = ',.?!":\\;@#$%^&*/+-=_`~<>{}()[]'
    _PUNCT_TO_SPACE = str.maketrans(PUNCTUATION, ' ' * len(PUNCTUATION))

    def __init__(self, dialog_file):
        """Remember the path of the CSV dialog database; the file is not read here."""
        self.dialog_file = dialog_file

    def make_dialog_tuples(self):
        """Read the dialog CSV and return it as a list of ``(pattern, reply)`` tuples.

        Rows with fewer than two columns (for example blank lines) are skipped
        instead of raising ``IndexError``.
        """
        dialog = []
        with open(self.dialog_file, newline='') as file:
            csv_reader = csv.reader(file, delimiter=',', quotechar='"')
            for row in csv_reader:
                if len(row) < 2:
                    continue
                dialog.append((row[0], row[1]))
        return dialog

    def split_and_clean(self, sentence):
        """Turn a sentence into a list of lower-cased, punctuation-free stems.

        Every character of ``PUNCTUATION`` is replaced with a space wherever it
        occurs, so words glued together by punctuation ("a,b") become separate
        tokens. Apostrophes are stripped from the edges of each word only.
        An empty sentence yields an empty list.
        """
        if not sentence:
            return []
        spaced = sentence.lower().translate(self._PUNCT_TO_SPACE)
        tokens = []
        for raw_word in spaced.split():
            word = raw_word.strip("'")
            if not word:
                continue
            # NOTE(review): the stemmer may return a falsy value for words it
            # cannot reduce - confirm against pystempel; fall back to the word
            # itself so such tokens can still be matched.
            tokens.append(stemmer(word) or word)
        return tokens

    def find_matching(self, input_prompt, dialog):
        """Return the reply whose pattern overlaps most with ``input_prompt``.

        Args:
            input_prompt: raw text typed by the user.
            dialog: sequence of ``(pattern, reply)`` pairs.

        Returns:
            The reply of the best-matching pair; when several pairs tie for the
            best overlap one of them is picked uniformly at random. If nothing
            overlaps, ``FALLBACK_ANSWER`` is returned.
        """
        # Tokenise the prompt once; it does not change between dialog entries.
        prompt_tokens = set(self.split_and_clean(input_prompt))
        best_score = 0
        best_indices = []
        for index, pair in enumerate(dialog):
            score = len(prompt_tokens & set(self.split_and_clean(pair[0])))
            if score > best_score:
                best_score = score
                best_indices = [index]
            elif score == best_score and score != 0:
                best_indices.append(index)

        display(f'\tPrompt: {prompt_tokens}')
        if not best_indices:
            return self.FALLBACK_ANSWER
        return dialog[random.choice(best_indices)][1]

    def run(self):
        """Display a text box and a submit button that answer prompts interactively."""
        dialog = self.make_dialog_tuples()
        input_prompt = widgets.Text(
            value='',
            placeholder='Witaj',
            description="Prompt:",
        )
        submit_button = widgets.Button(description='Submit')

        def on_submit(b):
            # Check the text typed into the widget, not the widget object
            # itself (which is always truthy).
            prompt_text = input_prompt.value.strip()
            if not prompt_text:
                display('Puste')
            else:
                answer = self.find_matching(prompt_text, dialog)
                display(f'Response: {answer}')

        submit_button.on_click(on_submit)
        display(input_prompt, submit_button)

In [None]:
#Spacy
class Chatbot():
    """Keyword-overlap chatbot whose tokens are lemmatised with a spaCy pipeline.

    The dialog database is a two-column CSV file: column 0 holds a prompt
    pattern, column 1 the reply. A user prompt is answered with the reply whose
    pattern shares the largest number of lemmas with the prompt.
    """

    # Reply returned when no stored pattern shares a single token with the prompt.
    FALLBACK_ANSWER = 'Nie posiadam na to odpowiedzi.'

    def __init__(self, dialog_file):
        """Remember the dialog CSV path and load the ``pl_core_news_md`` spaCy model."""
        self.dialog_file = dialog_file
        self.nlp = spacy.load("pl_core_news_md")

    def make_dialog_tuples(self):
        """Read the dialog CSV and return it as a list of ``(pattern, reply)`` tuples.

        Rows with fewer than two columns (for example blank lines) are skipped
        instead of raising ``IndexError``.
        """
        dialog = []
        with open(self.dialog_file, newline='') as file:
            csv_reader = csv.reader(file, delimiter=',', quotechar='"')
            for row in csv_reader:
                if len(row) < 2:
                    continue
                dialog.append((row[0], row[1]))
        return dialog

    def split_and_clean(self, sentence):
        """Return the lower-cased lemmas of the alphabetic tokens in ``sentence``."""
        doc = self.nlp(sentence)
        return [token.lemma_.lower() for token in doc if token.is_alpha]

    def find_matching(self, input_prompt, dialog):
        """Return the reply whose pattern overlaps most with ``input_prompt``.

        Args:
            input_prompt: raw text typed by the user.
            dialog: sequence of ``(pattern, reply)`` pairs.

        Returns:
            The reply of the best-matching pair; when several pairs tie for the
            best overlap one of them is picked uniformly at random. If nothing
            overlaps, ``FALLBACK_ANSWER`` is returned.
        """
        # Lemmatise the prompt once; it does not change between dialog entries.
        prompt_tokens = set(self.split_and_clean(input_prompt))
        best_score = 0
        best_indices = []
        for index, pair in enumerate(dialog):
            score = len(prompt_tokens & set(self.split_and_clean(pair[0])))
            if score > best_score:
                best_score = score
                best_indices = [index]
            elif score == best_score and score != 0:
                best_indices.append(index)

        display(f'\tPrompt: {prompt_tokens}')
        if not best_indices:
            return self.FALLBACK_ANSWER
        return dialog[random.choice(best_indices)][1]

    def run(self):
        """Display a text box and a submit button that answer prompts interactively."""
        dialog = self.make_dialog_tuples()
        input_prompt = widgets.Text(
            value='',
            placeholder='Witaj',
            description="Prompt:",
        )
        submit_button = widgets.Button(description='Submit')

        def on_submit(b):
            # Check the text typed into the widget, not the widget object
            # itself (which is always truthy).
            prompt_text = input_prompt.value.strip()
            if not prompt_text:
                display('Puste')
            else:
                answer = self.find_matching(prompt_text, dialog)
                display(f'Response: {answer}')

        submit_button.on_click(on_submit)
        display(input_prompt, submit_button)

In [None]:
# Load the Polish spaCy pipeline and parse one sample sentence with it.
nlp = spacy.load("pl_core_news_md")
text = "To jest tekst"
# `doc` holds the parsed result of the sample sentence.
doc = nlp(text)

()

In [86]:
# Build a chatbot on the dialog CSV and show its prompt box and submit button.
# NOTE(review): `Chatbot` resolves to whichever class cell was executed last;
# two cells in this notebook define a class with that name.
chatbot = Chatbot('interactions.csv')
chatbot.run()

Text(value='', description='Prompt:', placeholder='Witaj')

Button(description='Submit', style=ButtonStyle())

"\tPrompt: {'witaj'}"

'Response: Witaj! W czym mogę ci poomóc?'

"\tPrompt: {'dziękować'}"

'Response: Proszę bardzo. Cieszę się, że mogłem pomóc.'