In [1]:
import csv
from IPython.display import display
import random
from PyQt5.QtWidgets import QApplication, QVBoxLayout, QWidget, QPushButton, QLineEdit, QLabel, QTextBrowser
from PyQt5.QtGui import QScreen, QGuiApplication
from PyQt5.QtCore import Qt
import sys
import os
from nltk.stem import WordNetLemmatizer as WNL
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from nltk import pos_tag
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from nltk.classify import NaiveBayesClassifier as NBC
from sklearn.model_selection import train_test_split as tts
from nltk.classify.util import accuracy as nltk_accuracy
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from gensim import models, corpora
from nltk.corpus import names
import stopwordsiso
import pystempel
import spacy

In [2]:
# Prompt/answer pairs that form the chatbot's knowledge base; each entry becomes one CSV row.
interaction_rows = [
    ['Hello', 'Hello! How can I help you?'],
    ['How are you?', 'Good. Thank you for asking. And you?'],
    ['What are you?', 'I am a chatbot.'],
    ['What are your interests?', 'I like talking to people.'],
    ['Thank you', 'You are welcome. I am glad I could help.'],
    ['Goodbye', 'Goodbye! Have a nice day!'],
    ['How was your morning?', 'It was quite good, thank you!'],
    ['What do you like to eat?', 'I love pizza!'],
    ['Can you recommend a book?', 'I recommend The Witcher!'],
    ['Who are you?', 'I am a virtual conversation assistant.'],
    ['I like programming', 'I also really like programming in Python!'],
    ['What is your favorite city?', 'I love the atmosphere of Krakow.'],
    ['Do you like extreme sports?', 'Yes, especially bungee jumping.'],
    ['Can you play an instrument?', 'I would like to learn to play the guitar.'],
    ['What is your favorite cuisine?', 'I love Italian cuisine.'],
    ['Do you have any pets?', 'Unfortunately, I don’t, but I really like dogs.'],
    ['What is your favorite programming language?', 'Of course, Python!'],
    ['Where do you like to spend your holidays?', 'By the sea in a warm climate.'],
    ['Do you know any jokes?', 'Some say I am one big joke myself.'],
    ['Do you like fantasy books?', 'Yes, especially Tolkien.'],
    ['What kind of music do you listen to most often?', 'Mainly rock and pop.'],
    ['Do you have a favorite athlete?', 'I admire Robert Lewandowski.'],
    ['Do you know any foreign languages?', 'A bit of English and German.'],
    ['What are your goals for the future?', 'To help people in conversations.'],
    ['What inspires you the most?', 'Human creativity and intelligence.'],
    ['Do you like cooking?', 'Yes, I enjoy it.'],
    ['What is your favorite season?', 'I love summer evenings.'],
    ['Do you read daily newspapers?', 'Sometimes I browse online news.'],
    ['What is your favorite color?', 'Blue, it reminds me of the sky.'],
    ['How do you like to spend your free time?', 'I like browsing memes on the internet.'],
    ['Do you like watching movies?', 'Very much, especially science fiction.'],
    ['What is your biggest dream?', 'To discover the meaning of existence.'],
    ['Is finding love important?', 'For many people, it is the most important thing.'],
    ['Can you draw something?', 'I don’t have artistic skills.'],
    ['How do you deal with stress?', 'I try to breathe deeply and think positively.'],
    ['Do you ride a bike?', 'I love long bike trips.'],
    ['What is your favorite movie?', 'It’s hard to choose, but Inception is high on the list.'],
    ['Do you know any magic tricks?', 'Unfortunately, none.'],
    ['Do you like stand-up performances?', 'Yes, sometimes they are really funny.'],
    ['Where would you like to go now?', 'Japan sounds interesting.'],
    ['What is the best advice you have ever received?', 'Learn from mistakes and try again.'],
    ['Do you like learning new skills?', 'Yes, it’s always an interesting adventure.'],
    ['Do you play computer games?', 'Yes, I like logic games and RPGs.'],
    ['What is your greatest achievement?', 'Providing meaningful answers in real-time.'],
    ['Do you like surprising people?', 'Positive surprises are great.'],
    ['What do you value in friendship?', 'Honesty and mutual support.'],
    ['Do you have any favorite quotes?', 'Life is short, laugh often.'],
    ['Do you like taking risks?', 'It depends on the situation, but I tend to be cautious.'],
    ['What is your opinion on coffee?', 'I like the smell, but I rarely drink it.'],
    ['Can you describe yourself in one word?', 'Curious.'],
    ['What do you do when you are bored?', 'I look for new information and learn.'],
]

# newline='' lets the csv module control line endings itself; without it, platforms that
# translate '\n' (Windows) end up with '\r\r\n' and the reader later sees empty rows.
with open('interactions.csv', 'w', newline='') as file:
    dialog = csv.writer(file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    dialog.writerows(interaction_rows)

In [3]:
#QT_App

class Chatbot():
    """Retrieval-style chatbot backed by a CSV file of (prompt, answer) rows.

    A user prompt is answered with the reply of the stored prompt that shares
    the largest number of lemmas with it.
    """

    # Reply returned when no stored prompt shares a single lemma with the input.
    FALLBACK_ANSWER = 'I don\'t know how to answer that.'

    def __init__(self, dialog_file):
        """Remember the path of the dialog CSV and create the WordNet lemmatizer."""
        self.dialog_file = dialog_file
        self.diLemma = WNL()

    def make_dialog_tuples(self):
        """Read the dialog CSV and return a list of (prompt, answer) tuples.

        Rows with fewer than two fields (e.g. blank lines) are skipped instead
        of raising IndexError.
        """
        dialog = []
        with open(self.dialog_file, newline='') as file:
            csv_reader = csv.reader(file, delimiter=',', quotechar='"')
            for row in csv_reader:
                if len(row) < 2:
                    continue
                dialog.append((row[0], row[1]))
        return dialog

    def get_wordnet_pos(self, tag):
        """Map a POS tag to the matching WordNet constant by its first letter.

        Returns None for tags that do not start with J, V, N or R.
        """
        if tag.startswith('J'):
            return wordnet.ADJ
        elif tag.startswith('V'):
            return wordnet.VERB
        elif tag.startswith('N'):
            return wordnet.NOUN
        elif tag.startswith('R'):
            return wordnet.ADV
        else:
            return None

    def split_and_clean(self, sentence):
        """Return the lemmas of the alphabetic tokens of ``sentence``.

        The sentence is lowercased, tokenized and POS-tagged; tokens that are
        not purely alphabetic are dropped. Tokens whose tag has no WordNet
        counterpart are lemmatized as nouns.
        """
        tokenized = word_tokenize(sentence.lower())
        tagged = pos_tag(tokenized)
        lemmatized = []

        for word, tag in tagged:
            if word.isalpha():
                pos = self.get_wordnet_pos(tag) or wordnet.NOUN
                lemmatized.append(self.diLemma.lemmatize(word, pos=pos))

        return lemmatized

    def find_matching(self, input_prompt, dialog):
        """Return the answer whose stored prompt overlaps most with ``input_prompt``.

        Overlap is the number of distinct lemmas shared by the two prompts.
        When several stored prompts tie for the best overlap, one of them is
        picked uniformly at random. If nothing overlaps (or ``dialog`` is
        empty) ``FALLBACK_ANSWER`` is returned.
        """
        prompt_lemmas = set(self.split_and_clean(input_prompt))
        best_overlap = 0
        best_indices = []

        for index, pair in enumerate(dialog):
            overlap = len(prompt_lemmas & set(self.split_and_clean(pair[0])))
            if overlap == 0:
                continue
            if overlap > best_overlap:
                # Strictly better match: forget the previous candidates.
                best_overlap = overlap
                best_indices = [index]
            elif overlap == best_overlap:
                best_indices.append(index)

        if not best_indices:
            return self.FALLBACK_ANSWER
        return dialog[random.choice(best_indices)][1]

    def ask(self, prompt):
        """Answer ``prompt``; the dialog file is re-read on every call."""
        dialog = self.make_dialog_tuples()
        answer = self.find_matching(prompt, dialog)
        return answer

class ChatbotApp(QWidget):
    """Small Qt window: conversation history, a prompt field, Submit and Exit buttons.

    Every submitted prompt is passed to a ``Chatbot`` and the exchange is
    appended to the history view; the lemma decomposition of the last prompt
    is shown in a label.
    """

    def __init__(self, dialog_file):
        """Build the widgets and create the ``Chatbot`` for ``dialog_file``."""
        super().__init__()
        self.setWindowTitle("ChatBot")
        self.setGeometry(100, 100, 400, 200)
        self.chatbot = Chatbot(dialog_file)
        self.history_text = ""

        # Kept in a local on purpose: assigning it to self.layout would shadow
        # the QWidget.layout() method on this instance.
        layout = QVBoxLayout()

        self.history = QTextBrowser(self)
        layout.addWidget(self.history)

        self.input_field = QLineEdit(self)
        self.input_field.setPlaceholderText("Prompt")
        # Pressing Enter in the field submits, same as clicking the button.
        self.input_field.returnPressed.connect(self.submit_onClick)
        layout.addWidget(self.input_field)

        self.button = QPushButton("Submit", self)
        self.button.clicked.connect(self.submit_onClick)
        layout.addWidget(self.button)

        self.label_decomposition = QLabel("Last decomposition:", self)
        layout.addWidget(self.label_decomposition)

        quit_button = QPushButton("Exit", self)
        quit_button.clicked.connect(self.close_application)
        layout.addWidget(quit_button)

        self.setLayout(layout)
        self.centerWindow()

    def centerWindow(self):
        """Move the window so its frame is centred on the primary screen's available area."""
        screen_geometry = QApplication.primaryScreen().availableGeometry()
        window_geometry = self.frameGeometry()
        window_geometry.moveCenter(screen_geometry.center())
        self.move(window_geometry.topLeft())

    def close_application(self):
        """Quit the Qt application."""
        QApplication.quit()

    def submit_onClick(self):
        """Send the current prompt to the chatbot and append the exchange to the history.

        Blank (empty or whitespace-only) prompts are ignored.
        """
        prompt = self.input_field.text()
        if not prompt.strip():
            return

        response = self.chatbot.ask(prompt)
        self.label_decomposition.setText(f"Last decomposition: {self.chatbot.split_and_clean(prompt)}")
        self.history_text += f'User: {prompt}\nChatbot: {response}\n'
        # setPlainText, not setText: setText auto-detects rich text, so a prompt
        # containing markup could make the whole history render as HTML.
        self.history.setPlainText(self.history_text)
        # Let Qt lay out the new text so the scrollbar maximum is up to date.
        QGuiApplication.processEvents()
        scrollbar = self.history.verticalScrollBar()
        scrollbar.setValue(scrollbar.maximum())
        self.input_field.clear()

In [4]:
# Reuse a live QApplication if one already exists (e.g. the cell is re-run after a
# failure); constructing a second instance in the same process is not supported by Qt.
app = QApplication.instance() or QApplication(sys.argv)
try:
    # The window is created inside the try so the application reference is
    # released even when building the window fails.
    window = ChatbotApp("interactions.csv")
    window.show()
    app.exec_()
finally:
    del app

Dropped Escape call with ulEscapeCode : 0x03007703


In [5]:
# One sentence per line. Only the trailing newline is stripped: slicing with [:-1]
# would cut a real character off the last line when the file has no final newline.
with open('data.txt', 'r') as f:
    data = [line.rstrip('\n') for line in f]

In [6]:
# Classification into categories on custom data
# The corpus is ordered by class: lines before index 72 are category 0, the rest are
# category 1. Lines 50..91 straddle that boundary and are held out as the test set.
class_boundary = 72
test_start, test_stop = 50, 92
all_labels = [0 if position < class_boundary else 1 for position in range(len(data))]

# Train
count_vectorizer = CountVectorizer()
train_data = data[:test_start] + data[test_stop:]
# Labels are sliced exactly like the sentences, so they always match in length.
train_labels = all_labels[:test_start] + all_labels[test_stop:]
train_tc = count_vectorizer.fit_transform(train_data)

tfidf = TfidfTransformer()
train_tfidf = tfidf.fit_transform(train_tc)

classifier = MultinomialNB().fit(train_tfidf, train_labels)

# Test
input_data = data[test_start:test_stop]
input_labels = all_labels[test_start:test_stop]

input_tc = count_vectorizer.transform(input_data)
input_tfidf = tfidf.transform(input_tc)
predictions = classifier.predict(input_tfidf)

score = 0

# Output
for sent, category, correct in zip(input_data, predictions, input_labels):
    print('\nInput:', sent, '\nPredicted category:', category, '\nCorrect category:', correct)
    if correct == category:
        score += 1

print(f'\nScore: {score}/{len(input_data)}')


Input: Fotografia lasu deszczowego z wodospadem, wykonana w naturalnym świetle. 
Predicted category: 0 
Correct category: {0}

Input: Realistyczny obraz alpejskiej łąki z kwitnącymi kwiatami, zrobiony w wysokiej rozdzielczości. 
Predicted category: 0 
Correct category: {0}

Input: Zdjęcie skalistego wybrzeża z zachodzącym słońcem, uchwycone w naturalnym świetle. 
Predicted category: 0 
Correct category: {0}

Input: Fotografia górskiego krajobrazu z rwącym potokiem, wykonana profesjonalnym aparatem. 
Predicted category: 0 
Correct category: {0}

Input: Realistyczne zdjęcie tropikalnej plaży z hamakiem między palmami, zrobione w słoneczny dzień. 
Predicted category: 0 
Correct category: {0}

Input: Zdjęcie pustynnego krajobrazu z kaktusami i błękitnym niebem, uchwycone w wysokiej rozdzielczości. 
Predicted category: 0 
Correct category: {0}

Input: Fotografia jesiennego lasu z promieniami słońca przebijającymi się przez drzewa, wykonana aparatem o wysokiej jakości. 
Predicted category: 

In [7]:
# Classification into categories on custom data, NaiveBayes

def extract_features(words):
    """Return a presence-feature dict that maps every word in ``words`` to ``True``."""
    return dict.fromkeys(words, True)

def dict_tts(dictionary, test_size=None, train_size=None, random_state=86):
    """Split a dict into a (train, test) pair of dicts with ``train_test_split``.

    ``test_size`` takes precedence; ``train_size`` is used only when
    ``test_size`` is None. If both are None, 80% of the items go to the
    training dict.
    """
    if test_size is not None:
        size_option = {'test_size': test_size}
    elif train_size is not None:
        size_option = {'train_size': train_size}
    else:
        size_option = {'train_size': 0.8}

    # Keys and values are split together, so each key keeps its own value.
    train_keys, test_keys, train_values, test_values = tts(
        list(dictionary.keys()),
        list(dictionary.values()),
        random_state=random_state,
        **size_option,
    )

    return dict(zip(train_keys, train_values)), dict(zip(test_keys, test_values))

# Label each sentence by its position in the corpus (lines before index 72 are
# category 0, the rest category 1). The mapping gets its own name so the `data`
# list is not overwritten and cells that slice it can still be re-run.
labeled_data = {value: (0 if num < 72 else 1) for num, value in enumerate(data)}

data_train, data_test = dict_tts(labeled_data, test_size=0.2)

# NLTK classifiers expect (feature_dict, label) pairs; the split dicts are left untouched.
train_set = [(extract_features(sentence.split()), category) for sentence, category in data_train.items()]
data_test_check = [(extract_features(sentence.split()), category) for sentence, category in data_test.items()]

classifier = NBC.train(train_set)

print('\nAccuracy of the classifier:', nltk_accuracy(classifier, data_test_check))

for uPrompt, correct in data_test.items():
    print("\nPrompt:", uPrompt)

    probabilities = classifier.prob_classify(extract_features(uPrompt.split()))
    predicted_sentiment = probabilities.max()

    print("Predicted sentiment:", predicted_sentiment)
    print("Correct sentiment:", correct)
    print("Probability:", round(probabilities.prob(predicted_sentiment), 2))


Accuracy of the classifier: 1.0

Prompt: Realistyczny obraz alpejskiego krajobrazu z ośnieżonymi szczytami, zrobiony aparatem.
Predicted sentiment: 0
Correct sentiment: 0
Probability: 1.0

Prompt: Fotografia zimowego krajobrazu z zamarzniętym jeziorem, wykonana aparatem o wysokiej jakości.
Predicted sentiment: 0
Correct sentiment: 0
Probability: 1.0

Prompt: Obraz mrocznego portalu otoczonego przez cienie i płomienie.
Predicted sentiment: 1
Correct sentiment: 1
Probability: 1.0

Prompt: Zdjęcie malowniczej doliny z rwącą rzeką, uchwycone w wysokiej rozdzielczości.
Predicted sentiment: 0
Correct sentiment: 0
Probability: 1.0

Prompt: Ilustracja przedstawiająca cmentarz z unoszącymi się duchami i mgłą.
Predicted sentiment: 1
Correct sentiment: 1
Probability: 1.0

Prompt: Malowidło przedstawiające upadłego anioła z płonącymi skrzydłami na tle zniszczonego miasta.
Predicted sentiment: 1
Correct sentiment: 1
Probability: 1.0

Prompt: Artystyczny obraz mrocznego jeziora z odbiciem księżyca 

In [None]:
# Topic detection and the share of each word within the topics
normalizer_type = "l"  # 's*' selects the stemmer, 'l*' selects the lemmatizer

# Load only the resource the selected normalizer needs.
if normalizer_type.startswith('s'): stemmer = pystempel.Stemmer.polimorf()
elif normalizer_type.startswith('l'): nlp = spacy.load('pl_core_news_md')

# Fetched once instead of once per sentence.
polish_stop_words = stopwordsiso.stopwords('pl')

def process(input_text, type='stemmer'):
    """Turn a sentence into a list of normalized, stop-word-free tokens.

    ``type`` starting with 's' uses the module-level ``stemmer``; ``type``
    starting with 'l' uses the module-level spaCy pipeline ``nlp``. The
    matching resource must have been loaded beforehand.

    Raises:
        ValueError: if ``type`` starts with neither 's' nor 'l'.
    """
    if type.startswith('s'):
        tokenizer = RegexpTokenizer(r'\w+')
        tokens = [x for x in tokenizer.tokenize(input_text.lower()) if x not in polish_stop_words]
        # NOTE(review): the stemmer may return None for words it cannot stem - confirm;
        # the original token is kept in that case so doc2bow only ever sees strings.
        return [stemmer(x) or x for x in tokens]
    if type.startswith('l'):
        lemmas = []
        for token in nlp(input_text):
            # Skip punctuation, numbers and other non-alphabetic tokens.
            if not token.is_alpha:
                continue
            lemma = token.lemma_.lower()
            if token.text.lower() in polish_stop_words or lemma in polish_stop_words:
                continue
            lemmas.append(lemma)
        return lemmas
    raise ValueError(f"Unknown normalizer type: {type!r} (expected 's*' or 'l*')")

# The selected mode is passed explicitly; relying on the default would always stem.
tokens = [process(sentence, normalizer_type) for sentence in data]
dict_tokens = corpora.Dictionary(documents=tokens)
doc_term_mat = [dict_tokens.doc2bow(token) for token in tokens]
ldamodel = models.ldamodel.LdaModel(doc_term_mat,
                                    num_topics=2,
                                    id2word=dict_tokens,
                                    passes=90,
                                    random_state=86)  # fixed seed so topics are reproducible

for item in ldamodel.print_topics(num_topics=2, num_words=10):
    print('\nTopic', item[0])

    # item[1] looks like '0.070*"word" + 0.052*"other" + ...'
    for text in item[1].split(' + '):
        weight, word = text.split('*', 1)
        print(word, '==>', str(round(float(weight) * 100, 2)) + '%')


Topic 0
"obraza" ==> 7.0%
"przedstawiać" ==> 5.2%
"mroczny" ==> 4.2%
"otoczyć" ==> 2.9%
"ilustracja" ==> 2.6%
"artystyczny" ==> 2.6%
"malowidło" ==> 2.5%
"tło" ==> 2.1%
"unosić" ==> 1.3%
"smokać" ==> 1.2%

Topic 1
"zdjęcie" ==> 5.2%
"wysoki" ==> 3.8%
"wykonać" ==> 3.5%
"fotografia" ==> 3.5%
"uchwycić" ==> 3.5%
"zrobić" ==> 3.5%
"realistyczny" ==> 3.5%
"krajobraz" ==> 3.4%
"aparat" ==> 3.2%
"świetło" ==> 2.4%
