# In [2]:
import numpy as np
import re
import random
from collections import Counter
from nltk.corpus import stopwords
from nltk import word_tokenize, pos_tag

# Read the scraped Elon Musk tweets file; each '\n'-separated line is treated as one tweet
data_path = "elon_tweets.txt"
with open(data_path, 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')

# Build elon_tweets from the raw lines:
#   * lines containing '@' (replies / mentions) are skipped entirely
#   * anything starting with 'http' up to the next whitespace is removed
#   * the remaining text is lower-cased
elon_tweets = [
    re.sub(r'http\S+', '', line).lower()
    for line in lines
    if '@' not in line
]


# Begin creating the ChatBot
class ChatBot:
    """Console chatbot that answers with scraped Elon Musk tweets.

    The bot asks the user for a topic, picks the noun of the user's sentence
    that occurs most often in the module-level ``elon_tweets`` list and replies
    with a random tweet containing that noun. After every tweet the user may
    dig deeper into the same topic, switch to a new topic, or end the chat.

    All interaction happens through ``input()`` and ``print()``.

    NOTE: the conversation is driven by mutual recursion between
    ``change_chat``, ``topic_intent`` and ``deeper_topic_intent``; every
    recursive call is the last statement of its method, so nothing runs (and
    nothing is prompted) while the call stack unwinds after the user exits.
    """

    # Words that mark a yes/no answer as "no". Replies are matched word by
    # word, so the multi-word entry "not a chance" is effectively covered by
    # the single word "not".
    negative_responses = ("no", "nope", "nah", "naw", "not a chance", "sorry", 'nothing', 'not')
    # Words that end the conversation when they appear in a yes/no answer
    exit_commands = ("quit", "pause", "exit", "goodbye", "bye", "later", "stop")

    # Predetermined conjunction phrases to make the ChatBot more 'human'
    topic_change_phrases = ['Good, you were starting to bore me!\n',
                            'Excellent, I have so much more knowledge to share!\n',
                            'Direct and to the point, I love it\n',
                            'Phew! I was running out of things to say...\n',
                            "Let's have a quick time out and reset\n"]

    new_topic_phrases = ['What field are you interested in now?\n',
                         'Perfect! What would you like to ask me about?\n',
                         'Love your thirst for knowledge, keep drinking...\n',
                         'I am an open book, what chapter is next?\n',
                         'Ask away, today really is Elon Musk unfiltered...\n',
                         'I am a transparent person, ask me anything']

    unknown_phrases = ["I'm inteligent but I don't know everything you know\n",
                       "For once in my life I am speechless...\n",
                       "If I told you that, I would have to kill you\n",
                       "We are working on some incredible things at the moment and I'll get back to you when I know\n",
                       "Unfortunately, some things I can't disclose right now\n",
                       'Woah ok, this topic got a bit personal lets move on']

    @staticmethod
    def _words(text):
        """Return the set of lower-cased alphabetic words found in *text*."""
        return set(re.findall(r"[a-z]+", text.lower()))

    def _is_negative(self, reply):
        """Return True if any word of *reply* is one of ``negative_responses``."""
        return not self._words(reply).isdisjoint(self.negative_responses)

    @staticmethod
    def _deeper_topic_phrases(topic):
        """Return the prompts used to ask for a more specific question on *topic*."""
        return [f'What more do you want to know about {topic}\n',
                f'{topic} is a rabbit hole of knowledge, lets explore it!\n',
                f'What other {topic} related questions do you have?\n',
                f"Let's continue this {topic} interrogation then...\n",
                f" I've got some juicy {topic} gossip if you ask me the right questions\n"]

    @staticmethod
    def _still_interested_phrases(topic):
        """Return the prompts used to ask whether to keep discussing *topic*."""
        return [f'Am I still entertaining you with {topic}?\n',
                f'Would you like to know more about {topic}?\n',
                f'I have vast opinions about {topic}, would you like to continue discussing?',
                f"Are you as captivated with {topic} as I am?\n",
                f"Would you like to continue this {topic} cross examination?\n"]

    def start_chat(self):
        """Greet the user and start the conversation unless they decline.

        The whole answer is inspected before deciding: a single negative word
        anywhere in it ends the program, otherwise ``chat`` is started exactly
        once (an empty answer counts as "yes").
        """
        # Greet user and save name
        user_name = input("Elon: Welcome to the future friend, my name is Elon Musk… what's your name?\n\n")

        # Ask if user wants to start to chat
        user_response = input(f'\nElon: Greetings {user_name}\nElon: I have technical expertise spanning over many industries along with some controversial opinions. Would you like me to share my infinte knowledge with you?\n\n')

        # If response negative, end conversation
        if self._is_negative(user_response):
            print('\nElon: Ok, have a great day!\n')
            return

        # Otherwise continue to chat
        self.chat(user_response)

    def chat(self, reply):
        """End the chat if *reply* contains an exit command, else ask for a topic."""
        if self.make_exit(reply):
            return
        self.topic_intent(reply)

    def change_chat(self, reply, topic):
        """Route the user's yes/no *reply* given after a tweet about *topic*.

        Args:
            reply: The user's answer to a "still interested?" question.
            topic: The topic currently being discussed.

        Returns:
            None. An exit command ends the conversation, a negative answer
            asks for a new topic, anything else digs deeper into *topic*.
        """
        # Exit commands win over everything else in the reply
        if self.make_exit(reply):
            return

        # Negative answer: the user is done with this topic, ask for a new one
        if self._is_negative(reply):
            print('\nElon: ' + random.choice(self.topic_change_phrases))
            self.topic_intent(topic)
            return

        # Positive answer: specialize the conversation within the current topic
        self.deeper_topic_intent(topic)

    def topic_intent(self, topic):
        """Ask for a topic, reply with a matching tweet and hand over to ``change_chat``.

        Args:
            topic: Ignored; kept so existing callers keep working. The topic
                is always re-derived from fresh user input.
        """
        # Keep asking until the extracted noun matches at least one tweet.
        # A loop (rather than recursion) guarantees ``reply`` below is only
        # requested once a tweet is actually available.
        while True:
            topic_sentence = input('Elon: ' + random.choice(self.new_topic_phrases) + '\n')
            topic = self.extract_pop_noun(elon_tweets, topic_sentence)

            # Refine tweets to only the ones which contain the selected topic
            topic_tweets = [tweet for tweet in elon_tweets if topic in tweet]
            if topic_tweets:
                break

            # Nothing known about this topic: say so and ask again
            print('\nElon: ' + random.choice(self.unknown_phrases))

        # Print one matching tweet at random and ask whether to continue
        reply = input('\nElon: ' + random.choice(topic_tweets) + '\n' + 'Elon: '
                      + random.choice(self._still_interested_phrases(topic)) + '\n')
        self.change_chat(reply, topic)

    def deeper_topic_intent(self, topic):
        """Ask a follow-up question within *topic* and reply with a tweet on both subjects.

        Args:
            topic: The topic currently being discussed; the reply tweet must
                contain it as well as the noun extracted from the follow-up.
        """
        # User input is reduced to its most popular noun, saved as deep_topic
        deep_topic_sentence = input('\nElon: ' + random.choice(self._deeper_topic_phrases(topic)) + '\n')
        deep_topic = self.extract_pop_noun(elon_tweets, deep_topic_sentence)

        # Tweets that mention both the initial topic and the deeper topic
        deep_lst = [tweet for tweet in elon_tweets if topic in tweet and deep_topic in tweet]

        if not deep_lst:
            # No tweet covers both topics: answer with an 'unknown' phrase.
            # Not every phrase ends with a newline, so add one when missing to
            # keep the next "Elon: " line from running into the phrase.
            unknown = random.choice(self.unknown_phrases)
            if not unknown.endswith('\n'):
                unknown += '\n'
            reply = input('\nElon: ' + unknown + 'Elon: '
                          + random.choice(self._still_interested_phrases(topic)) + '\n')
        else:
            # Otherwise a random matching tweet is printed
            reply = input('\nElon: ' + random.choice(deep_lst) + '\n' + 'Elon: '
                          + random.choice(self._still_interested_phrases(topic)) + '\n\n')

        # Both paths ask whether to continue; reply and initial topic go back to change_chat
        self.change_chat(reply, topic)

    def extract_pop_noun(self, tweets, topic_sentence):
        """Return the noun of *topic_sentence* that is most frequent in *tweets*.

        Args:
            tweets: Iterable of tweet strings. They are expected to be
                lower-cased (the module-level ``elon_tweets`` list is), because
                the returned noun is lower-cased too.
            topic_sentence: The user's sentence.

        Returns:
            * ``'poiuytrewq'`` when the sentence contains no noun; this
              gibberish matches no tweet and forces the 'unknown phrase' path.
            * The only noun when there is exactly one.
            * Otherwise the noun with the highest word count in *tweets*.
              English stopwords count as zero, and on a tie (including "no
              noun occurs in the tweets at all") the noun that comes first in
              the sentence wins.
        """
        # Tag the original sentence (before lower-casing) and keep every token
        # whose tag starts with 'N'
        tagged = pos_tag(word_tokenize(topic_sentence))
        nouns = [token.lower() for token, pos in tagged if pos.startswith('N')]

        # No nouns: return a gibberish string to force an unknown_phrase path
        if not nouns:
            return 'poiuytrewq'

        # Only one noun: that one is chosen
        if len(nouns) == 1:
            return nouns[0]

        # Several nouns: count only the candidate words instead of the whole
        # vocabulary; stopwords such as 'the', 'and', 'in' are never counted
        candidates = set(nouns) - set(stopwords.words('english'))
        counts = Counter(word
                         for tweet in tweets
                         for word in tweet.split()
                         if word in candidates)

        # max() returns the first maximal element, i.e. the earliest noun on ties
        return max(nouns, key=lambda noun: counts[noun])

    def make_exit(self, reply):
        """Print a goodbye and return True if *reply* contains an exit command.

        Matching is done on whole, lower-cased words, so "Bye!" exits while
        "unstoppable" (which merely contains "stop") does not.
        """
        if self._words(reply).isdisjoint(self.exit_commands):
            # No exit command: the script continues
            return False

        print('\nI survived your grilling! Have a great day now')
        return True


# Chatbot initialized at module level; the interactive conversation is only
# started when this file is executed directly (script or notebook cell), so
# importing the module does not block on input()
elon_bot = ChatBot()
if __name__ == "__main__":
    elon_bot.start_chat()