In [1]:
#Importing the necessary libraries
##The io module manages file-related input and output operations, while the random module provides
## pseudo-random selection (used here to pick a greeting reply), and the string module supplies string constants such as the punctuation set.
##The numpy library helps us in working with arrays.
##The scikit-learn TfidfVectorizer helps convert a collection of raw documents to a matrix of TF-IDF(Term Frequency Inverse Document Frequency) features
##it computes the word counts, IDF values, and Tf-idf scores all using the same dataset.
##The scikit-learn cosine similarity helps to measure similarity between documents 

import io
import random
import string # to process standard python strings
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [2]:
##The NLTK helps to work with human language data
##WordNet is a large, freely and publicly available lexical database for the English language
##that establishes structured semantic relationships between words.

import nltk
from nltk.stem import WordNetLemmatizer
nltk.download('popular', quiet=True)

True

In [3]:
#This is the text file containing the collection of words relating to the ideas behind machine learning models 
##And some key algorithms used for each
##raw holds the text read from the file
###raw.lower() converts that text to lower case

# Location of the plain-text corpus; change this one constant to point at your copy of the file.
CORPUS_PATH = 'C:/Users/USER/Documents/List.txt'

# Read inside a `with` block so the file handle is closed as soon as the text has been loaded.
# errors='ignore' drops bytes that cannot be decoded instead of raising.
with open(CORPUS_PATH, 'r', errors='ignore') as ML_model:
    raw = ML_model.read()

# Lower-case the whole corpus so later matching is case-insensitive.
raw = raw.lower()

In [4]:
#Data Cleaning and Pre-processing

##Tokenization is used to describe the process of converting the normal text strings into a list of tokens
##Lemmatization is the process of reducing each word to its base (dictionary) form
##The sent token helps convert to list of sentences
### The word token helps convert to list of words

# Split the lower-cased corpus two ways: into sentences and into individual word tokens.
sent_tokens = nltk.tokenize.sent_tokenize(raw)
word_tokens = nltk.tokenize.word_tokenize(raw)

In [5]:
#We then define a function (LemTokens) which takes a list of tokens and returns the normalized (lemmatized) tokens.

# One shared WordNet lemmatizer instance, reused for every token.
lemmer = WordNetLemmatizer()

def LemTokens(tokens):
    """Return a list with the lemmatized form of each token, in input order."""
    return list(map(lemmer.lemmatize, tokens))
# Translation table for str.translate: each punctuation code point maps to None, i.e. is deleted.
remove_punct_dict = dict.fromkeys(map(ord, string.punctuation))

def LemNormalize(text):
    """Lower-case ``text``, delete punctuation, split it into word tokens and lemmatize them."""
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemTokens(words)

In [6]:
##The Keywords are then matched, defining a function which aids greetings  

# Phrases recognised as a greeting (matched case-insensitively) and the replies to choose from.
Inputs = ("hello", "hi", "greetings", "How far", "what's up","hey",)
Responses = ["hi", "hey", "*wave*", "I am great! How can I help you", "hello", "I am glad! You are talking to me"]

def greeting(sentence):
    """Return a random greeting reply if ``sentence`` contains a known greeting.

    Matching is done on whole words/phrases after lower-casing and after
    stripping punctuation from the edges of each word, so multi-word entries
    such as "How far" and inputs such as "Hello!" are recognised, while
    substrings of longer words (e.g. "hi" inside "high") are not.

    Parameters
    ----------
    sentence : str
        The user's message.

    Returns
    -------
    str or None
        A random element of ``Responses``, or ``None`` when no greeting is found.
    """
    import string  # local import keeps this cell self-contained

    # Normalise to single-space-separated lower-case words, padded with spaces
    # so that a phrase match is always anchored on word boundaries.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    normalized = " " + " ".join(word for word in words if word) + " "

    for phrase in Inputs:
        if " " + phrase.lower() + " " in normalized:
            return random.choice(Responses)
    return None

In [7]:
#Define a function that returns the corpus sentence most similar to the user's input

def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The query is appended to the module-level ``sent_tokens`` list so that it
    is vectorized together with the corpus. This function does not remove it
    again; the caller is expected to do so after the call.

    Returns the matching sentence from ``sent_tokens``, or an apology string
    when the selected similarity score is exactly 0.
    """
    sent_tokens.append(user_response)

    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    matrix = vectorizer.fit_transform(sent_tokens)

    # Similarity of the query (last row) against every row, itself included.
    scores = cosine_similarity(matrix[-1], matrix)

    # Rank ascending and take the second-to-last entry: the last one is
    # normally the query matching itself.
    ranking = scores.argsort()[0]
    best_idx = ranking[-2]
    best_score = scores[0][best_idx]

    if best_score == 0:
        return "I am sorry! I don't understand what you mean"
    return sent_tokens[best_idx]

In [8]:
#Alexia's interaction based on user inputs

# Alexia's chat loop: read a line, answer it, and stop on "bye" or a thank-you.
flag = True
print("Alexia: My name is Alexia. I will answer your queries about Machine learning models. If you want to exit, type Bye!")
while flag:
    # Normalise case and surrounding whitespace so that e.g. "Bye " still ends the chat.
    user_response = input().strip().lower()

    if user_response == 'bye':
        flag = False
        print("Alexia: Bye! take care of yourself..")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("Alexia: You are welcome..")
    else:
        # greeting() picks its reply at random, so evaluate it once and reuse the result.
        reply = greeting(user_response)
        if reply is not None:
            print("Alexia: " + reply)
        else:
            print("Alexia: ", end="")
            try:
                print(response(user_response))
            finally:
                # response() appends the query to sent_tokens; undo that even if it
                # raised, so the corpus is unchanged for the next turn or a re-run.
                sent_tokens.remove(user_response)

Alexia: My name is Alexia. I will answer your queries about Machine learning models. If you want to exit, type Bye!
hello
Alexia: hello
hey
Alexia: I am glad! You are talking to me
What is Supervised learning
Alexia: supervised learning
supervised learning is the simplest of the learning models to understand.
What is Neural networks
Alexia: you can learn more about neural networks and back-propagation in "a neural networks deep dive."
Q-learning
Alexia: q-learning
q-learning is one approach to reinforcement learning that incorporates q values for each state–action pair that indicate the reward to following a given state path.
Decision trees
Alexia: decision trees
a decision tree is a supervised learning method for classification.
K-means clustering
Alexia: k-means clustering
k-means clustering is a simple and popular clustering algorithm that originated in signal processing.
Bye
Alexia: Bye! take care of yourself..
