In [1]:
import nltk
import numpy as np
import random
import string

#### Reading the Video Game Dataset and Tokenizing it using NLTK

In [2]:
# Load the raw corpus. The context manager guarantees the file handle is
# closed even if reading fails; undecodable bytes are dropped (errors='ignore').
with open('videogame.txt', 'r', errors = 'ignore') as f:
    file = f.read()

# The corpus is kept in its original case. The previous bare `file.lower()`
# call threw its result away (strings are immutable), so it never had any
# effect; lower-casing happens later, inside LemNormalize.

# Sentence-level and word-level views of the corpus.
sent_token = nltk.sent_tokenize(file)
word_token = nltk.word_tokenize(file)

In [3]:
# Show a small slice of each tokenization as a sanity check on the loaded corpus.
sentence_preview = sent_token[1:2]
word_preview = word_token[1:10]

print("Sentence Tokenization: \n", sentence_preview, "\n")
print("Word Tokenization: \n", word_preview)

Sentence Tokenization: 
 ['Since the 1980s, video games have become an increasingly important part of the entertainment industry, and whether they are also a form of art is a matter of dispute.'] 

Word Tokenization: 
 ['video', 'game', 'is', 'an', 'electronic', 'game', 'that', 'involves', 'interaction']


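#### Processing the data using NLTK
The pre-trained Punkt model is what NLTK uses to split text into sentences. For normalizing the tokens, NLTK additionally includes:
1. Word lists and a stopword list for removing noise
2. Stemming and lemmatization capabilities (this notebook uses the WordNet lemmatizer)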

In [4]:
# Single WordNet lemmatizer instance shared by every call to LemToken.
lemmer = nltk.stem.WordNetLemmatizer()

def LemToken(tokens):
    """Return a list holding the lemma of each token, in input order.

    tokens: iterable of word strings.
    """
    lemmas = []
    for word in tokens:
        lemmas.append(lemmer.lemmatize(word))
    return lemmas

# Translation table that maps every ASCII punctuation code point to None,
# i.e. str.translate() deletes those characters.
rem_punc = str.maketrans('', '', string.punctuation)
def LemNormalize(text):
    """Lower-case text, drop punctuation, tokenize it and lemmatize the tokens.

    Returns the list of lemmas produced by LemToken.
    """
    lowered = text.lower()
    without_punct = lowered.translate(rem_punc)
    words = nltk.word_tokenize(without_punct)
    return LemToken(words)

#### Setting custom keywords for keyword matching

In [5]:
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey", "how u doin", "heya", "yo")
GREETING_RESPONSES = ["hi", "hey", "heylo", "hi there", "hello", "hi whats up dude", "Hi nice talking to you"]
def greeting(sentence):
    """Return a random greeting reply if `sentence` contains a known greeting.

    Matching is case-insensitive and works on whole words, so "yo" does not
    match inside "your". Punctuation around a word is ignored ("hi!" matches
    "hi"), and multi-word entries of GREETING_INPUTS such as "what's up" are
    matched as consecutive words.

    Returns one entry of GREETING_RESPONSES, or None when no greeting is found.
    """
    # Strip surrounding punctuation only; inner characters such as the
    # apostrophe in "what's" must survive so the phrase can still match.
    words = [raw.strip(string.punctuation) for raw in sentence.lower().split()]
    words = [word for word in words if word]

    for phrase in GREETING_INPUTS:
        target = phrase.split()
        span = len(target)
        # Slide a window of the phrase's length over the sentence.
        for start in range(len(words) - span + 1):
            if words[start:start + span] == target:
                return random.choice(GREETING_RESPONSES)
    return None

#### Generating Response
Using the concept of document similarity our chatbot creates responses.
Scikit Learn offers tools for this. These are:
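1. TF-IDF Vectorizer (Term Frequency - Inverse Document Frequency): converts each sentence into a vector in which every word is weighted by how often it occurs in that sentence, scaled down for words that occur in many sentences. The simplified example below only marks which vocabulary words are present; the real vectorizer stores a TF-IDF weight in each position instead of 0/1.
```
["Video", "Games", "is", "are", "worst", "great"] -> Video games are great
                                Word-presence vector : (1,1,0,1,0,1)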
```
2. Cosine Similarity: It is the measure of similarity between two non zero vectors. Calculated using the formula
```
Cosine Similarity(d1,d2) = Dot Product(d1,d2) / (|d1| * |d2|)
```

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
def response(user_response):
    """Return the corpus sentence most similar to `user_response`.

    The query is appended to the global `sent_token` list, the whole list is
    vectorized with TF-IDF (tokenized by LemNormalize, English stop words
    removed), and the query vector is compared with every corpus sentence by
    cosine similarity.

    Side effect: `user_response` stays appended to `sent_token` when this
    function returns; the chat loop is responsible for removing it again.

    Returns the best-matching sentence, or an apology string when the corpus
    is empty or no sentence shares a term with the query (similarity 0).
    """
    not_understood = "I am sorry! I don't understand you"

    sent_token.append(user_response)
    if len(sent_token) < 2:
        # Only the query itself is present: nothing to compare against.
        return not_understood

    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_token)

    # Compare the query (last row) with the corpus rows only. This avoids
    # relying on the query being ranked first, which the old "second best
    # overall" lookup assumed and which breaks on ties or an all-zero query.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    idx = scores.argmax()
    if scores[idx] == 0:
        return not_understood
    return sent_token[idx]

#### Chatbot Response Window

In [8]:
flag = True
print("Bot: My name is GameBot. Made by Rajmeet I am trained to answer your queries about gaming. \nif you want to exit, just type 'bye'")

# Interactive loop: read a line, answer it, stop on 'bye' or a thank-you.
while flag:
    # Surrounding whitespace is ignored so " bye " still ends the session.
    user_response = input(">").strip().lower()

    if user_response == 'bye':
        flag = False
        print("BOT: Bye! take care")
    elif user_response in ('thanks', 'thank you'):
        # Exclusive branch: previously control fell through after this reply
        # and the bot also printed a retrieved answer for "thanks".
        flag = False
        print("BOT: You are welcome")
    elif user_response == "what are video games":
        for i in sent_token[1:2]:
            print(i, end = "\n")
    else:
        # Call greeting() once: it picks its reply at random, so a second
        # call is wasted work.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("BOT: " + greeting_reply)
        else:
            print("BOT: ", end="")
            try:
                print(response(user_response))
            finally:
                # response() leaves the query appended to sent_token. Drop
                # exactly that trailing entry (even if response() raised)
                # instead of remove(), which deletes the first equal element
                # and could therefore delete a real corpus sentence.
                if sent_token and sent_token[-1] == user_response:
                    sent_token.pop()

Bot: My name is GameBot. Made by Rajmeet I am trained to answer your queries about gaming. 
if you want to exit, just type 'bye'
>hi
BOT: hello
>what are video games
Since the 1980s, video games have become an increasingly important part of the entertainment industry, and whether they are also a form of art is a matter of dispute.
>what are multiplayer games
BOT: 

  'stop_words.' % sorted(inconsistent))


Multiplayer
Main article: Multiplayer video game
Multiplayer video games are those that can be played either competitively, sometimes in Electronic Sports, or cooperatively by using either multiple input devices, or by hotseating.
>tell me about a console
BOT: Home console
A white Xbox 360 console and controller.
>dajsuidjsai
BOT: I am sorry! I don't understand you
>bye
BOT: Bye! take care
