In [59]:
# use natural language toolkit
import nltk
from nltk.stem.lancaster import LancasterStemmer
import os
import json
import datetime
stemmer = LancasterStemmer()

In [60]:
# 3 classes of training data
training_data = []
training_data.append({"class":'Advantages',"sentence":'Because tasks can be completed within the browser without communicating with the server, JavaScript can create a smooth “desktop-like” experience for the end user'})
training_data.append({"class":'Advantages',"sentence":'From drag-and-drop blocks to stylized sliders, there are numerous ways that JavaScript can be used to enhance a website’s UI/UX.'})
training_data.append({"class":'Advantages',"sentence":'TObjects can inherit from other objects, which makes JavaScript so simple, powerful, and great for dynamic applications.'})
training_data.append({"class":'Advantages',"sentence":'javascript has the ability to inherit parent class behaviour to child class'})

training_data.append({"class":'Disadvantages',"sentence":' Because the code executes on the users’ computer, in some cases it can be exploited for malicious purposes. This is one reason some people choose to disable Javascript'})
training_data.append({"class":'Disadvantages',"sentence":'JavaScript is sometimes interpreted differently by different browsers. Whereas server-side scripts will always produce the same output, client-side scripts can be a little unpredictable. Don’t be overly concerned by this though - as long as you test your script in all the major browsers you should be safe. Also, there are services out there that will allow you to test your code automatically on check in of an update to make sure all browsers support your code.'})

#training_data.append({"class":'Differences',"sentence":' A constructor function instantiates an instance via the “new” keyword. This new instance inherits properties from a parent class.'})
#training_data.append({"class":'Differences',"sentence":' An instance is created by cloning an existing object that serves as a prototype. This instance—often instantiated using a factory function or “Object.create()”—can benefit from selective inheritance from many different objects'})
print ("%s sentences in training data" % len(training_data))

6 sentences in training data


In [62]:
# capture unique stemmed words in the training corpus
class_words={}
corpus_words = {}
classes = list(set([a['class'] for a in training_data]))
for c in classes:
    class_words[c] = []
    
for data in training_data:
    # tokenize each sentence into words
    for word in nltk.word_tokenize(data['sentence']):
        # ignore a few things
        if word not in ["?", "'s"]:
            # stem and lowercase each word
            stemmed_word = stemmer.stem(word.lower())
            if stemmed_word not in corpus_words:
                corpus_words[stemmed_word] = 1
            else:
                corpus_words[stemmed_word] += 1
                
            class_words[data['class']].extend([stemmed_word])

# we now have each word and the number of occurances of the word in our training corpus (the word's commonality)
print ("Corpus words and counts: %s" % corpus_words)
# also we have all words in each class
print ("Class words: %s" % class_words)

Corpus words and counts: {'becaus': 2, 'task': 1, 'can': 6, 'be': 6, 'complet': 1, 'within': 1, 'the': 8, 'brows': 4, 'without': 1, 'commun': 1, 'with': 1, 'serv': 2, ',': 8, 'javascrib': 6, 'cre': 1, 'a': 3, 'smoo': 1, '“': 1, 'desktop-like': 1, '”': 1, 'expery': 1, 'for': 3, 'end': 1, 'us': 3, 'from': 2, 'drag-and-drop': 1, 'block': 1, 'to': 7, 'styl': 1, 'slid': 1, 'ther': 3, 'ar': 2, 'num': 1, 'way': 1, 'that': 2, 'enh': 1, 'websit': 1, '’': 3, 's': 1, 'ui/ux': 1, '.': 7, 'tobject': 1, 'inherit': 2, 'oth': 1, 'object': 1, 'which': 1, 'mak': 2, 'so': 1, 'simpl': 1, 'pow': 1, 'and': 1, 'gre': 1, 'dynam': 1, 'apply': 1, 'has': 1, 'abl': 1, 'par': 1, 'class': 2, 'behavio': 1, 'child': 1, 'cod': 3, 'execut': 1, 'on': 3, 'comput': 1, 'in': 3, 'som': 2, 'cas': 1, 'it': 1, 'exploit': 1, 'malicy': 1, 'purpos': 1, 'thi': 2, 'is': 2, 'reason': 1, 'peopl': 1, 'choos': 1, 'dis': 1, 'sometim': 1, 'interpret': 1, 'diff': 2, 'by': 2, 'wherea': 1, 'server-side': 1, 'scripts': 2, 'wil': 2, 'alway': 

In [63]:
# we can now calculate the Naive Bayes score for a new sentence
sentence = "javascript is server-side programming language"

# calculate a score for a given class
def calculate_class_score(sentence, class_name):
    score = 0
    for word in nltk.word_tokenize(sentence):
        if word in class_words[class_name]:
            score += 1
    return score

In [64]:
# now we can find the class with the highest score
for c in class_words.keys():
    print ("Class: %s  Score: %s" % (c, calculate_class_score(sentence, c)))

Class: Disadvantages  Score: 2
Class: Advantages  Score: 0


In [65]:
# calculate a score for a given class taking into account word commonality
def calculate_class_score_commonality(sentence, class_name):
    score = 0
    for word in nltk.word_tokenize(sentence):
        if word in class_words[class_name]:
            score += (1 / corpus_words[word])
    return score

In [66]:
# now we can find the class with the highest score
for c in class_words.keys():
    print ("Class: %s  Score: %s" % (c, calculate_class_score_commonality(sentence, c)))

Class: Disadvantages  Score: 1.5
Class: Advantages  Score: 0


In [67]:
def find_class(sentence):
    high_class = None
    high_score = 0
    for c in class_words.keys():
        score = calculate_class_score_commonality(sentence, c)
        if score > high_score:
            high_class = c
            high_score = score
    return high_class, high_score

In [68]:
find_class("javascript inherits properties from parent class")

('Advantages', 1.0)