In [34]:
from flask import Flask, render_template, request
app = Flask(__name__)
from nltk.corpus import wordnet
import pymysql
import configparser
from stanfordcorenlp import StanfordCoreNLP
import gensim
import json

config = configparser.ConfigParser()
config.read("config.txt")
word2vec_path = config.get("configuration","word2vec_path")
stanford_corenlp_path = config.get("configuration","stanford_corenlp_path")
print(stanford_corenlp_path)


pymysql.install_as_MySQLdb()
# Connect to DB
mydbhost = config.get("configuration","mydbhost")
mydbuser = config.get("configuration","mydbuser")
mydbpasswd = config.get("configuration","mydbpasswd")
mydbdb = config.get("configuration","mydbdb")

# Importing word2vec to find similarity and neighboring words
model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_path, binary=True, limit=500000) 
print('Word 2 Vec model ready')

/Users/shrey/AnacondaProjects/stanford-corenlp-full-2018-02-27
Word 2 Vec model ready


In [35]:
def sentiments(word, nlp):
    """ This function calls Stanford Core Nlp's sentiment annotator
    
        Args:
            word (String): The input word for setiment detection
            nlp (StanfordCoreNLP): Core nlp instance for sentiment detection
        Returns:
            String : Sentiment value: Very negative; Negative; Neutral; 
            postive; Very positive
    """
    try:
        sjson = nlp.annotate(word,properties={'annotators': 'sentiment','outputFormat': 'json','timeout': 1000})
        res = json.loads(sjson)
        sval = res["sentences"][0]["sentiment"]
        return(sval)
    except json.decoder.JSONDecodeError as e:
        print (e)

In [36]:
def ners(sentence, nlp):
    sentence_ner = nlp.ner(sentence + ' dummy')
    to_replace_ners = []
    org = ''
    kind = ''
    for (i, j) in sentence_ner: #i = NCSU, j = ORGANISATION; i = bent, j = 0
        if(j == 'O'):
            if org != '': 
                to_replace_ners.append((org[1:], kind))
                org = ''
            else:
                pass
        else:
            org = org + "_" + i
            kind = j
    #print (sentence_ner)   
    return to_replace_ners

#ners("Barrack Obama went to the Harvard University", nlp)

In [37]:
def pos_tags(statement, nlp):
    
    sentence_tags = nlp.pos_tag(statement)
    
    to_replace_verbs = []
    to_replace_verbphrases = []
    to_replace_adjectives = []
    to_replace_adjphrases = []
    to_replace_nouns = []
    to_replace_nounphrases = []
    
    verb_check = 0
    noun_check = 0
    adj_check = 0

    for (i, j) in sentence_tags: # Here verb checks are activated after a verb is discovered, so pair of verbs as verbphrases? Check Logic.
        
        
      if(verb_check == 1):
        verbphrase = verb + '_' + i
        to_replace_verbphrases.append((verbphrase, i))
        verb_check = 0 

      if(noun_check == 1):
        nounphrase = noun + '_' + i
        to_replace_nounphrases.append((nounphrase, i))
        noun_check = 0 

      if(adj_check == 1):
        adjphrase = adj + '_' + i
        to_replace_adjphrases.append((adjphrase, i))
        adj_check = 0 

      if(j == 'VBD' or j=='VBZ' or j == 'VBP' or j == 'VBN' or j == 'VBG'):
        to_replace_verbs.append(i)
        verb = i
        verb_check = 1

      if(j == 'NN' or j=='NNS' or j == 'NNP' or j == 'NNPS'):
        to_replace_nouns.append(i)
        noun = i
        noun_check = 1

      if(j == 'JJ'):
        to_replace_adjectives.append(i)
        adj = i
        adj_check = 1
        
    return (to_replace_verbs, 
            to_replace_verbphrases,
            to_replace_adjectives,
            to_replace_adjphrases,
            to_replace_nouns,
            to_replace_nounphrases)

#pos_tags("Barrack Obama went to the Harvard University", nlp)

In [38]:
def w2v_similar(to_replace_ners, to_replace_verbs, to_replace_verbphrases,
    to_replace_adjectives, to_replace_adjphrases,
    to_replace_nouns, to_replace_nounphrases,  model):
    
    ## These are using W2V, currently used.
    topk = 10
    replacement_ners = []
    replacement_verbs = []
    replacement_verbphrases = []
    replacement_nouns = []
    replacement_nounphrases = []
    replacement_adjectives = []
    replacement_adjphrases = []
    
    for (i, j) in to_replace_ners:
        try:
            similar_ners = model.most_similar([i, j.lower()], [], topk)
            senti_ners = []
            for (similar_ner, score) in similar_ners:
                senti_ners.append([similar_ner, score, sentiments(similar_ner, nlp)])
            replacement_ners.append((i, senti_ners))
        except KeyError as e:
            print(e)
        
        ## Use W2V for replacing nouns, verbs, adj verbphrases, nounphrases, and adj phrases
    
    for verb in to_replace_verbs:
        try: 
            similar_verbs = model.most_similar(verb, [], topk)
            senti_verbs = [[i, j, sentiments(i, nlp)] for (i, j) in similar_verbs]
            replacement_verbs.append((verb, senti_verbs))
        except KeyError as e:
            print(e)
            
    for (verbphrase, nn) in to_replace_verbphrases:
        try:
            similar_verbphrases = model.most_similar([verbphrase, nn], [], topk)
            replacement_verbphrases.append((verbphrase, similar_verbphrases))
        except KeyError as e:
            print(e)
    
 
    for noun in to_replace_nouns:
        try:
            similar_nouns = model.most_similar(noun, [], topk)
            senti_nouns = [[i, j, sentiments(i, nlp)] for (i, j) in similar_nouns]
            replacement_nouns.append((noun, senti_nouns))
        except KeyError as e:
            print(e)
    
    
    
    for (nounphrase, nn) in to_replace_nounphrases:
        try:
            similar_nounphrases = model.most_similar([nounphrase, nn], [], topk)
            replacement_nounphrases.append((nounphrase, similar_nounphrases))
        except KeyError as e:
            print(e)
    
    
    
    for adjective in to_replace_adjectives: 
        try: 
            similar_adjectives = model.most_similar(adjective, [], topk)
            senti_adjectives = [[i, j, sentiments(i, nlp)] for (i, j) in similar_adjectives]
            replacement_adjectives.append((adjective, senti_adjectives))
        except KeyError as e:
            print(e)
        
    
    
    for (adjphrase, nn) in to_replace_adjphrases:
        try:
            similar_adjphrases = model.most_similar([adjphrase, nn], [], topk)
            replacement_adjphrases.append((adjphrase, similar_adjphrases))
        except KeyError as e:
            print(e)
    
#     print("Alternative NERs" + str(replacement_ners))
#     print("Alternative Verbs" +  str(replacement_verbs))
#     print("Alternative Verb Phrases" + str(replacement_verbphrases))
#     print("Alternative Noun" + str(replacement_nouns))
#     print("Alternative Noun Phrases" + str(replacement_nounphrases))
#     print("Alternative Adjectives" + str(replacement_adjectives))
#     print("Alternative Adjective Phrases" + str(replacement_adjphrases))
    
    
    return (replacement_ners,
            replacement_verbs,
            replacement_verbphrases,
            replacement_nouns,
            replacement_nounphrases,
            replacement_adjectives,
            replacement_adjphrases)

# statement = "Barrack Obama went to the Harvard University" 
# to_replace_verbs, to_replace_verbphrases, to_replace_adjectives, to_replace_adjphrases, to_replace_nouns, to_replace_nounphrases = pos_tags(
#     statement, nlp)
# w2v_similar(ners(statement, nlp), to_replace_verbs, 
#             to_replace_verbphrases,
#             to_replace_adjectives,
#             to_replace_adjphrases,
#             to_replace_nouns,
#             to_replace_nounphrases, model)

In [39]:
## Connect to Wikipedia Database of instance and class. 
## For each ner, find similar ners using W2V and append to replacement_ners (earlier version)
## For each similar ner, find it's category from simple types Wikipedia Database
def wiki_ontology(to_replace_ners):
    #print(to_replace_ners)
    ner_categories = []
    
    for ner in to_replace_ners:
        replace = []
        connection = pymysql.connect(host=mydbhost, user=mydbuser, passwd=mydbpasswd, db=mydbdb)
        ne = ner[0]
        print(ne)
        try:
            with connection.cursor() as cursor:
            # Execute SQL select statement
                query = "SELECT instance, class FROM simple_types where instance like CONCAT('%', '"+ ne + "' ,'%')"
                cursor.execute(query)
                # Get the number of rows in the resultset
                numrows = cursor.rowcount
                # Get and display one row at a time
                rep = []
                for x in range(0, 10): #top 10 results from wikicat
                    row = cursor.fetchone()
                    if row:
                        supernym = row[1][9:-1].replace('_', ' ')
                        rep.append([supernym, 0, 0])
                if rep:
                    ner_categories.append((ne.replace('_', ' '), rep))
        finally:
          # Close connection.
          connection.close()
                    
    return (ner_categories)

# to_replace_nlp = ners("Obama went to the Harvard University", nlp)
# print(to_replace_nlp)
# wiki_ontology(to_replace_nlp)

In [40]:
@app.route("/")
def main():
    print('Main Page opened')
    return render_template('index.html')

In [41]:
@app.route('/explore',methods=['POST'])
def explore():   
    # importing StandfordCoreNLP to tokenize, tag, and ner
    # Tree syntax of natural language: http://www.cs.cornell.edu/courses/cs474/2004fa/lec1.pdf


    # print("In Explore")
    # print("StanfordCoreNLP path:", stanford_corenlp_path)
    

    # read the posted values from the UI
    statement = ''
    print('explore')
    if request.form.get('inputStatement', None):
        statement = request.form['inputStatement']
        print(statement)
    
    
    statement_sentiment = "Sentiment of the query is: " + sentiments(statement, nlp)
    to_replace_ners = ners(statement, nlp)
    print(statement_sentiment)
    #sentence_tokens = nlp.word_tokenize(statement)
    # sentence_parse = nlp.parse(statement)
    # sentence_dependency = nlp.dependency_parse(statement)

    to_replace_verbs, to_replace_verbphrases, to_replace_adjectives, to_replace_adjphrases, to_replace_nouns, to_replace_nounphrases = pos_tags(statement, nlp)
    
    replacement_ners, replacement_verbs, replacement_verbphrases, replacement_nouns, replacement_nounphrases, replacement_adjectives, replacement_adjphrases = w2v_similar(
        to_replace_ners, to_replace_verbs, 
        to_replace_verbphrases, to_replace_adjectives, to_replace_adjphrases,
        to_replace_nouns, to_replace_nounphrases, model)
    
    replacement_ners.extend(wiki_ontology(to_replace_ners))
    
    print("Analysis complete")
    return render_template ('explore.html', 
                           statement = statement,
                           statement_sentiment = statement_sentiment,
                           replacement_ners = replacement_ners,
                           replacement_verbs = replacement_verbs,
                           replacement_verbphrases = replacement_verbphrases,
                           replacement_nouns = replacement_nouns,
                           replacement_nounphrases = replacement_nounphrases,
                           replacement_adjectives = replacement_adjectives,
                           replacement_adjphrases = replacement_adjphrases,
                           )
    #return redirect(url_for('main', statement = _statement))
 
    # validate the received values
    # if _statement:
        # return json.dumps({'html':'<span>All fields good !!</span>'})
    # else:
        # return json.dumps({'html':'<span>Enter the required fields</span>'})

In [42]:
@app.route('/generate',methods=['POST'])
def generate():
    print("Generating alternatives")
    statement = request.form['stmnt']
    alt = []
    alt_stmnts = []
    for i in request.form:
        if i != "stmnt": 
            opt = request.form.getlist(i)
            for j in opt:
                if i != j:
                    alt.append((i, j))
            
#        statement = statement.replace(i, request.form[i])
#    print (statement)
    from itertools import chain, combinations
    def all_subsets(ss):
        return chain(*map(lambda x: combinations(ss, x), range(0, len(ss)+1)))
    
    asubs = set(all_subsets(alt))
    print(len(asubs)) 
    for subset in asubs:
        nustat = statement
        for i in subset:
            nustat = nustat.replace(i[0],i[1])
        alt_stmnts.append( (nustat, sentiments(nustat, nlp)) ) 
    
    return render_template ('generate.html', 
                       statements = set(alt_stmnts))   

In [None]:
nlp = StanfordCoreNLP(stanford_corenlp_path)
print('Core NLP instance')
if __name__ == "__main__":
    from werkzeug.serving import run_simple
    app.debug = True
    run_simple('localhost', 2000, app, use_debugger=True)

Core NLP instance
explore
Russian Hackers sent phishing emails to North Carolina State University servers
Sentiment of the query is: Negative
"word 'North_Carolina_State_University' not in vocabulary"
"word 'sent_phishing' not in vocabulary"
"word 'Hackers_sent' not in vocabulary"
"word 'emails_to' not in vocabulary"
"word 'Carolina_State' not in vocabulary"
"word 'State_University' not in vocabulary"
"word 'University_servers' not in vocabulary"
"word 'Russian_Hackers' not in vocabulary"
Russian
North_Carolina_State_University
Analysis complete
Generating alternatives
2
explore
Russian Hackers sent phishing emails to Nuclear research reactors servers 
Expecting value: line 1 column 1 (char 0)


Traceback (most recent call last):
  File "/Users/shrey/anaconda3/lib/python3.6/site-packages/flask/app.py", line 1997, in __call__
    return self.wsgi_app(environ, start_response)
  File "/Users/shrey/anaconda3/lib/python3.6/site-packages/flask/app.py", line 1985, in wsgi_app
    response = self.handle_exception(e)
  File "/Users/shrey/anaconda3/lib/python3.6/site-packages/flask/app.py", line 1540, in handle_exception
    reraise(exc_type, exc_value, tb)
  File "/Users/shrey/anaconda3/lib/python3.6/site-packages/flask/_compat.py", line 33, in reraise
    raise value
  File "/Users/shrey/anaconda3/lib/python3.6/site-packages/flask/app.py", line 1982, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/shrey/anaconda3/lib/python3.6/site-packages/flask/app.py", line 1614, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/shrey/anaconda3/lib/python3.6/site-packages/flask/app.py", line 1517, in handle_user_exception
    reraise(exc_type, exc

explore
Russian Hackers sent phishing emails to Nuclear research reactors servers 
Sentiment of the query is: Negative
"word 'sent_phishing' not in vocabulary"
"word 'Hackers_sent' not in vocabulary"
"word 'emails_to' not in vocabulary"
"word 'research_reactors' not in vocabulary"
"word 'reactors_servers' not in vocabulary"
"word 'Russian_Hackers' not in vocabulary"
"word 'Nuclear_research' not in vocabulary"
Russian
Analysis complete


In [None]:
## Experiment with wordnet for replacing verbs, nouns, and adjectives
## Not yet used.
#    replacement_verbs_synonyms = []
#    replacement_verbs_antonyms = []
#    replacement_adjectives_synonyms = []
#    replacement_adjectives_antonyms = []
#    replacement_nouns_synonyms = []
#    replacement_nouns_antonyms = [] 
#    for verb in to_replace_verbs:
#        for syn in wordnet.synsets(verb): 
#            for l in syn.lemmas():
#                replacement_verbs_synonyms.append(l.name())
#                if l.antonyms():
#                    #print('L.Antonyms:', l.antonyms())
#				    #replacement_verbs_antonyms.append(l.antonyms()[0].name())
#                    for m in l.antonyms():
#                        replacement_verbs_antonyms.append(m.name())
#
#    print('Replacement Verb Synonyms', set(replacement_verbs_synonyms))
#    print('Replacement Verb Antonyms', set(replacement_verbs_antonyms))
#
#	
#    for noun in to_replace_nouns:
#        for syn in wordnet.synsets(noun): 
#            for l in syn.lemmas():
#                replacement_nouns_synonyms.append(l.name())
#                if l.antonyms():
#                    #print('L.Antonyms:', l.antonyms())
#                    #replacement_nouns_antonyms.append(l.antonyms()[0].name())
#                    for m in l.antonyms():
#                        replacement_nouns_antonyms.append(m.name())
#					
#    print('Replacement Noun Synonyms', set(replacement_nouns_synonyms))
#    print('Replacement Noun Antonyms', set(replacement_nouns_antonyms))		
#
#	
#    for adjective in to_replace_adjectives:
#        for syn in wordnet.synsets(adjective): 
#            for l in syn.lemmas():
#                replacement_adjectives_synonyms.append(l.name())
#                if l.antonyms():
#                    #print('L.Antonyms:', l.antonyms())
#                    #replacement_adjectives_antonyms.append(l.antonyms()[0].name())
#                    for m in l.antonyms():
#                        replacement_adjectives_antonyms.append(m.name(
#					
#    print('Replacement Adj Synonyms', set(replacement_adjectives_synonyms))
#    print('Replacement Adj Antonyms', set(replacement_adjectives_antonyms))	

#    nlp.close()

#Examples: Russian Hackers sent phishing emails to North Carolina State University servers
#Examples:  Through a HTTP message sent to host, the malware begins to proxy TCP connections