# NER

#### Imports

In [1]:
from functional import seq, pseq
from functional.streams import Sequence
from IPython.core.display import HTML

def display_seq(sequence:Sequence,rows:int)-> None:
    """Display ``sequence`` as an HTML table limited to ``rows`` rows.

    The instance's ``_repr_html_`` hook is temporarily replaced so that the
    notebook's ``display`` renders ``sequence.tabulate(rows, tablefmt='html')``.
    Afterwards a 10-row hook is installed again.

    Args:
        sequence: object exposing ``tabulate(rows, tablefmt=...)``.
        rows: number of rows to render for this single call.
    """
    sequence._repr_html_ = lambda: sequence.tabulate(rows, tablefmt='html')
    try:
        display(sequence)
    finally:
        # Re-install the 10-row hook even when rendering raises; otherwise a
        # failed display would leave the one-off `rows` override in place.
        sequence._repr_html_ = lambda: sequence.tabulate(10, tablefmt='html')

In [2]:
#! python -m spacy download en_core_web_lg --user
#! python -m spacy download en_core_web_sm --user

In [3]:
# Look up WordNet synonyms (lemma names across all synsets) for a few urgency-related words:
from nltk import wordnet
def get_sysn(word):
    """Return the set of lemma names found across all WordNet synsets of ``word``."""
    lemma_names = set()
    for synset in wordnet.wordnet.synsets(word):
        for lemma in synset.lemmas():
            lemma_names.add(lemma.name())
    return lemma_names

# Print each probe word, then the set of WordNet lemma names found for it.
for wo in ("asap", "important", "quick"):
    print(wo)
    print(get_sysn(wo))

asap
{'ASAP'}
important
{'crucial', 'significant', 'of_import', 'important', 'authoritative'}
quick
{'agile', 'straightaway', 'promptly', 'spry', 'fast', 'nimble', 'immediate', 'prompt', 'quick', 'ready', 'speedy', 'quickly', 'flying', 'warm'}


In [4]:
from itertools import chain
from spacy.lang.en import English
from typing import List, Optional
from functional import seq
from dateparser import parse as parse_date
# import nltk,spacy
import en_core_web_lg
import re
from spacy.tokens.doc import Doc
# Large English model; the extractors below read lemmas and named entities
# from the Docs it produces.
nlp = en_core_web_lg.load()

# Separate `English()` pipeline that sent_tokenize uses only for `.sents`.
sen_nlp = English()
# Raise the length limit to 6e9 (presumably characters per text - confirm).
sen_nlp.max_length = 6 * 1000 * 1000 * 1000
# NOTE(review): create_pipe(name) followed by add_pipe(component) looks like the
# spaCy 2.x API; newer spaCy presumably expects add_pipe("sentencizer") - confirm.
sentencizer = sen_nlp.create_pipe("sentencizer")
sen_nlp.add_pipe(sentencizer)

In [5]:
# Areas of the application that a report may refer to.
application_parts = [
    "user",
    "admin",
    "mobile",
]

# Nouns that may follow an area name (e.g. "user" + "panel").
app_suffixes = [
    "panel",
    "application",
    "website",
    "page",
    "board",
    "site",
]

In [6]:

# Hand-written evaluation data: each key is a free-text issue report and each
# value is the manually declared annotation for it (printed as "Declared" by the
# evaluation loop). The annotation dicts do not all carry the same keys.
# NOTE(review): the annotation key is spelled "exptected" and one value contains
# "wrontg"; both are runtime data and are left untouched here.
samples = {
   """On yesterday at 5 pm i couldn't complete the transaction.  It was on user website. I expected transaction to look as always, but i saw message "something went wrong".  This issue is very important to us."""  :
    {
        "time" : "10 jan at 17:00",
        "exptected": "then transaction to look as always",
        "part of app": "user website",
        "actual": """saw message "something went wrontg" """,
        "importance" : "high"
    },
   """I write to report an issue. Trading Panel stopped working Today at 14:00. User "kazik" was affected. The page wouldnt load, i was a little upset. Please fix it when you can. """:
    {
        "time" : "11 jan at 14:00",
        "exptected": "",
        "actual": """the page wouldn't load""",
        "importance" : "low"
    },
    """
   Hello!
Yesterday I had some problems on the admin panel. I tried to create a new user.
The app should create the user an return a success message but nothing happened. The "create" button was completely unresponsive.
Can you fix it ASAP? It's very important. Thank you in advance. 
    """: {
        "time" : "10 jan at 14:00",
        "exptected": "create the user and return success message",
        "actual": """nothing happened""",
        "importance" : "high"
    } ,
    """
    3 days ago I was looking at user page. I expected it to work as always and then it was covered in red errors.
    """ :
    {"time": "9 jan"}
    
}

In [7]:

# Clause separators: a sentence is cut at each of these and the separator
# itself is discarded by sent_tokenize.
splitters = [
    "but",
    ", then",
    "and then",
] + ["but {} ".format(suf) for suf in ["I", "she", "he", "it", "we", "they"]]


def _splitter_pattern(phrase):
    """Return a regex for one splitter that cannot match inside a longer word."""
    pattern = re.escape(phrase)
    # Guard only the edges that are word characters, so ", then" still matches
    # right after a word while "but" no longer matches inside "button".
    if phrase[:1].isalnum():
        pattern = r"(?<!\w)" + pattern
    if phrase[-1:].isalnum():
        pattern = pattern + r"(?!\w)"
    return pattern


# The capturing group makes re.split return the separators too; they are
# filtered out again in sent_tokenize.
split_reg = re.compile("({})".format("|".join(_splitter_pattern(s) for s in splitters)))
def sent_tokenize(data: str):
    """Split ``data`` into clause-level chunks and parse each with ``nlp``.

    The text is first cut into sentences by ``sen_nlp``; every sentence is then
    cut again at the ``splitters`` phrases. Separator chunks and blank chunks
    are dropped, and every remaining chunk is passed through ``nlp``.

    Args:
        data: raw report text.

    Returns:
        A list with one ``nlp`` result per remaining chunk, in text order.
    """
    # TODO - include but (I|it|he|she): the "but <pronoun> " entries come after
    # the bare "but" alternative, so the regex always takes "but" first.
    return seq(sen_nlp(data).sents)\
        .flat_map(lambda sen: split_reg.split(sen.text))\
        .filter(lambda tok: tok not in splitters and tok.strip() != "")\
        .map(nlp)\
        .to_list()




# Lemmatized, lowercased phrases that mark a "what I was doing" sentence.
# NOTE(review): "-pron-" presumably relies on the lemmatizer emitting "-PRON-"
# for pronouns (spaCy 2.x behaviour) - confirm for the installed version.
prec_pron = "-pron-"
prec_suf = [
    "be in",
    "enter",
    "be look"]
prec_phrases = [prec_pron + " " + suf for suf in prec_suf]


def extract_precondition(nlp_sents: List[Doc]) -> List[str]:
    """Return the text of every chunk whose lemmas contain a precondition phrase.

    Each chunk is turned into a lowercased, space-joined lemma string and is
    kept when any entry of ``prec_phrases`` occurs in it as a substring.

    Args:
        nlp_sents: parsed chunks, as produced by ``sent_tokenize``.

    Returns:
        Matching chunk texts without duplicates, in order of first appearance
        (the previous ``list(set(...))`` returned them in arbitrary order).
    """
    matches = []
    seen = set()
    for doc in nlp_sents:
        lemmatized = " ".join(tok.lemma_ for tok in doc).lower()
        if any(phrase in lemmatized for phrase in prec_phrases):
            text = doc.text
            if text not in seen:
                seen.add(text)
                matches.append(text)
    return matches


def extract_priority(nlp_sents: List[Doc]) -> List[str]:
    """Placeholder priority extractor: ignores ``nlp_sents`` and returns an empty list."""
    # urg = get_urgency([s.__repr__() for s in nlp_sents])
    priorities = []
    return priorities


# Application areas and the nouns that may follow them; every "<area> <suffix>"
# combination is a phrase searched for in the lemmatized text.
# NOTE(review): app_suffixes is also assigned, with the same value, in an
# earlier cell of this notebook.
app_areas = ["user", "admin", "mobile"]
app_suffixes = ["panel", "application", "website", "page", "board", "site"]
app_area_phrases = ["{} {}".format(area, suffix) for area in app_areas for suffix in app_suffixes]


def extract_part_of_app(nlp_sents: List[Doc]) -> List[str]:
    """Return the application-area phrases mentioned in the given chunks.

    Each chunk is turned into a lowercased, space-joined lemma string; every
    entry of ``app_area_phrases`` occurring in it as a substring is reported.

    Args:
        nlp_sents: parsed chunks, as produced by ``sent_tokenize``.

    Returns:
        Matched phrases without duplicates, ordered by the first chunk that
        mentions them and, within a chunk, by ``app_area_phrases`` order
        (the previous ``list(set(...))`` returned them in arbitrary order).
    """
    found = []
    for doc in nlp_sents:
        lemmatized = " ".join(tok.lemma_ for tok in doc).lower()
        for phrase in app_area_phrases:
            if phrase in lemmatized and phrase not in found:
                found.append(phrase)
    return found


def extract_expected(nlp_sents: List[Doc]) -> List[str]:
    """Return the text of every chunk that contains an "expectation" cue word.

    A token counts as a cue when either its lemma or its surface form
    (both lowercased) is in the cue list. The surface-form check is needed
    because the list holds inflected entries ("supposed", "thought") that can
    never equal a lemma when the lemmatizer reduces them (presumably
    "thought" -> "think").

    Args:
        nlp_sents: parsed chunks, as produced by ``sent_tokenize``.

    Returns:
        Texts of the matching chunks, in input order (duplicates are kept).
    """
    words = {"expect", "suppose", "supposed", "thought", "should", "ought"}
    res = []
    for doc in nlp_sents:
        if any(tok.lemma_.lower() in words or tok.text.lower() in words for tok in doc):
            res.append(doc.text)
    return res


def extract_date(nlp_sents: List[Doc]) -> List[str]:
    """Collect the dates (and, when unambiguous, the time) mentioned in the chunks.

    Entities labelled ``DATE`` and ``TIME`` are gathered from every chunk. Date
    strings are canonicalised with ``date_to_canonical_or_stay``. When exactly
    one distinct date remains and at least one time was found, the first raw
    date and the first time are joined and passed to
    ``datetime_to_canon_or_stay``.

    Args:
        nlp_sents: parsed chunks (each exposing ``.ents``), as produced by
            ``sent_tokenize``.

    Returns:
        A single-element list with the combined date-time, or otherwise the
        distinct canonical dates in order of first appearance (the previous
        set-based result had arbitrary order).
    """
    dates = []
    times = []

    for doc in nlp_sents:
        for ent in doc.ents:
            if ent.label_ == 'DATE':
                dates.append(ent.text)
            elif ent.label_ == 'TIME':
                times.append(ent.text)

    # De-duplicate after canonicalisation so that e.g. two spellings of the
    # same day count as one date.
    canonical_dates = []
    for raw in dates:
        canon = date_to_canonical_or_stay(raw)
        if canon not in canonical_dates:
            canonical_dates.append(canon)

    if len(canonical_dates) == 1 and times:
        return [datetime_to_canon_or_stay(dates[0] + " " + times[0])]
    return canonical_dates


# Matches lemmatized negations such as "do not work" (from "did not work",
# "does not work", ...).
did_not_regex = re.compile("do not (work|run|complete)")


def extract_actual(nlp_sents: List[Doc]) -> List[str]:
    """Return the text of every chunk that describes a failure.

    Each chunk is turned into a lowercased, space-joined lemma string and kept
    when it contains a "do not work/run/complete" negation, one of the failure
    words as a substring, or "stop work".

    Args:
        nlp_sents: parsed chunks, as produced by ``sent_tokenize``.

    Returns:
        Texts of the matching chunks, in input order (duplicates are kept).
    """
    res = []
    words = ["fail", "error", "break", "wrong"]
    for doc in nlp_sents:
        lemmatized = " ".join(tok.lemma_ for tok in doc).lower()

        # `search`, not `match`: the negation normally follows a subject
        # ("-pron- do not work"), and `match` only succeeds at position 0.
        negated = did_not_regex.search(lemmatized) is not None
        has_failure_word = any(w in lemmatized for w in words)
        if negated or has_failure_word or "stop work" in lemmatized:
            res.append(doc.text)
    return res


def date_to_canonical(date: str) -> Optional[str]:
    """Parse ``date`` with ``parse_date`` and return its ISO date, or ``None`` if it cannot be parsed."""
    parsed = parse_date(date)
    return None if parsed is None else parsed.date().isoformat()


def date_to_canonical_or_stay(date: str) -> str:
    """Return the canonical form of ``date``, or ``date`` unchanged when it has none."""
    canonical = date_to_canonical(date)
    if canonical is not None:
        return canonical
    return date


def datetime_to_canon_or_stay(date: str) -> str:
    """Return the ISO date-time parsed from ``date``, or ``date`` unchanged when parsing fails."""
    parsed = parse_date(date)
    return date if parsed is None else parsed.isoformat()


In [8]:
import pprint

# For every sample report: show the raw text, print the extracted fields
# ("Predicted"), then print the hand-written annotation ("Declared").
for sample, expected in samples.items():
    for markup in (
        "<h4>{}</h4>".format("next"),
        "<p>{}</p>".format(sample),
        "<h5>{}</h5>".format("Predicted"),
    ):
        display(HTML(markup))

    sents = sent_tokenize(sample)

    predicted = {
        "precondition": extract_precondition(sents),
        "part of app": extract_part_of_app(sents),
        "expected": extract_expected(sents),
        "actual": extract_actual(sents),
        "priority": extract_priority(sents),
        "date": extract_date(sents),
    }
    pprint.pprint(predicted)

    display(HTML("<h5>{}</h5>".format("Declared")))
    pprint.pprint(expected)

{'actual': [' i saw message "something went wrong".'],
 'date': ['2020-01-11T17:00:00'],
 'expected': ['I expected transaction to look as always, '],
 'part of app': ['user website'],
 'precondition': [],
 'priority': []}


{'actual': 'saw message "something went wrontg" ',
 'exptected': 'then transaction to look as always',
 'importance': 'high',
 'part of app': 'user website',
 'time': '10 jan at 17:00'}


{'actual': ['Trading Panel stopped working Today at 14:00.'],
 'date': ['2020-01-12T14:00:00'],
 'expected': [],
 'part of app': [],
 'precondition': [],
 'priority': []}


{'actual': "the page wouldn't load",
 'exptected': '',
 'importance': 'low',
 'time': '11 jan at 14:00'}


{'actual': [],
 'date': ['2020-01-11'],
 'expected': ['\nThe app should create the user an return a success message '],
 'part of app': ['admin panel'],
 'precondition': [],
 'priority': []}


{'actual': 'nothing happened',
 'exptected': 'create the user and return success message',
 'importance': 'high',
 'time': '10 jan at 14:00'}


{'actual': [' it was covered in red errors.'],
 'date': ['2020-01-09'],
 'expected': ['I expected it to work as always '],
 'part of app': ['user page'],
 'precondition': ['\n    3 days ago I was looking at user page.'],
 'priority': []}


{'time': '9 jan'}


# Junkyard

In [9]:
def print_ners(text:str):
    """Print ``text``, then its named entities as ``(text, label)`` pairs, then a blank line."""
    entities = [(ent.text, ent.label_) for ent in nlp(text).ents]
    print(text)
    print(entities)
    print("")

In [10]:
# Dump the named entities found in every sample report.
names = seq(samples.keys())

names.for_each(print_ners)

On yesterday at 5 pm i couldn't complete the transaction.  It was on user website. I expected transaction to look as always, but i saw message "something went wrong".  This issue is very important to us.
[('yesterday', 'DATE'), ('5 pm', 'TIME')]

I write to report an issue. Trading Panel stopped working Today at 14:00. User "kazik" was affected. The page wouldnt load, i was a little upset. Please fix it when you can. 
[('Trading Panel', 'ORG'), ('Today', 'DATE'), ('14:00', 'TIME')]


   Hello!
Yesterday I had some problems on the admin panel. I tried to create a new user.
The app should create the user an return a success message but nothing happened. The "create" button was completely unresponsive.
Can you fix it ASAP? It's very important. Thank you in advance. 
    
[('Yesterday', 'DATE')]


    3 days ago I was looking at user page. I expected it to work as always and then it was covered in red errors.
    
[('3 days ago', 'DATE')]



In [11]:
# First sample report (dict insertion order), used for the scratch cells below.
txt = list(samples.keys())[0]

In [12]:
# Inspect how the first sample is cut into chunks.
sent_tokenize(txt)

[On yesterday at 5 pm i couldn't complete the transaction.,
  It was on user website.,
 I expected transaction to look as always, ,
  i saw message "something went wrong".,
  This issue is very important to us.]

In [129]:
# NOTE(review): `doc` is not assigned in any earlier cell of the visible
# notebook (the first visible assignment is further down), so this cell relies
# on leftover kernel state and will fail on Restart & Run All.
doc

I expected transaction to look as always, but i saw message "something went wrong".

In [123]:
# for t in doc:
#     print(t.dep_)
from spacy import displacy

import en_core_web_sm
# Small English model, loaded in addition to the large `nlp` pipeline; in the
# visible cells it is only referenced from commented-out code.
nlp_sm = en_core_web_sm.load()


In [27]:
# Scratch: parse a hand-written sentence with the large pipeline and show it.
#doc = nlp_sm(sent_tokenize(txt)[3])
doc  = nlp("I thought i would see correct response, then it failed")
doc
#displacy.serve(doc, style="dep",minify=True)

I thought i would see correct response, then it failed

In [28]:
# Print the tokens of `doc` one per line.
for d in doc:
    print(d)

I
thought
i
would
see
correct
response
,
then
it
failed


In [17]:
# Scratch: show the lowercased, space-joined lemma string that the extractors
# match their phrases against.
dsklfnsldd = "3 days ago he was looking at user page. I was in bathroom. He entered the page,  I expected it to work as always and then it was covered in red errors."
" ".join(tok.lemma_ for tok in nlp(dsklfnsldd)).lower()

'3 day ago -pron- be look at user page . -pron- be in bathroom . -pron- enter the page ,   -pron- expect -pron- to work as always and then -pron- be cover in red error .'

In [20]:
# NOTE(review): with prec_suf defined as three comma-separated strings this
# should show three phrases; a single fused phrase in a saved output would stem
# from an earlier definition - re-run to confirm.
prec_phrases

['-pron- be inenterbe look']

In [26]:
# Hand-written issue titles grouped by cluster name; used below to try out
# Doc.similarity for duplicate detection.
# NOTE(review): some titles contain misspellings ("amin", "missaligned",
# "doesnt'") - possibly deliberate noise; they are runtime data and left as-is.
clusters = {
    "user no load":
    {
        "user page does not load correctly",
        "user panel fails to load",
        "loading fails on user app",
        "user app fails to load",
    },
    
    "buttons are not aligned":
    {
        "user page - buttons not aligned",
        "user sees that buttons are missaligned",
        "buttons are not in line",
    },
    "admin blank":
    {
        "admin page doesnt' load",
        "amin panel is empty",
        "amin panel is blank",
        "admin cannot login",
    }
}

In [27]:
# Flatten `clusters` into (title, cluster name) pairs.
all_things = seq(clusters.items()).flat_map(lambda tag_entries: seq(tag_entries[1]).map(lambda e: (e,tag_entries[0])) )

In [29]:
# Replace each title with the result of nlp(title), keeping the cluster name.
# NOTE(review): this rebinds `all_things` from itself, so the cell is not
# idempotent - re-running it would pass already-parsed objects to nlp again.
all_things = all_things.map(lambda s_tag: (nlp(s_tag[0]),s_tag[1]))

In [33]:
import pandas as pd

ModuleNotFoundError: No module named 'pandas'

In [51]:
from random import randint
def randId(prefix):
    """Return ``prefix`` followed by a random integer between 0 and 100 (inclusive)."""
    suffix = randint(0, 100)
    return prefix + str(suffix)

In [54]:
from collections import namedtuple
# An issue is a parsed title plus an id built from its cluster name.
Issue =  namedtuple("Issue", ["title", "id"])
# One Issue per title in `clusters`; ids come from randId (cluster name plus a
# random 0-100 suffix), so they are neither stable nor guaranteed unique.
all_things = seq(clusters.items()).flat_map(lambda tag_entries: seq(tag_entries[1]).map(lambda e: Issue(title = nlp(e),id= randId(tag_entries[0])) ))
# Query issue to compare against every known issue.
inQuestion =  Issue(title=nlp("amin panel is blank"), id= "elo")
# Pair each known issue with its similarity to the query title.
ala = all_things.map(lambda x : (x,x.title.similarity(inQuestion.title)))
# Show the most similar issues first, stopping at the first score <= 0.85.
ala.sorted(lambda x : x[1],reverse=True).take_while(lambda tup: tup[1] > 0.85)

0,1
"Issue(title=amin panel is blank, id='admin blank39')",1.0
"Issue(title=amin panel is empty, id='admin blank88')",0.89988


In [43]:
# NOTE(review): `alas` is not defined in any visible cell; it presumably was a
# pandas DataFrame of pairwise similarities from a since-removed cell, so this
# cell relies on leftover kernel state - confirm or delete.
# Drop self-pairs (similarity of 1), sort by similarity, keep pairs above 0.86.
ola = alas[alas.similarity < 1].dropna().sort_values('similarity',ascending=False)
ola[ola.similarity > 0.86]

Unnamed: 0,sen1,tok1,sen2,tok2,similarity
35,"(user, app, fails, to, load)",user no load,"(loading, fails, on, user, app)",user no load,0.9422
25,"(loading, fails, on, user, app)",user no load,"(user, app, fails, to, load)",user no load,0.9422
34,"(user, app, fails, to, load)",user no load,"(user, panel, fails, to, load)",user no load,0.915259
14,"(user, panel, fails, to, load)",user no load,"(user, app, fails, to, load)",user no load,0.915259
97,"(amin, panel, is, blank)",admin blank,"(amin, panel, is, empty)",admin blank,0.89988
107,"(amin, panel, is, empty)",admin blank,"(amin, panel, is, blank)",admin blank,0.89988
33,"(user, app, fails, to, load)",user no load,"(user, page, does, not, load, correctly)",user no load,0.881261
3,"(user, page, does, not, load, correctly)",user no load,"(user, app, fails, to, load)",user no load,0.881261
11,"(user, panel, fails, to, load)",user no load,"(user, page, does, not, load, correctly)",user no load,0.87447
1,"(user, page, does, not, load, correctly)",user no load,"(user, panel, fails, to, load)",user no load,0.87447


In [19]:
def _tab_entry(a,b):
    """Return a ``(repr(a), repr(b), similarity)`` row for the pair ``a``, ``b``."""
    left = a.__repr__()
    right = b.__repr__()
    score = a.similarity(b)
    return (left, right, score)

In [15]:
# NOTE(review): `d1` and `d2` are not defined in any visible cell; this relies
# on leftover kernel state.
d1.similarity(d2)


0.9332730827444652

In [16]:
# NOTE(review): `d1` is not defined in any visible cell (see the cell above).
d1.similarity(nlp("page does not load"))

0.7694986442251492