name fix and common package

Kadia-IVA · Jun 24, 2020 · 78d7c9c · 78d7c9c
1 parent e779ed6
commit 78d7c9c
Show file tree

Hide file tree

Showing 18 changed files with 314 additions and 38 deletions.
diff --git a/chat.py b/chat.py
@@ -1,22 +1,17 @@
 import json
-import asyncio
-import websockets
 import requests
 
-auth = f"http://localhost:{json.load(open('urls.json'))['meta']}/auth"
-uri = f"ws://localhost:{json.load(open('urls.json'))['meta']}/"
+url = f"http://localhost:8080/reply"
 
-async def chat():
-    async with websockets.connect(uri) as websocket:
-        token = requests.post(auth, json={"name": input("name: ")}).json()['token']
-        await websocket.send(f'token {token}')
-        print('Chat started.. Say something.')
-        while True:
-            text = input(">>")
-            if text == '/stop':
-                break
-            await websocket.send(text)
-            text = await websocket.recv()
-            print(f"{text}")
+def chat():  # Console chat loop: reads lines from stdin until the user enters '/stop'.
+    token = input("name: ").replace(' ', '_')  # spaces in the entered name become underscores
+    print('Chat started.. Say something.')
+    while True:
+        text = input(">>")
+        if text == '/stop':
+            break
+        reply = requests.post(url, )  # NOTE(review): no payload is passed, so neither `token` nor `text` is sent; `reply` is never read
+        text = await websocket.recv()  # NOTE(review): leftover from the removed websocket version; `await` is invalid in a non-async def and `websocket` is undefined here
+        print(f"{text}")
 
-asyncio.get_event_loop().run_until_complete(chat())
+chat()
diff --git a/docs/_static/architecture.svg b/docs/_static/architecture.svg
diff --git a/docs/architecture/parser_and_ner.rst b/docs/architecture/parser_and_ner.rst
@@ -1,7 +1,7 @@
-Parser&NER
+Preprocessor
 =================
 
-Parses raw text to unified format used in Kadia`s ecosystem.
+It parses raw text into the unified format used in Kadia's ecosystem.
 It'll be like Alisa's but with references to original text and original text itself.
 Maybe, it will include correction suggestions.
 

diff --git a/docs/path/stages.rst b/docs/path/stages.rst
@@ -5,7 +5,7 @@ Version 0.1
 --------------
 
 This version will test our basic systems. Skills will be called by /run commands by the user directly.
-The main goal is to test skill API, deployment and team skills.
+The main goal is to test skill API, deployment and teamwork.
 
 Available modules:
 

diff --git a/kadia-сommon/kadia-common/__init__.py b/kadia-сommon/kadia-common/__init__.py
@@ -0,0 +1 @@
+import schemes
diff --git a/kadia-сommon/kadia-common/schemes.py b/kadia-сommon/kadia-common/schemes.py
@@ -0,0 +1,58 @@
+from pydantic import BaseModel
+from typing import List
+
+class Tag(BaseModel):  # A tag string paired with a float score; Token holds a list of these.
+    tag: str # TODO: make this an Enum
+    score: float
+
+class Token(BaseModel):  # One token of a phrase: its tags, text, position and part of speech.
+    tags: List[Tag]
+    raw: str  # NOTE(review): presumably the token's text as it appeared in the phrase; confirm
+    ind: int # position in the original phrase
+    pos: str # part of speech; TODO: make this an Enum
+
+class Speech(BaseModel):  # An utterance: raw text plus tokenized, stylized and argument fields.
+    raw: str
+    tokenized: List[Token]
+    stylized: str
+    args: List[Token]
+    sentoment: float # from -1 to 1; not supported for now. NOTE(review): name looks like a typo of "sentiment"; renaming changes the schema
+
+class Author(BaseModel):  # Identifies who produced a Replic and whether that was the user.
+    is_user: bool
+    _id: str  # NOTE(review): pydantic excludes underscore-prefixed attributes from model fields; confirm Author(_id=...) in meta_manager/run.py behaves as intended
+
+class Replic(BaseModel):  # One conversational turn: author, timestamp and speech.
+    author: Author
+    timestamp: int  # NOTE(review): meta_manager/run.py passes time.time(), which returns a float
+    speech: Speech
+
+class SessionConfigs(BaseModel):  # Two boolean session flags, both with defaults.
+    local: bool = True
+    visual: bool = False
+
+class UserSettings(BaseModel):  # Per-user settings, all with defaults.
+    style: bool = False
+    confirmation_threshold: float = 1.0
+
+class UserState(BaseModel):  # Dialog state of a user, all with defaults.
+    dialog_skill_id: str = ""  # meta_manager/run.py compares this with "" before requiring a /run command
+    is_waiting: bool = False
+
+class PublicUser(BaseModel):  # User data without the token: session, settings and state, each defaulted.
+    session: SessionConfigs = SessionConfigs()
+    settings: UserSettings = UserSettings()
+    state: UserState = UserState()
+
+class User(PublicUser):  # PublicUser plus a required token string.
+    token: str
+
+class Skill(BaseModel):  # A skill: its archive as a string, its author and its name.
+    zip: str # base64-encoded (original note: "b64")
+    author: Author
+    name: str
+
+class SkillInstance(BaseModel):  # A Skill paired with a state string and a user id.
+    skill: Skill
+    state: str # base64-encoded (original note: "b64")
+    user_id: str
diff --git a/kadia-сommon/setup.py b/kadia-сommon/setup.py
@@ -0,0 +1,13 @@
+from distutils.core import setup
+
+setup(name='kadia_common',  # Package metadata for the shared Kadia code.
+      version='0.0.1',
+      description='The common code for Kadia voice assistant',
+      author='Andrew Ishutin',
+      author_email='hazmozavr@gmail.com',
+      url='https://github.com/kadia-iva/kadia',
+      packages=['kadia-common'],  # NOTE(review): a hyphenated directory name cannot be imported with a plain `import` statement
+      install_requires=[  # NOTE(review): `install_requires` is a setuptools keyword; confirm distutils.core.setup honours it
+        'pydantic'
+      ]
+     )
diff --git a/meta_manager/requirements.txt b/meta_manager/requirements.txt
@@ -0,0 +1,2 @@
+fastapi
+requests
diff --git a/meta_manager/run.py b/meta_manager/run.py
@@ -1,17 +1,17 @@
-from fastapi import FastAPI, WebSocket, Body
+from fastapi import FastAPI, Body
 from fastapi.responses import HTMLResponse
 from schemes import *
 import requests
 import json
 import time
+import os
 
-ports = json.load(open('../urls.json'))
 users = {}
 
 app = FastAPI()
 
-ner_url = f"http://localhost:{ports['ner']}/"
-algo_url = f"http://localhost{ports['algo']}/"
+ner_url = os.getenv("PREPROCESSER_URL", "http://localhost:8081/")
+skill_manager_url = os.getenv("SKILL_MANAGER_URL", "http://localhost:8082")
 
 def fetch_user(name=None, token=None):
     if token is None:
@@ -26,10 +26,17 @@ def add_replic_to_history(replic):
 def change_user_state(token, state):  # Overwrite the stored state of users[token]. NOTE(review): reply() in this file calls it with no arguments
     users[token].state = state
 
-@app.post('/auth')
-def auth_user(name: str = Body(..., embed=True)):
-    user = fetch_user(name=name)
-    return {"token": user.token}
+def fetch_skill(skill_id):  # Stub: not implemented, returns None. NOTE(review): reply() in this file calls it without skill_id
+    pass
+
+def fetch_skill_instance(user, skill):  # Stub: not implemented, returns None.
+    pass
+
+def update_instance_state(instance, new_state):  # Stub: not implemented, returns None.
+    pass
+
+def add_replic_to_dialog(replic, instance):  # Stub: not implemented, returns None.
+    pass
 
 @app.post("/reply") # add more decorators
 def reply(token: str = Body(..., embed=True), \
@@ -39,9 +46,42 @@ def reply(token: str = Body(..., embed=True), \
     replic = Replic(author=Author(is_user=True, _id=user.token), # user._id
                     timestamp=time.time(), # make timezone agnostic
                     speech=speech)
-    add_replic_to_history(replic)
-    if user.state.dialog_skill_id == "":
-        response = requests.get(algo_url, json={'replic': replic.as_dict()})
-        algo = response['']
+    add_replic_to_history(replic, user)
+    if user.state.dialog_skill_id == "" and "/run" not in speech.raw:
+            return "Sorry, I do not understand you. Use /run to start dialog"
+    skill_id = user.state.dialog_skill_id
+    if "/run" in speech.raw:
+        _, skill_id = speech.raw.split()
+    skill = fetch_skill()
+    instance = fetch_skill_instance(user, skill)
+    add_replic_to_dialog(replic, instance)
+    change_user_state() # is waiting for response..
+
+    resp = requests.post(skill_manager_url, json={'skill': skill.as_dict(),
+                                                  'instance': instance.as_dict()}).json()
+    new_state = resp['state']
+    update_instance_state(instance, new_state)
+
+    speech = resp['speech']
+    if resp['code']:
+        """
+        -1 - error
+        0 - normal return
+        1 - positive return
+        """
+        pass
+
+    if resp['type'] == "dialog+end":
+        """
+        dialog
+        dialog+do-not-understand
+        dialog+end
+        """
+        update_user_state()
+
+    speech = requests.get(ner_url + 'postprocess', json={'replic': replic}).json('replic')
+    replic = Replic(..., speech=speech)
+    add_replic_to_dialog(replic, instance)
+    add_replic_to_history(replic, user)
 
-    return
+    return replic.raw
diff --git a/preprocesser/README.md b/preprocesser/README.md
@@ -0,0 +1,5 @@
+```
+uvicorn run:app --port 8080
+
+
+```
diff --git a/preprocesser/requirements.txt b/preprocesser/requirements.txt
@@ -0,0 +1,3 @@
+fastapi
+-f https://download.pytorch.org/whl/torch_stable.html
+torch==1.5.1+cpu
diff --git a/preprocesser/run.py b/preprocesser/run.py
@@ -0,0 +1,90 @@
+from fastapi import FastAPI, WebSocket, Body
+from fastapi.responses import HTMLResponse
+from schemes import *
+import requests
+import json
+import time
+import os
+import string
+import logging
+
+mask = '[MASK]'  # NOTE(review): not referenced anywhere in the visible part of this file
+digits = "0123456789"  # ASCII digits recognised by parse_to_words
+punctuation = string.punctuation + '’“”’—‘' + '–'  # ASCII punctuation plus curly quotes, em dash and en dash
+
+"""
+Useful links:
+
+!!!Dataset instructions: https://www.weak-learner.com/blog/2019/12/27/ontonotes-5/
+!!!Training instruction: https://huggingface.co/transformers/v2.2.0/examples.html#training
+
+DeepPavlov configs: https://github.com/deepmipt/DeepPavlov/blob/master/deeppavlov/configs/ner/ner_ontonotes_bert.json
+Example: https://github.com/flairNLP/flair
+"""
+
+def char_is_emoji(character):  # True when the character is contained in emoji.UNICODE_EMOJI.
+    return character in emoji.UNICODE_EMOJI  # NOTE(review): `emoji` is not among the imports visible in this file; calling this would raise NameError
+
+def parse_to_words(text):  # Split text into words and single punctuation marks; returns (words, [start, end] index pairs).
+    text = text.lower()  # all indexes below refer to this lowercased text
+    words = []
+    words_indexes = []  # one inclusive [start, end] pair per entry in `words`
+    current = []  # characters of the word being accumulated
+    for i, el in enumerate(text + ' '):  # the appended space flushes the final word
+        if el.isalpha():
+            current.append(el)  # NOTE(review): a letter right after digits joins the same word (no flush), unlike the digit case below
+        elif el in digits:
+            if len(current) != 0 and not current[-1] in digits:  # a digit after a non-digit ends the pending word
+                words.append(''.join(current))
+                words_indexes.append([i - len(current), i - 1])
+                current = []
+            current.append(el)
+        elif el.isspace():
+            if len(current) != 0:  # whitespace only flushes; it is never emitted as a word
+                words.append(''.join(current))
+                words_indexes.append([i - len(current), i - 1])
+                current = []
+        elif el in punctuation: # commas, dots, slashes
+            if len(current) != 0:  # flush the pending word before emitting the mark
+                words.append(''.join(current))
+                words_indexes.append([i - len(current), i - 1])
+                current = []
+            words.append(el)  # each punctuation character becomes its own word
+            words_indexes.append([i, i])
+        elif char_is_emoji(el): # emoji
+            words.append('[UNK]')  # NOTE(review): pending word is not flushed first, so it is emitted after '[UNK]' with index pairs shifted by one
+            words_indexes.append([i, i])
+        else:
+            logging.warning(f"Warning! Strange char {el}")  # char is logged and dropped; NOTE(review): pending word is not flushed here either
+    return words, words_indexes
+
+def do_truecase(text):  # Stub: not implemented, returns None.
+    """
+    truecase text https://pypi.org/project/truecase/
+    source: https://arxiv.org/pdf/1903.11222.pdf
+    """
+    pass
+
+def do_ner(text):  # Stub: not implemented, returns None.
+    """use distilbert-base-cased"""
+    pass
+
+def do_parse(text, entities):  # Stub: not implemented, returns None.
+    """parse text to tokens. Entity must be a single token"""
+    pass
+
+app = FastAPI()  # application object; the README in this directory starts it with `uvicorn run:app`
+
+@app.post("/") # add more decorators
+def reply(text: str = Body(..., embed=True)):  # POST / handler; `text` is a required, embedded body field.
+    do_truecase()  # NOTE(review): called without its required `text` argument
+    do_ner()  # NOTE(review): called without its required `text` argument
+    do_parse()  # NOTE(review): called without its required `text` and `entities` arguments
+    speech = Speech(...)  # placeholder: passes a literal Ellipsis, not real field values
+    return speech.as_dict()  # NOTE(review): Speech defines no `as_dict`; pydantic documents `.dict()`; confirm
+
+@app.post("/postprocess") # add more decorators
+def reply(text: str = Body(..., embed=True)):  # POST /postprocess handler. NOTE(review): second function named `reply` in this file; it rebinds the module-level name
+    do_parse()  # NOTE(review): called without its required `text` and `entities` arguments
+    speech = Speech(...)  # placeholder: passes a literal Ellipsis, not real field values
+    return speech.as_dict()  # NOTE(review): Speech defines no `as_dict`; pydantic documents `.dict()`; confirm
diff --git a/kadia-schemes/kadia_schemes/schemes.py → preprocesser/schemes.py b/kadia-schemes/kadia_schemes/schemes.py → preprocesser/schemes.py
diff --git a/skill_manager/README.md b/skill_manager/README.md
@@ -0,0 +1,5 @@
+```
+uvicorn run:app --port 8080
+
+
+```
diff --git a/skill_manager/requirements.txt b/skill_manager/requirements.txt
@@ -0,0 +1,3 @@
+fastapi
+-f https://download.pytorch.org/whl/torch_stable.html
+torch==1.5.1+cpu
diff --git a/skill_manager/run.py b/skill_manager/run.py
@@ -0,0 +1,19 @@
+from fastapi import FastAPI, WebSocket, Body
+from fastapi.responses import HTMLResponse
+from schemes import *
+import requests
+import json
+import time
+import os
+import string
+import logging
+
+app = FastAPI()  # application object; the README in this directory starts it with `uvicorn run:app`
+
+@app.post("/") # add more decorators
+def reply(text: str = Body(..., embed=True)):  # POST / handler; body matches the handler in preprocesser/run.py line for line.
+    do_truecase()  # NOTE(review): not defined in this file's visible lines; resolves only if `from schemes import *` provides it
+    do_ner()  # NOTE(review): same as above, no definition visible in this file
+    do_parse()  # NOTE(review): same as above, no definition visible in this file
+    speech = Speech(...)  # placeholder: passes a literal Ellipsis, not real field values
+    return speech.as_dict()  # NOTE(review): Speech defines no `as_dict`; pydantic documents `.dict()`; confirm