In [1]:
import grpc

from common import common_pb2

from spell_check_idl import spell_check_service_pb2 as spell_check_pb2
from spell_check_idl import spell_check_service_pb2_grpc as spell_check_pb2_grpc

# gRPC endpoint of the spell-check service.
# Alternative endpoint (presumably a locally running server — confirm):
# host = '0.0.0.0:10829'
host = 'global-spell-check-service.engineering.onmail.io:50051'

# Check options attached to every request built by the call helpers below.
# NOTE(review): the exact meaning of each option is defined by the service's
# CheckConfig message, which is not visible here — confirm before tuning.
check_config = spell_check_pb2.CheckConfig(
    iterations=3,
    min_probability=0.7,
    min_error_probability=0.5,
    case_sensitive=True,
    languagetool_post_process=True,
    languagetool_call_thres=0.7,
    whitelist=["Citao", "Guibin", "OnMail"],
    with_debug_info=True,
)

def _build_common_req():
    """Return the request metadata shared by every call made from this notebook.

    A fresh dict is built on each call so that no two requests share a
    mutable object.
    """
    return {
        "log_id": "abcdefg",
        "client_name": "local_test",
        "email_address": "wucitao@engineering.onmail.com",
        "user_id": 89,
    }


def call_gec_grpc(text, timeout=None):
    """Send one text to the service's ``TextCheck`` RPC and return the response.

    Args:
        text: The string to check.
        timeout: Optional deadline for the RPC, in seconds. ``None`` (the
            default) sets no deadline, which matches the previous behaviour.

    Returns:
        The response message returned by ``stub.TextCheck``.

    Raises:
        grpc.RpcError: If the RPC fails or the deadline expires.
    """
    # The channel is opened per call and closed when the `with` block exits.
    with grpc.insecure_channel(host) as channel:
        stub = spell_check_pb2_grpc.SpellCheckServiceStub(channel)
        request = spell_check_pb2.TextCheckReq(
            common_req=_build_common_req(),
            req_content={"text": text, "config": check_config},
        )
        return stub.TextCheck(request, timeout=timeout)


def call_gec_batch_grpc(texts, timeout=None):
    """Send several texts in one ``BatchTextCheck`` RPC and return the response.

    Args:
        texts: The strings to check, sent together in a single request.
        timeout: Optional deadline for the RPC, in seconds. ``None`` (the
            default) sets no deadline, which matches the previous behaviour.

    Returns:
        The response message returned by ``stub.BatchTextCheck``.

    Raises:
        grpc.RpcError: If the RPC fails or the deadline expires.
    """
    # The channel is opened per call and closed when the `with` block exits.
    with grpc.insecure_channel(host) as channel:
        stub = spell_check_pb2_grpc.SpellCheckServiceStub(channel)
        request = spell_check_pb2.BatchTextCheckReq(
            common_req=_build_common_req(),
            req_content={"texts": texts, "config": check_config},
        )
        return stub.BatchTextCheck(request, timeout=timeout)

In [2]:
# Larger hand-written sample set. It is kept under its own name because it
# used to be assigned to `text_list` and then immediately overwritten by the
# short list below, which silently discarded it.
full_text_list = [
    "The marked is closed today.",
    "Hi, Guibin! My namme is Citao. The marked was closed yestreday. (This email are sent from OnMail.)",
    "England coach Andy Robinson is already without centre Will Greenwood and flanker Richard Hill while fly-half Jonny Wilkinson is certain to miss at least the first two games.",
    "This boook is very interestign.",
    "My nammeo  are Citao.",
    "I want go school.",
    "I wants go school.",
    "In my opinion it is not a good choice.",
    "Plan going well?",
    "My namme is Edison. <This emali are sent from OnMail>",
]

# Short sample that the cells below send to the service.
# Assign `full_text_list` here instead to run the larger set.
text_list = [
    "My namme is Citao",
    "Myy name are Citao",
]

In [3]:
# Check each sample individually and print the input, the corrected text and
# every correction entry, separated by a ruler line.
for text in text_list:
    resp = call_gec_grpc(text)
    print(text)
    result = resp.resp_content.result
    print(result.corrected_text)
    for correction in result.corrections:
        print(correction)
    print('-' * 80)

KeyboardInterrupt: 

In [None]:
import random

def _read_stripped_lines(path):
    """Return every line of the file at `path` with surrounding whitespace stripped.

    The file is opened in a `with` block so the handle is closed as soon as
    the lines have been read.
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


# Parallel files: line k of the source file pairs with line k of the target file.
source_list = _read_stripped_lines("./dataset/sampled_test.source")
target_list = _read_stripped_lines("./dataset/sampled_test.target")

# Shuffle the pairs together so that source and target stay aligned, then
# split them back into two tuples.
st_list = list(zip(source_list, target_list))
random.shuffle(st_list)
source_list, target_list = zip(*st_list)
del st_list

In [None]:
# Compare the service output with the reference text for the first 20
# source/target pairs and print every mismatch with its corrections.
# NOTE(review): `drop_punctuation_space` is defined in a later cell of this
# notebook; that cell has to be run before this one.
for text, target in zip(source_list[:20], target_list):
    text = drop_punctuation_space(text)
    target = drop_punctuation_space(target)

    resp = call_gec_grpc(text)
    # The cell that iterates `text_list` reads the payload from
    # `resp_content.result`, whereas this cell read it directly from
    # `resp_content`. Accept either layout: use `result` when the response
    # has that field, otherwise fall back to `resp_content` itself.
    result = getattr(resp.resp_content, "result", resp.resp_content)
    if result.corrected_text != target:
        print(text)
        print(result.corrected_text)
        print(target)
        print()
        for c in result.corrections:
            print(c.orig_sub_text)
            print(c.cor_sub_text)
            print(c.start_index, c.end_index)
        print('-' * 80)
        print()

In [None]:
### text = "In the early eighties \"casual sex\" in the United States was a widely accepted idea, now it is something people really take into consideration and rarely occurs."
# Only the last assignment to `text` takes effect; the earlier one is kept as
# an alternative input to switch to by reordering the lines.
text = "The divisions in their family and the fight between prince and the princes of Wales and the possibility of the divorce ."
text = "In the early eighties \" casual sex \" in the United States was a widely accepted idea , now it is something people really take into consideration and rarely occurs ."
resp = call_gec_grpc(text)
print(text)
# The cell that iterates `text_list` reads the payload from
# `resp_content.result`, whereas this cell read it directly from
# `resp_content`. Accept either layout: use `result` when the response has
# that field, otherwise fall back to `resp_content` itself.
result = getattr(resp.resp_content, "result", resp.resp_content)
print(result.corrected_text)
print()
for c in result.corrections:
    print(c)

In [None]:
# Probe sentence with a repeated article.
text = "I want to buy a a apple."
resp = call_gec_grpc(text)
print(text)
# The cell that iterates `text_list` reads the payload from
# `resp_content.result`, whereas this cell read it directly from
# `resp_content`. Accept either layout: use `result` when the response has
# that field, otherwise fall back to `resp_content` itself.
result = getattr(resp.resp_content, "result", resp.resp_content)
print(result.corrected_text)
print()
for c in result.corrections:
    print(c)

In [None]:
# One entry per token of "I want to buy a a apple.". Each entry appears to be
# [original token, replacement token, [start, end]]; the offsets match
# character positions in that sentence, and the second "a" is replaced by ''.
corrections = [
    ['I', 'I', [0, 1]],
    ['want', 'want', [2, 6]],
    ['to', 'to', [7, 9]],
    ['buy', 'buy', [10, 13]],
    ['a', 'a', [14, 15]],
    ['a', '', [16, 17]],
    ['apple', 'apple', [18, 23]],
    ['.', '.', [23, 24]],
]

In [None]:
# NOTE(review): `forward_merge_corrections` is not defined or imported in any
# cell visible here — confirm where it comes from before running this cell.
forward_merge_corrections(text, corrections)

In [None]:
import re


In [None]:
import re
import string
# Every character of string.punctuation (order is arbitrary because it passes
# through a set), and the same characters each prefixed with a backslash.
# Neither list is used by the active patterns below.
exclude = list(set(string.punctuation))
r_exclude = ['\\' + ch for ch in exclude]
# DROP_RE = re.compile(r"(\w)( +)([{}]+)".format(''.join(list(r_exclude))))

# A word character, one or more spaces, then a run of ',', '.' or '!'.
DROP_1_RE = re.compile(r"(\w)( +)([,\.\!]+)")
# A double quote, one space, quoted content, one space, a closing double quote.
DROP_2_RE = re.compile(r"(\")( )([^\"]+)( )(\")")


# In the early eighties " casual sex " .

def drop_punctuation_space(text):
    """Remove tokenisation spaces around punctuation and inside double quotes.

    Spaces between a word character and a following run of ``,``, ``.`` or
    ``!`` are dropped, and a single space just inside each pair of double
    quotes is dropped on both sides.

    Args:
        text: The string to clean up.

    Returns:
        The cleaned string; input that matches neither pattern is returned
        unchanged.
    """
    without_gap = DROP_1_RE.sub(r"\1\3", text)
    return DROP_2_RE.sub(r"\1\3\5", without_gap)


In [None]:
# Quick check of the quote rule; the expected result is
# 'In the early eighties "casual sex" in the United S'.
drop_punctuation_space('In the early eighties " casual sex " in the United S')

In [None]:
import spacy
# Load the spaCy pipeline named "en_core_web_sm" into `nlp`.
nlp = spacy.load("en_core_web_sm")

In [None]:
# NOTE(review): same load call as the previous cell; this looks redundant
# unless it is meant as a way to reload the pipeline — confirm and remove.
nlp = spacy.load("en_core_web_sm")

In [None]:
import base64

In [None]:
# NOTE(review): the `base64` module has no attribute `b`, so this raises
# AttributeError as written; it looks like an unfinished call — confirm which
# function was intended.
base64.b('e09s66hgmps')

In [None]:
# NOTE(review): `base64.b64decode` expects a bytes-like object or an ASCII
# string; passing the int produced by `int(..., 36)` raises TypeError as
# written — confirm the intended conversion.
base64.b64decode(int('e09s66hgmps', 36))

In [None]:
# Parse the string as a base-36 number.
b36_int = int('dv51xlqapz4', 36)
# NOTE(review): left disabled. `bytes(n)` with an int builds a zero-filled
# buffer of length n rather than encoding the value of n, so the line below
# would not base64-encode the number itself.
# base64.b64encode(bytes(b36_int))

In [None]:
# Display the integer parsed from the base-36 string.
b36_int

In [30]:
# Send every entry of `text_list` in one batch request and display the response.
call_gec_batch_grpc(text_list)

common_resp {
  log_id: "abcdefg"
  runtime: 225233
}
resp_content {
  result {
    need_correct: true
    corrected_text: "My name is Citao"
    corrections {
      orig_sub_text: "namme"
      cor_sub_text: "name"
      start_index: 3
      end_index: 8
      type: SPELIING
    }
  }
  result {
    need_correct: true
    corrected_text: "My name is Citao"
    corrections {
      orig_sub_text: "Myy"
      cor_sub_text: "My"
      end_index: 3
      type: GRAMMAR
    }
    corrections {
      orig_sub_text: "are"
      cor_sub_text: "is"
      start_index: 9
      end_index: 12
      type: GRAMMAR
    }
  }
}

In [35]:
# Collect every line available on standard input, line endings included.
# NOTE(review): `sys` is not imported in any cell visible here — confirm it is
# imported before this cell runs.
vals = list(sys.stdin)