In [2]:
import pytz
import aiohttp
import asyncio
import datetime


async def datetime_to_iso_string(date: datetime.datetime) -> str:
    """Format ``date`` as an ISO 8601 string expressed in the Asia/Seoul time zone.

    Microseconds are dropped before the conversion. A naive ``date`` is
    interpreted by ``astimezone`` as system local time.

    Declared ``async`` although nothing is awaited inside; callers ``await`` it.

    Args:
        date: The datetime to format.

    Returns:
        The ISO 8601 representation with second precision and a UTC offset.
    """
    # Same evaluation order as a single chained expression: truncate first,
    # then resolve the zone, then convert and format.
    whole_seconds = date.replace(microsecond=0)
    seoul_zone = pytz.timezone('Asia/Seoul')
    seoul_time = whole_seconds.astimezone(seoul_zone)
    return seoul_time.isoformat()


async def request_sampling_data(target_keyword: list, size_per_channel: int, day_interval: int):
    """Request sample documents from the sample-data generation endpoint.

    Posts a JSON body covering the window from ``day_interval`` days ago up to
    the current time, with both bounds formatted by ``datetime_to_iso_string``.

    Args:
        target_keyword: Sent as ``target_keyword`` in the request body.
        size_per_channel: Sent as ``size_per_channel`` in the request body.
        day_interval: Length of the look-back window, in days.

    Returns:
        The decoded JSON body of the response.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            status of 400 or higher.
    """
    url = 'http://k8s.mysterico.com:31464/analyzer/niz_sample_data/generate'
    today = datetime.datetime.now()
    interval = datetime.timedelta(days=day_interval)
    payload = {
        "size_per_channel": size_per_channel,
        "target_keyword": target_keyword,
        "start_date": await datetime_to_iso_string(today - interval),
        "end_date": await datetime_to_iso_string(today),
        "include": ["channel"]
    }
    async with aiohttp.ClientSession() as session:
        # The context manager releases the response even if decoding fails.
        async with session.post(url=url, json=payload) as response:
            # Surface HTTP errors here instead of handing an error payload to
            # callers that expect a "documents" field.
            response.raise_for_status()
            return await response.json()


async def random_sampling(target_keyword: list, size_per_channel: int, day_interval: int):
    """Fetch sample documents and return the plain text of each one.

    Args:
        target_keyword: Forwarded to ``request_sampling_data``.
        size_per_channel: Forwarded to ``request_sampling_data``.
        day_interval: Forwarded to ``request_sampling_data``.

    Returns:
        A list holding the ``contentPlainText`` value of every returned
        document (``None`` for a document without that key). The list is empty
        when the response carries no ``documents``.
    """
    response = await request_sampling_data(target_keyword, size_per_channel, day_interval)
    # A missing or null "documents" field is treated as "no documents" rather
    # than failing with "'NoneType' object is not iterable".
    documents = response.get("documents") or []
    return [document.get("contentPlainText") for document in documents]




In [3]:
import unicodedata
from typing import List

async def request_ner(sentences: List[str]):
    """Send sentences to the local NER prediction endpoint and return its reply.

    Every sentence is NFC-normalized before it is sent.

    Args:
        sentences: The sentences to analyse.

    Returns:
        The decoded JSON body of the response.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            status of 400 or higher.
    """
    url = 'http://127.0.0.1:8000/predict'
    normalized = [unicodedata.normalize('NFC', sentence) for sentence in sentences]
    async with aiohttp.ClientSession() as session:
        # The context manager releases the response even if decoding fails.
        async with session.post(url=url, json={"sentences": normalized}) as response:
            # Surface HTTP errors instead of returning an error payload that
            # callers would try to read as a prediction.
            response.raise_for_status()
            return await response.json()

In [4]:
# Smoke-test the NER endpoint with one sentence that mixes ZWJ emoji sequences and Korean words.
await request_ner(['''👨‍🔧👩‍🌾🧑‍🌾🧑‍🚀👩‍🚀👨‍🔬야경스타그램 일러스트그램 비건샴푸 성실한 🌚 '''])

[{'sentence': '👨\u200d🔧👩\u200d🌾🧑\u200d🌾🧑\u200d🚀👩\u200d🚀👨\u200d🔬야경스타그램 일러스트그램 비건샴푸 성실한 🌚 ',
  'merged_sentence': '                         [일러스트그램 비건샴푸 : PS] 성실한 🌚',
  'entities': [['일러스트그램 비건샴푸', 'PS', [25, 36]]],
  'not_entities': [['성실한 🌚', 'O', [37, 42]]]}]

In [6]:
import time
import random

# Chunk width used when printing the 'merged_sentence' field.
characters_per_line = 100
# Number of random draws taken from the fetched documents.
sampling_trial = 20
# Keywords forwarded to the sampling request.
target_keyword = [
    "맥도날드"
]

# Fetch sample documents over a 2 ** 10 (= 1024) day look-back window.
documents = await random_sampling(
    target_keyword=target_keyword,
    size_per_channel=1000,
    day_interval=2 ** 10
)
# Each draw is independent, so the same document may be picked more than once.
random_documents = [random.choice(documents) for _ in range(sampling_trial)]

print(f'total documents : {len(documents)}')
print(f'sampled documents : {len(random_documents)}\n')

def split_documents_by_index(document: str, index: int):
    """Break ``document`` into consecutive chunks of ``index`` characters.

    Args:
        document: The text to split.
        index: Maximum number of characters per chunk.

    Returns:
        The chunks joined with newline characters; the last chunk may be
        shorter than ``index``.
    """
    chunks = []
    for offset in range(0, len(document), index):
        chunks.append(document[offset:offset + index])
    return '\n'.join(chunks)

for random_document in random_documents:
    try:
        start = time.time()
        result = await request_ner([random_document])
        for k, result in result[0].items():
            print(f'<<{k}>>')
            if k == 'merged_sentence':
                print(split_documents_by_index(result, characters_per_line), '\n')
                continue
            print(result, '\n')
        print(time.time() - start, 'sec')
        print('='*100, '\n')
    except Exception as e:
        print('\n', e)
        print(random_document, '\n')
        continue

total documents : 10094
sampled documents : 20

<<sentence>>
 

<<merged_sentence>>
 

<<entities>>
[] 

<<not_entities>>
[] 

0.3178889751434326 sec

<<sentence>>
오늘은 혼밥 맥도날드 갑니다!
 
햄버거 최대 2개 가능한데요
 
치사량에 도전하러 갑니다
 
빅맥 버거랑 불고기버거 먹을꼬에염 ㅎㅎㅎ
 
빅맥셋트 사면 도시락 이벤트 하네요;;
 
필요도 없는걸;; 

<<merged_sentence>>
[오늘은 : DT] 혼밥 [맥도날드 : ORG] 갑니다!   [햄버거 : FNB] 최대 2개 가능한데요   치사량에 도전하러 갑니다   [빅맥 버거 : FNB]랑 [불고기버거 : 
FNB] 먹을꼬에염 ㅎㅎㅎ   [빅맥 : FNB]셋트 사면 도시락 이벤트 하네요;;   필요도 없는걸;; 

<<entities>>
[['오늘은', 'DT', [0, 3]], ['맥도날드', 'ORG', [7, 11]], ['햄버거', 'FNB', [19, 22]], ['빅맥 버거', 'FNB', [53, 58]], ['불고기버거', 'FNB', [60, 65]], ['빅맥', 'FNB', [78, 80]]] 

<<not_entities>>
[['혼밥', 'O', [4, 6]], ['갑니다!', 'O', [12, 16]], ['최대 2개 가능한데요   치사량에 도전하러 갑니다', 'O', [23, 50]], ['랑', 'O', [58, 59]], ['먹을꼬에염 ㅎㅎㅎ', 'O', [66, 75]], ['셋트 사면 도시락 이벤트 하네요;;   필요도 없는걸;;', 'O', [80, 111]]] 

0.07361102104187012 sec

<<sentence>>
'전국 소등'부터 친환경 인증샷 이벤트까지 풍성 [서울경제] 22일 ‘지구의 날’을 맞아 전국 곳곳에서 다양한 행사가 벌어진다. 올해 51주년인 지구의날은 환경 오염의 심각성을 알리기 위해서 제정된 