In [189]:
from flask import request, jsonify, Flask, Response, render_template
from flask_cors import CORS

import requests

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from matplotlib.cbook import boxplot_stats
import matplotlib.pyplot as plt

from elasticsearch import Elasticsearch

import tensorflow_hub as hub

import numpy as np
import pandas as pd
import json

In [190]:
from sklearn.manifold import TSNE
from scipy.spatial.distance import pdist 
from numpyencoder import NumpyEncoder

In [191]:
# Sentence-embedding model loaded once at import time from the local './model' directory.
embed = hub.load('./model')
def get_use_vector(keyword):
    """Embed a single keyword and return its vector as a plain Python list.

    The keyword is wrapped in a one-element batch, passed to the module-level
    ``embed`` model, and the first (only) row of the result is returned.
    """
    batch_vectors = np.array(embed([keyword]))
    as_lists = batch_vectors.tolist()
    return as_lists[0]

In [251]:
def get_knn_query(field, vector, limit):
    """Build the search body for a k-NN lookup on ``field``.

    Args:
        field: name of the vector field to search; it is also appended to the
            list of returned ``_source`` fields.
        vector: query vector placed under the knn clause.
        limit: used both as the result ``size`` and as ``k``.

    Returns:
        dict: the request body with keys ``size``, ``query`` and ``_source``.
    """
    knn_clause = {f"{field}": {"vector": vector, "k": limit}}
    returned_fields = [
        "bookId", "title", "authors", "num_pages", "published_year", "publisher", field
    ]
    return {
        "size": limit,
        "query": {"knn": knn_clause},
        "_source": returned_fields,
    }

In [279]:
def get_match_query(field, inputs, limit):
    """Build the search body for a text ``match`` query.

    Args:
        field: document field to match against.
        inputs: the text to match.
        limit: maximum number of hits requested (``size``).

    Returns:
        dict: the request body with keys ``size``, ``query`` and ``_source``.
    """
    match_clause = {field: inputs}
    returned_fields = [
        "bookId", "title", "authors", "language_code", "num_pages", "published_year", "publisher"
    ]
    return {
        "size": limit,
        "query": {"match": match_clause},
        "_source": returned_fields,
    }

In [280]:
def el_get_data(index, field, inputs, limit, term="knn", timeout=30):
    """Run a search against ``index`` and return the raw response text.

    Args:
        index: name of the index, interpolated into the ``_search`` URL.
        field: field to query (vector field for knn, text field otherwise).
        inputs: query vector when ``term == "knn"``, otherwise the text to match.
        limit: number of hits to request.
        term: ``"knn"`` selects the k-NN query; any other value selects a match query.
        timeout: seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the call could block forever on an
            unresponsive backend.

    Returns:
        str: the response body, unparsed (it may be an error document).
    """
    headers={'Accept': 'application/json', 'Content-type': 'application/json'}
    # NOTE(review): host and admin/admin credentials are hard-coded here; they
    # should come from configuration or the environment rather than source.
    elastic_url =f"http://52.66.250.236:9200/{index}/_search"

    if term == "knn":
        _body= get_knn_query(field, inputs, limit)
        print('knn')
    else:
        _body= get_match_query(field, inputs, limit)
        print(_body)

    response = requests.get(
        elastic_url,
        data=json.dumps(_body),
        auth=('admin','admin'),
        verify=False,
        headers=headers,
        timeout=timeout,
    )
    return response.text

In [300]:
def predict_outlier(df, cols, out=''):
    """Flag values outside the 1.5 * IQR fences for each column in ``cols``.

    For the column at position ``i`` in ``cols`` a boolean column named
    ``f"{out}_is_outlier_{i}"`` is added to ``df``. The frame is modified in
    place and also returned.
    """
    for position, name in enumerate(cols):
        values = df[name]
        lower_quartile = values.quantile(0.25)
        upper_quartile = values.quantile(0.75)
        spread = upper_quartile - lower_quartile  # interquartile range
        below_fence = values < lower_quartile - 1.5 * spread
        above_fence = values > upper_quartile + 1.5 * spread
        df[f"{out}_is_outlier_{position}"] = below_fence | above_fence
    return df

In [332]:
def get_tsne_chart_data(book_data, keyword_embeded, keyword, num):
    """Project the keyword and book vectors to 2-D points for a scatter chart.

    Despite the name, the projection is done with PCA (2 components,
    ``random_state=0``), not t-SNE.

    Args:
        book_data: list of ``[bookId, vector, title]`` rows. It is not
            modified; the keyword row is prepended to a copy.
        keyword_embeded: embedding vector of the search keyword.
        keyword: the search keyword, used as the title of the first point.
        num: how many trailing rows are reported under ``'corpus'`` instead
            of ``'book'``. Values <= 0 put every row under ``'book'``.

    Returns:
        dict: ``{'scatter': {'keys', 'title', 'book', 'corpus'}}`` where
        ``keys`` is the keyword's 2-D point, ``title`` lists every row's title
        (keyword first), and ``book`` / ``corpus`` are lists of 2-D points.
    """
    # Build a new list instead of insert()-ing into the caller's list, so the
    # argument is left untouched and repeated calls do not stack keyword rows.
    rows = [[1, keyword_embeded, keyword], *book_data]
    df = pd.DataFrame(rows, columns=['bookId', 'vector', 'title'])
    embeddings = [np.asarray(item, dtype=np.float32) for item in df.vector]

    projector = PCA(n_components=2, random_state=0)
    df['x_tsne'], df['y_tsne'] = zip(*projector.fit_transform(X=embeddings))
    book_points = np.array(df[['x_tsne', 'y_tsne']]).tolist()
    title = df['title'].to_list()

    # Row 0 is always the keyword; the last `num` rows (if any) are the corpus.
    if num > 0:
        books, corpus = book_points[1:-num], book_points[-num:]
    else:
        books, corpus = book_points[1:], []

    return {
        'scatter': {
            'keys': book_points[0], 'title': title, 'book': books, 'corpus': corpus
        }
    }

In [333]:
def json_response(j_str):
    """Turn a raw search response into the JSON string sent to the client.

    When the parsed response has a nested ``hits.hits`` list, the result is
    ``{'status': True, 'is_knn': False, 'result': <hits>}``; otherwise it is
    ``{'status': False, 'error': <original text>}``. Either way the payload is
    returned serialized with ``json.dumps``.
    """
    parsed = json.loads(j_str)

    # Guard clause: anything without hits.hits is reported back as an error.
    if 'hits' not in parsed or 'hits' not in parsed['hits']:
        return json.dumps({'status': False, 'error': j_str})

    payload = {'status': True, 'is_knn': False}
    payload['result'] = parsed['hits']['hits']
    return json.dumps(payload)

In [351]:
def get_book_data(j_str, field):
    """Parse a raw k-NN search response into result rows plus chart rows.

    Args:
        j_str: raw JSON text of the search response.
        field: name of the vector field. It is popped out of every hit's
            ``_source`` so the vectors are not repeated in ``'result'``.

    Returns:
        dict: on success ``{'status': True, 'chart': [[bookId, vector, title], ...],
        'result': <hits without the vector field>}``; when the response has no
        ``hits.hits``, ``{'status': False, 'error': <original text>}``.

    Raises:
        KeyError: if a hit's ``_source`` lacks ``bookId``, ``field`` or ``title``.
    """
    j_data = json.loads(j_str)
    if 'hits' in j_data and 'hits' in j_data['hits']:
        data={'status':True}
        result=j_data['hits']['hits']
        data['chart']=[[item['_source']['bookId'], item['_source'].pop(field), item['_source']['title']] for item in result]
        data['result']=result
        return data

    # Previously this branch built the error dict but fell off the end of the
    # function, so callers received None instead of the error payload.
    return {'status':False, 'error': j_str}

In [363]:
def get_similar_books(index, keyword, field, limit, term, num):
    """Search for books and return the JSON string to send to the client.

    Args:
        index: index to search.
        keyword: the user's search text.
        field: field to query (vector field for knn, text field otherwise).
        limit: number of hits to request.
        term: ``"knn"`` embeds the keyword and runs a k-NN search with chart
            data; any other value runs a plain match query.
        num: number of trailing hits reported as ``corpus`` in the chart data.

    Returns:
        str: JSON text. For knn it contains ``status``, ``result``,
        ``chart_data`` and ``is_knn``; if the search response has no hits
        structure it is ``{"status": false, "error": <raw response>}``.
    """
    is_knn = term == "knn"
    _input = get_use_vector(keyword) if is_knn else keyword

    book_data = el_get_data(index, field, _input, limit, term)
    if not is_knn:
        return json_response(book_data)

    j_data = get_book_data(book_data, field)
    # A failed lookup yields either None or a status=False dict with no
    # 'chart' key; report it instead of crashing on the chart step below.
    if not j_data or not j_data.get('status'):
        return json.dumps({'status': False, 'error': book_data})

    j_data['chart_data'] = get_tsne_chart_data(j_data.pop('chart'), _input, keyword, num)
    j_data['is_knn'] = True
    return json.dumps(j_data, cls=NumpyEncoder)

In [364]:
def write_to_file(data):
    """Append ``data`` as one JSON line to 'visiters.json' in the working directory."""
    line = json.dumps(data) + "\n"
    with open('visiters.json', 'a') as log_file:
        log_file.write(line)

In [365]:
# Create the Flask application and wrap it with flask_cors' CORS (no extra
# options passed), so the routes below are registered on this `app` object.
app = Flask(__name__)
CORS(app)

@app.route('/', methods=['GET'])
def home():
    """Serve the static HTML landing snippet for the root URL."""
    heading = "<h1>Patent K-NN Search</h1>"
    blurb = "<p>This site is a prototype API for get K-NN Patent.</p>"
    return heading + blurb

@app.route('/search', methods=['GET', 'POST'])
def search():
    """Handle a search request and return the results as JSON.

    Query parameters:
        index, keyword, field: required; a missing one yields HTTP 400 with
            ``{"status": false, "error": ...}``.
        limit: number of books to return (default 3).
        term: ``"knn"`` for vector search, anything else for text match
            (default ``"text"``).
        num: number of extra trailing hits to fetch as chart corpus (default 1).
        location: free-form value that is only written to the visitor log.

    Every request is appended to the visitor log before it is validated.
    """
    index = request.args.get('index')
    keyword = request.args.get('keyword')
    field = request.args.get('field')
    # type=int makes a non-numeric value fall back to the default instead of
    # raising ValueError from int(), which surfaced as an HTTP 500.
    limit = request.args.get('limit', 3, type=int)
    term = request.args.get('term','text')
    num = request.args.get('num', 1, type=int)
    location = request.args.get('location')

    print(f"index={index},keyword={keyword},filed={field},limit={limit},term={term}")

    user_dic = {
        'index': index,
        'keyword': keyword,
        'field': field,
        'limit': limit,
        'term': term,
        'num': num,
        'location': location
    }

    write_to_file(user_dic)

    # Reject requests that would otherwise send None into the URL / query body.
    required = (('index', index), ('keyword', keyword), ('field', field))
    missing = [name for name, value in required if value is None]
    if missing:
        error = {
            'status': False,
            'error': f"missing required query parameter(s): {', '.join(missing)}"
        }
        return Response(json.dumps(error), status=400, mimetype='application/json')

    # limit+num: fetch `num` extra hits, which the chart reports as corpus points.
    j_str = get_similar_books(index, keyword, field, limit+num, term, num)

    return Response(j_str,  mimetype='application/json')

In [None]:
# Start the HTTP server when this file is executed directly (not imported).
if __name__ == '__main__':
    # Imported here so werkzeug's server helper is only needed when serving.
    from werkzeug.serving import run_simple
    # Listen on every interface, port 3000, with threaded request handling.
    run_simple('0.0.0.0', 3000, app, threaded=True)
    # Alternative launcher kept for reference:
    # app.run(host='0.0.0.0', port=1212, threaded=True)

INFO:werkzeug: * Running on http://0.0.0.0:3000/ (Press CTRL+C to quit)
INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:38:37] "[37mGET /search?index=books_lexical&keyword=sample&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.194.166.70 - - [12/Nov/2020 08:39:12] "[37mGET /search?index=books_lexical&keyword=test&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=test,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.196.6.151 - - [12/Nov/2020 08:39:20] "[37mGET /search?index=books_lexical&keyword=woman%20related%20books&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=woman related books,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.196.6.151 - - [12/Nov/2020 08:40:07] "[37mGET /search?index=books_lexical&keyword=woman%20related%20books&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=woman related books,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:40:10] "[37mGET /search?index=books_lexical&keyword=sample&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.194.166.70 - - [12/Nov/2020 08:42:13] "[37mGET /search?index=books_lexical&keyword=women&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=women,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.194.166.70 - - [12/Nov/2020 08:42:18] "[37mGET /search?index=books_lexical&keyword=women&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=women,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:42:21] "[37mGET /search?index=books_lexical&keyword=Sample&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=Sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.196.6.151 - - [12/Nov/2020 08:44:09] "[37mGET /search?index=books_lexical&keyword=woman%20related%20book&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=woman related book,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:45:32] "[37mGET /search?index=books_lexical&keyword=sample&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:45:36] "[37mGET /search?index=books_lexical&keyword=sample&field=title_vec&limit=10&term=knn&num=0&location=[object%20Object] HTTP/1.1[0m" 200 -


index=books_lexical,keyword=sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.194.166.70 - - [12/Nov/2020 08:45:37] "[37mGET /search?index=books_lexical&keyword=women%20related&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=women related,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:45:59] "[37mGET /search?index=books_lexical&keyword=sample&field=title_vec&limit=10&term=knn&num=0&location=[object%20Object] HTTP/1.1[0m" 200 -


index=books_lexical,keyword=sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:46:01] "[37mGET /search?index=books_lexical&keyword=sample&field=title_vec&limit=10&term=knn&num=0&location=[object%20Object] HTTP/1.1[0m" 200 -


index=books_lexical,keyword=sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:223.182.209.186 - - [12/Nov/2020 08:46:04] "[37mGET /search?index=books_lexical&keyword=sample&field=title_vec&limit=10&term=knn&num=0&location=[object%20Object] HTTP/1.1[0m" 200 -


index=books_lexical,keyword=sample,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.194.166.70 - - [12/Nov/2020 08:46:28] "[37mGET /search?index=books_lexical&keyword=aaa&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=aaa,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.194.166.70 - - [12/Nov/2020 08:46:51] "[37mGET /search?index=books_lexical&keyword=aaa&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=aaa,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.194.166.70 - - [12/Nov/2020 08:46:53] "[37mGET /search?index=books_lexical&keyword=aaa&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=aaa,filed=title_vec,limit=10,term=knn
knn


INFO:werkzeug:117.196.6.151 - - [12/Nov/2020 08:47:09] "[37mGET /search?index=books_lexical&keyword=religion%20based%20books&field=title_vec&limit=10&term=knn&num=0&location=null HTTP/1.1[0m" 200 -


index=books_lexical,keyword=religion based books,filed=title_vec,limit=10,term=knn
knn
