# Bigquery와 파이썬 연동(추가적인 승인절차 없이 사용 가능)

In [None]:
# Colab-only helper used to mount Google Drive into the runtime's filesystem.
from google.colab import drive
# Mount Drive at /content/drive; the service-account key file loaded later
# in this notebook is read from a path under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 로컬에서 이용시 pip install google-cloud-bigquery 콘솔에서 설치
# 설치 후 작동하지 않을 경우 pip install google-cloud-bigquery==1.5.0으로 버전을 낮추면 작동합니다.

In [None]:
# Bigquery 연동 패키지 import
from google.cloud import bigquery
from google.oauth2 import service_account

In [None]:
import pandas as pd
import json
from pandas.io import gbq
from pandas import DataFrame

In [None]:
from typing import List
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

In [None]:
from tensorflow.python.ops.numpy_ops import np_config
# Switch on TensorFlow's NumPy-compatibility behavior for tensors.
# NOTE(review): presumably needed so the embedding tensor can be indexed and
# passed to NumPy functions further down — confirm before removing.
np_config.enable_numpy_behavior()

In [None]:
# Build service-account credentials from the JSON key file stored on the
# mounted shared Drive; every BigQuery client and upload below reuses them.
# NOTE(review): the key path is hard-coded to one specific Drive layout.
credentials = service_account.Credentials.from_service_account_file('/content/drive/Shareddrives/BKMS/BKMS_FinalProject/recommendation_model/local_bigquery/dbmsproject-350608-bc1d3264916d.json')

## 1. Create table(파이썬 거쳐서 빅쿼리에 데이터 보내야 할 경우 사용)

In [None]:
# Create the practice table `pracdata` with two required columns.

client = bigquery.Client(credentials=credentials, project='dbmsproject-350608')

# Every column is REQUIRED, so only the (name, type) pairs vary.
schema = [
    bigquery.SchemaField(column, column_type, mode="REQUIRED")
    for column, column_type in (("Title", "STRING"), ("Year", "INTEGER"))
]

# Describe the table locally first, then ask BigQuery to create it; `table`
# ends up bound to the object returned by the API call.
table = bigquery.Table('dbmsproject-350608.part1json.pracdata', schema=schema)
table = client.create_table(table)

In [None]:
# 데이터 보내기

# dataframe.to_gbq(destination_table,project_id,if_exists='replace',credentials=credentials) 

## 2. Create text embedding

In [None]:
# Load the source data from BigQuery.

# Client bound to the project and authenticated with the service-account credentials.
client = bigquery.Client(project = 'dbmsproject-350608', credentials = credentials)
# Fetch every column of the source table; the cells below read its `title` column.
query = 'SELECT * FROM part1json.prac1'
# Run the query and materialize the whole result as a pandas DataFrame.
df = client.query(query).to_dataframe()

In [None]:
# Re-number the rows from 0, keeping the old index as a column.
df2 = df.reset_index()
# Collect every title into a plain list; the Flask route uses it for
# membership checks. Series.tolist() yields the same values as indexing the
# column one label at a time, without the per-element lookup cost.
all_titles = df2['title'].tolist()

In [None]:
# Load the Universal Sentence Encoder (version 4) module from TensorFlow Hub.
# The returned object is called directly on a list of strings further down.
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4", tags=None, options=None)

In [None]:
# Titles as a plain Python list, in DataFrame row order; positions in this
# list are the row positions of the embedding matrix built from it.
title_lst = df['title'].tolist()

In [None]:
# Build the embedding vectors (512-dimensional), one per entry of title_lst.

# All titles are passed to the encoder in a single call.
embeddings = embed(title_lst)

## 3. Calculate score using embedding vector and export result to bigquery

In [None]:
# Create the table that receives the similarity scores.

client = bigquery.Client(project = 'dbmsproject-350608', credentials = credentials)

schema = [
    bigquery.SchemaField("Title", "STRING", mode="REQUIRED"),
    # Scores are inner products of embedding vectors, i.e. fractional values
    # such as 0.55, so the column has to be FLOAT rather than INTEGER.
    bigquery.SchemaField("Score", "FLOAT", mode="REQUIRED"),
]

table = bigquery.Table('dbmsproject-350608.part1json.embed2', schema=schema)
table = client.create_table(table)

In [None]:
def get_recommendations(title):
  """Score every other title against ``title`` and publish the scores.

  The score of a title is the inner product between its embedding and the
  embedding of ``title``. The row for ``title`` itself (its first occurrence
  in ``title_lst``) is left out of the result.

  Side effect: the result is uploaded to the BigQuery table
  ``part1json.embed2`` of project ``dbmsproject-350608`` with
  ``if_exists='replace'``.

  Args:
    title: A title exactly as it appears in ``title_lst``.

  Returns:
    A DataFrame with the columns ``Title`` and ``Score``, one row per
    remaining title, in ``title_lst`` order (not sorted by score).

  Raises:
    ValueError: If ``title`` is not in ``title_lst``.
  """
  title_num = title_lst.index(title)

  # One matrix-vector inner product scores all rows at once instead of
  # calling np.inner once per title inside a Python loop.
  vectors = np.asarray(embeddings)
  scores = np.inner(vectors, vectors[title_num])

  # Drop the query title's own row so it is not recommended to itself.
  return_df = pd.DataFrame({
      'Title': title_lst[:title_num] + title_lst[title_num + 1:],
      'Score': np.delete(scores, title_num),
  })

  return_df.to_gbq(
      destination_table='part1json.embed2',
      credentials=credentials,
      project_id='dbmsproject-350608',  # Project id
      if_exists='replace',
  )
  # Hand the frame back as well: the Flask route consumes the return value.
  return return_df

## 4. Recommendation result

In [None]:
# Score all titles against '3GIO.'; the call uploads the result to the
# part1json.embed2 table, which the query below reads back.
get_recommendations('3GIO.')

1it [00:04,  4.47s/it]


In [None]:
# Top 20 titles by score from the uploaded result table.
# Plain string literal: there are no placeholders, so no f-prefix is needed.
query = """
SELECT
  Title, Score,
FROM
  `dbmsproject-350608.part1json.embed2`
ORDER BY
  Score DESC
LIMIT
  20
"""

In [None]:
# Run the query.

# Submits the SQL held in `query`; the returned job is converted to a
# DataFrame in the next cell.
query_job = client.query(query)

In [None]:
# Convert the result to a DataFrame.

# Note: this rebinds `df`, which previously held the source table.
df = query_job.to_dataframe()

In [None]:
# Preview the first rows of the ranked result.
df.head()

Unnamed: 0,Title,Score
0,Frontmatter,0.55395
1,Flußanalyse,0.55075
2,FFTW.,0.544288
3,Fachkunde,0.542142
4,1may,0.536059


In [None]:
# Flask

import flask

app = flask.Flask(__name__, template_folder='templates')

# Number of recommendations shown on the result page.
_MAX_RESULTS = 20

# Reads the best-scoring rows back from the table that get_recommendations
# uploads to.
_TOP_SCORES_SQL = f"""
SELECT
  Title, Score
FROM
  `dbmsproject-350608.part1json.embed2`
ORDER BY
  Score DESC
LIMIT
  {_MAX_RESULTS}
"""


def _find_title(name):
  """Return the stored title matching ``name``, or None if there is none.

  An exact match wins; otherwise the first title that is equal ignoring
  case is returned.
  """
  if name in all_titles:
    return name
  wanted = name.casefold()
  for candidate in all_titles:
    # Skip non-string entries (e.g. missing values) instead of failing on them.
    if isinstance(candidate, str) and candidate.casefold() == wanted:
      return candidate
  return None


# Set up the main route
@app.route('/', methods=['GET', 'POST'])
def main():
  """Serve the search form (GET) or the recommendations for a title (POST).

  POST reads the form field ``paper_name``. If no stored title matches it,
  ``negative.html`` is rendered with the submitted name; otherwise
  ``positive.html`` is rendered with the top-scoring titles and their scores.
  """
  if flask.request.method == 'GET':
    return flask.render_template('index.html')

  # Only GET and POST are routed here, so from this point on it is a POST.
  pname = flask.request.form['paper_name'].strip()

  # Stored titles keep their original casing ('3GIO.', 'FFTW.', '1may'), so
  # the input must not be re-cased with str.title() before the lookup.
  matched = _find_title(pname)
  if matched is None:
    return flask.render_template('negative.html', name=pname)

  result_final = get_recommendations(matched)
  if result_final is None:
    # get_recommendations only uploaded the scores; read the best ones back.
    result_final = client.query(_TOP_SCORES_SQL).to_dataframe()
  else:
    result_final = result_final.sort_values('Score', ascending=False).head(_MAX_RESULTS)

  names = result_final['Title'].tolist()
  # The template variable is called paper_date, but it carries the scores.
  dates = result_final['Score'].tolist()
  return flask.render_template('positive.html', paper_names=names, paper_date=dates, search_name=matched)


if __name__ == '__main__':
    app.run()