## Connect to Firebase

In [1]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import pandas as pd

# Build Admin SDK credentials from the service-account key file that sits next
# to this notebook.
# NOTE(review): this key file is a secret -- keep it out of version control.
cred = credentials.Certificate(
    "./netflix-comment-system-firebase-adminsdk-hq5cn-ad06fee744.json")

# initialize_app() raises ValueError when the default app already exists, which
# happens whenever this cell is re-run in a live kernel. get_app() raises
# ValueError when no default app exists yet, so initialise only in that case.
try:
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)

# Firestore client plus a handle on the top-level 'Comments' collection.
# Despite its name, `doc_ref` is a collection reference, not a document.
db = firestore.client()
doc_ref = db.collection('Comments')

## Create a data structure to access different collections easily

In [None]:
# Look-up table: document id in the 'Comments' collection -> reference to that
# document. The later cells use the ids as drama names.
drama_db = dict(
    (snapshot.id, doc_ref.document(snapshot.id))
    for snapshot in doc_ref.get()
)

# Plain list of the ids, used by the later cells for membership checks.
drama_db_list = [*drama_db]

## Load the local data into the notebook

In [None]:
import pandas as pd
import json

# Scraped / exported CSV tables consumed by the import cells below.
drama_pic = pd.read_csv('../scrape_for_comp/drama_pic.csv')          # name, img_url
douban_df = pd.read_csv('./douban_score_info.csv')                   # name, info, score
post_df = pd.read_csv('../scrape_for_comp/Posts_from_fb.csv')
comment_df = pd.read_csv('../scrape_for_comp/Comments_from_fb.csv')  # post_id, comment_id, comment_time, comment_text, sentiment
imdb = pd.read_csv('./netflix_scores.csv')                           # 名稱, 評分, 來源
rottentomatoes = pd.read_csv('../scrape_for_comp/rottentomatoes_scores.csv')  # name, tomatometer, audience

# Mapping of drama name -> ids of the posts related to that drama.
# Decode explicitly as UTF-8: open() otherwise falls back to the platform's
# default encoding, which can fail or garble non-ASCII names on some systems.
with open('../scrape_for_comp/drama_post_dt.json', encoding='utf-8') as fh:
    drama_post_dt = json.load(fh)

## Import drama image URLs into the database

In [None]:
# Attach the poster image URL to every drama that already has a document.
for idx, row in drama_pic.iterrows():
    # Only touch dramas that exist in Firestore; unknown names are skipped.
    if row['name'] in drama_db:
        # merge=True writes just the 'img' field, so re-running this cell does
        # not wipe other fields (e.g. 'info') already stored on the document.
        drama_db[row['name']].set({'img': row.img_url}, merge=True)

## Import drama introduction and scores from Douban

In [None]:
# Store the Douban synopsis on the drama document and the Douban rating as a
# new entry in the drama's 'scores' sub-collection.
for idx, row in douban_df.iterrows():
    if row['name'] in drama_db_list:
        drama_ref = drama_db[row['name']]
        # update() changes only the 'info' field of the existing document.
        drama_ref.update({'info': row['info']})
        # A DocumentReference has no .document() method; scores live in the
        # 'scores' sub-collection, like the IMDb / Rotten Tomatoes entries.
        # document() with no id auto-generates one, so re-running this cell
        # adds another score entry rather than overwriting the previous one.
        drama_ref.collection('scores').document().set(
            {'source': '豆瓣', 'score': row.score})

## Deal with the comments
1. extract the comments under the posts related to the drama
2. separate the comments by sentiment scores
3. load them into the database respectively

In [None]:
# Comments with a sentiment score at or above this value count as positive,
# everything below it as negative.
SENTIMENT_THRESHOLD = 5

for k, v in drama_post_dt.items():
    # Skip dramas without related posts, and dramas that have no Firestore
    # document (nothing could be written for them anyway).
    if len(v) == 0 or k not in drama_db_list:
        continue
    print(k, 'comment loading')

    # Gather the comments of every related post with a single concat.
    # DataFrame.append was removed in pandas 2.0, and growing a frame inside a
    # loop is quadratic.
    comments = pd.concat(
        [comment_df[comment_df.post_id == int(pid)] for pid in v])

    # Rows whose sentiment is NaN satisfy neither condition and are dropped.
    pos_comment = comments[comments.sentiment >= SENTIMENT_THRESHOLD]
    neg_comment = comments[comments.sentiment < SENTIMENT_THRESHOLD]

    # Same write logic for both buckets; only the sub-collection name differs.
    for bucket, frame in (('pos_comment', pos_comment),
                          ('neg_comment', neg_comment)):
        target = drama_db[k].collection(bucket)
        for idx, row in frame.iterrows():
            cm_dt = {
                'time': row.comment_time,
                'text': row.comment_text,
                'score': row.sentiment,
            }
            # Keyed by comment id, so re-running overwrites instead of
            # duplicating.
            target.document(str(row.comment_id)).set(cm_dt)

## Import the scores from IMDb and Rotten Tomatoes

In [None]:
# Add each score from the netflix_scores table to the matching drama's
# 'scores' sub-collection.
for idx, row in imdb.iterrows():
    raw_name = row['名稱']
    # Missing names are read as NaN (a float); skip them.
    if not isinstance(raw_name, str):
        continue
    # Normalise BEFORE the membership test: the lookup key replaces '/' with
    # '-', so testing the raw name would silently skip every title that
    # contains a slash.
    name = raw_name.replace('/', '-')
    if name in drama_db_list:
        drama_db[name].collection('scores').document().set(
            {'score': row['評分'], 'source': row['來源']})

In [None]:
# Add the Rotten Tomatoes critic (tomatometer) and audience scores to the
# matching drama's 'scores' sub-collection.
for idx, row in rottentomatoes.iterrows():
    raw_name = row['name']
    # Missing names are read as NaN (a float); skip them.
    if not isinstance(raw_name, str):
        continue
    # Normalise before the membership test so the key that is checked is the
    # same key that is looked up ('/' replaced by '-').
    name = raw_name.replace('/', '-')
    if name in drama_db_list:
        score_dt = {'tomatometer': row['tomatometer'], 'audience': row['audience'], 'source': '爛番茄'}
        # set() requires the payload; it was previously called with no
        # argument, which raises TypeError and left score_dt unused.
        drama_db[name].collection('scores').document().set(score_dt)