In [5]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import text as query_text
from sqlalchemy.sql import func
import os

import pandas as pd
import numpy as np

app = Flask(__name__)
# Configure SQLite database
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///' + os.path.join(app.root_path, '../douban.db')
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
app.app_context().push()

db = SQLAlchemy(app)

# many to many relation
# https://flask-sqlalchemy.palletsprojects.com/en/2.x/models/
movie_actor_association = db.Table(
    'movie_actor_association',
    db.Column('movie_id', db.Integer, db.ForeignKey('douban_movies.movie_id')),
    db.Column('person_id', db.Integer, db.ForeignKey('persons.person_id')),
)


class DoubanMovie(db.Model):
    __tablename__ = 'douban_movies'

    movie_id = db.Column(db.Integer, primary_key=True)
    movie_name = db.Column(db.String(20))
    release_date = db.Column(db.String(15))
    country = db.Column(db.String(20))
    movie_type = db.Column(db.String(10))
    release_year = db.Column(db.Integer)
    description = db.Column(db.Text)
    douban_url = db.Column(db.String(255))
    poster  = db.Column(db.String(255))
    douban_rate  = db.Column(db.Float)
    rating_count = db.Column(db.Integer)
    
    # actors = db.relationship('Person', secondary=movie_actor_association, backref='movies', lazy='dynamic')
    persons = db.relationship('Person', secondary=movie_actor_association, backref='movies', lazy='dynamic')
 
    def __repr__(self):
        return f'<Douban {self.movie_name}>' 
    
class Person(db.Model):
    __tablename__ = 'persons'

    person_id = db.Column(db.Integer, primary_key=True)
    person_name = db.Column(db.String(20))
    person_name_en = db.Column(db.String(20))
    gender = db.Column(db.String(2))
    birth_date = db.Column(db.String(20))
    birth_place = db.Column(db.String(20))
    birth_year = db.Column(db.Integer())
    
    def __repr__(self):
        return f'<Person {self.person_name}>'    


In [91]:
movies = pd.read_sql('douban_movies', db.engine)
persons = pd.read_sql('persons', db.engine)
associations = pd.read_sql('movie_actor_association', db.engine)

In [93]:
movie_actor = {}

for movie_id in associations.movie_id.unique():
    movie_actor[movie_id] = associations.loc[associations.movie_id==movie_id, 'person_id'].values.tolist()[:3]
movie_actor = pd.DataFrame.from_dict(movie_actor, orient='index').stack().droplevel(-1).reset_index()
movie_actor.columns = ['movie_id', 'person_id']

movie_name = movies.set_index('movie_id')['movie_name'].to_dict()
movie_rate = movies.set_index('movie_id')['douban_rate'].to_dict()
rate_count = movies.set_index('movie_id')['rating_count'].to_dict()
person_group = movie_actor.groupby('person_id')['movie_id']
actors = pd.concat([
    person_group.apply(lambda x: [mid for mid in x]),
    person_group.apply(lambda x: [movie_name[mid] for mid in x]),
    person_group.apply(lambda x: [movie_rate[mid] for mid in x]),
    person_group.apply(lambda x: [rate_count[mid] for mid in x]),
    person_group.apply(lambda x: [f"{movie_name[mid]}（{movie_rate[mid]})/n" for mid in x]),
], keys =['movie_id', 'movie_name', 'movie_rate', 'rate_count', 'movies'], 
axis=1).join(persons.set_index('person_id'))
actors['max_rate'] = actors.movie_rate.apply(max)
actors['top_count'] = actors.movie_rate.apply(len)
actors = actors.sort_values(['max_rate', 'top_count'], ascending=False)[:20]

In [96]:
actors.columns

Index(['movie_id', 'movie_name', 'movie_rate', 'rate_count', 'movies',
       'person_name', 'person_name_en', 'gender', 'birth_date', 'birth_place',
       'birth_year', 'max_rate', 'top_count'],
      dtype='object')

In [101]:
cols = ['person_name', 'gender', 'birth_date', 'birth_place',  'birth_year', 'max_rate', 'top_count', 'movies']
actors[cols]

Unnamed: 0_level_0,person_name,gender,birth_date,birth_place,birth_year,max_rate,top_count,movies
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1054534,摩根·弗里曼,男,1937年06月01日,"美国,田纳西州,孟菲斯",1937.0,9.7,4,"[肖申克的救赎（9.7)/n, 七宗罪（8.8)/n, 遗愿清单（8.7)/n, 百万美元宝..."
1054521,蒂姆·罗宾斯,男,1958年10月16日,"美国,加利福尼亚州,西科维纳",1958.0,9.7,2,"[肖申克的救赎（9.7)/n, 黑水（8.6)/n]"
1041179,鲍勃·冈顿,男,1945年11月15日,"美国,加利福尼亚,圣莫尼卡",1945.0,9.7,1,[肖申克的救赎（9.7)/n]
1003494,张国荣,男,,"中国,香港",,9.6,9,"[霸王别姬（9.6)/n, 倩女幽魂（8.8)/n, 英雄本色（8.6)/n, 东邪西毒（8..."
1035641,巩俐,女,1965年12月31日,"中国,辽宁,沈阳",1965.0,9.6,3,"[霸王别姬（9.6)/n, 唐伯虎点秋香（8.7)/n, 红高粱（8.5)/n]"
1010665,查尔斯·劳顿,男,,"英国,英格兰,约克郡,士嘉堡",,9.6,1,[控方证人（9.6)/n]
1013957,玛琳·黛德丽,女,,"德国,柏林",,9.6,1,[控方证人（9.6)/n]
1048197,泰隆·鲍华,男,,"美国,俄亥俄州,辛辛那提",,9.6,1,[控方证人（9.6)/n]
1050265,张丰毅,男,1956年09月01日,"中国,湖南,长沙",1956.0,9.6,1,[霸王别姬（9.6)/n]
1041029,莱昂纳多·迪卡普里奥,男,1974年11月11日,"美国,加利福尼亚,洛杉矶",1974.0,9.5,5,"[盗梦空间（9.4)/n, 泰坦尼克号（9.5)/n, 猫鼠游戏（9.1)/n, 禁闭岛（8..."


In [46]:
associations

Unnamed: 0,movie_id,person_id
0,26752088,1274297
1,26752088,1313837
2,26752088,1276085
3,26752088,1312976
4,26752088,1322072
...,...,...
11814,1325958,1150778
11815,1325958,1054487
11816,1325958,1440990
11817,1325958,1293034


In [47]:
associations.movie_id.unique()

array([26752088,  1292052,  3541415, 26794435,  1291561, 25662329,
       30166972, 26348103,  1292722,  1889243,  1295644, 27060077,
       26387939, 20495023,  1291546,  3319755,  3793023, 26683290,
        1295038, 27010768,  1292064, 25986180,  4920389,  1849031,
       26100958,  3011091, 30170448,  3742360,  1291560, 26861685,
       30252495, 27110296, 26580232,  1291549,  1307914,  2129039,
       27119724, 24773958,  1929463, 30334073,  1652587,  1306249,
        2131459, 26425063,  1295124,  1308807, 11026735, 27622447,
       30318116, 25921812,  3287562,  1851857,  1302425,  5912992,
        6786002,  1292370,  1296996,  1293172,  1305487,  4739952,
        1866479,  1291544,  1293839,  3011235, 26325320,  1432146,
        1298070,  1292215, 27024903, 26799731, 11525673,  2334904,
       21937452,  1485260,  1907966, 26611804,  1292343,  1457217,
        7065154,  1297447,  1291843,  1298624,  1862151, 26374197,
       26709258,  1418200,  1291571, 25937854,  1292223, 21318