In [50]:
from flask import Flask 
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import text as query_text
from sqlalchemy.sql import func
import os

import pandas as pd
import numpy as np

# Flask application whose SQLAlchemy engine points at a SQLite file
# ('douban.db') located in the application's root directory.
app = Flask(__name__)
app.config.update(
    SQLALCHEMY_DATABASE_URI='sqlite:///' + os.path.join(app.root_path, 'douban.db'),
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
)
# Push an application context once so that the cells below can use
# `db.session` / `db.engine` directly, outside of any request.
app.app_context().push()

db = SQLAlchemy(app)

def execute_scripts(scripts):
    """Run a multi-statement SQL script inside a single transaction.

    The script is split on ';' and every non-empty statement is executed
    through ``db.session``. If all statements succeed the transaction is
    committed; if any statement (or the commit itself) fails, the whole
    transaction is rolled back and the error is printed instead of raised.

    Args:
        scripts: A string holding one or more SQL statements separated by
            ';'. NOTE: the split is purely textual, so a ';' inside a
            string literal or trigger body would be split incorrectly.
    """
    # Use the argument that was passed in (not a notebook-level global) and
    # skip the empty fragments produced by a trailing ';' or blank lines.
    statements = [stmt.strip() for stmt in scripts.split(';')]
    statements = [stmt for stmt in statements if stmt]
    try:
        # No explicit session.begin(): the first execute() opens the
        # transaction, which also avoids an error when one is already open.
        for stmt in statements:
            db.session.execute(query_text(stmt))
        db.session.commit()
        print(f'Execute sucessfully.')
    except Exception as e:
        # Undo everything executed so far so the session stays usable.
        db.session.rollback()
        print(f'Error during transaction: {e}')

In [54]:
# (Re)create the `movies` table, then load it from douban/movies.csv.
sql_script = """
DROP TABLE IF EXISTS movies;
CREATE TABLE IF NOT EXISTS movies (
    movie_id INTEGER PRIMARY KEY NOT NULL,
    movie_name TEXT NOT NULL,
    release_date DATETIME,
    country TEXT,
    movie_type TEXT,
    release_year INTEGER,
    description TEXT,
    douban_url TEXT,
    poster TEXT,
    douban_rate FLOAT,
    rating_count INTEGER
);
"""
execute_scripts(sql_script)

movies = pd.read_csv('douban/movies.csv')
cols_str = ['director', 'author', 'actor', 'genre', 'aggregateRating']
# SECURITY NOTE(review): eval() runs arbitrary Python found in the CSV cells.
# Only acceptable for a trusted, locally produced file; consider
# ast.literal_eval if the cells are plain Python literals.
movies[cols_str] = movies[cols_str].map(eval)

# Derive the columns stored in the `movies` table from the raw CSV fields.
movies['release_date'] = pd.to_datetime(movies.datePublished)
movies['release_year'] = movies.release_date.dt.year
movies['douban_url'] = movies['url'].apply(lambda x: f"https://movie.douban.com{x}")
movies['douban_rate'] = movies['aggregateRating'].apply(lambda x: x['ratingValue'])
movies['rating_count'] = movies['aggregateRating'].apply(lambda x: x['ratingCount'])
# Single-entry genre lists use that entry, longer lists use the second
# entry; an empty list yields None instead of raising IndexError.
movies['movie_type'] = movies['genre'].apply(
    lambda x: None if not x else (x[0] if len(x) == 1 else x[1])
)

cols  = ['movie_id', 'name',  'release_date', 'country', 'movie_type', 'release_year', 'description', 'douban_url', 'image', 'douban_rate', 'rating_count']
# Rename the CSV field names to the column names declared in the DDL above.
df = movies.reset_index()[cols].rename(columns={'name': 'movie_name', 'image': 'poster'})
# Write into the table created above (`movies`). The previous target,
# 'movie_info', was never dropped/created here, so the declared table stayed
# empty and re-running the cell appended duplicate rows.
df.to_sql('movies', db.engine, if_exists='append', index=False)

Execute sucessfully.


428

In [67]:
# (Re)create the `persons` table, then load it from douban/persons.csv.
sql_script = """
DROP TABLE IF EXISTS persons;
CREATE TABLE IF NOT EXISTS persons (
    person_id INTEGER PRIMARY KEY NOT NULL,
    person_name TEXT NOT NULL,
    person_name_en TEXT
);
"""

# Missing values are replaced by empty strings before loading.
persons = pd.read_csv('douban/persons.csv').fillna('')
execute_scripts(sql_script)
cols  = ['person_id', 'name_cn', 'name_en']
# Rename the CSV field names to the column names declared in the DDL above;
# rename() returns a new frame rather than relabelling a slice of `persons`.
df = persons[cols].rename(columns={'name_cn': 'person_name', 'name_en': 'person_name_en'})
df.to_sql('persons', db.engine, if_exists='append', index=False)

Execute sucessfully.


10028