In [None]:
!pip install mysql-connector-python

In [1]:
import json
import os
from pprint import pprint

import mysql.connector
import requests

# Part 1. Access the API

1. http://www.omdbapi.com/ is the OMDb (Open Movie Database) API, a web service from which users can request movie data (including IMDb IDs and ratings).
2. Use the requests package to access the API and see what data the API returns.
3. The API key is free; you can get one simply by registering your email address on the OMDb API website.

In [2]:
def imdb_parser(key, movie=None, title=False, _id=None):
    '''
    Query the OMDb API and print both the raw response text and the parsed JSON.

    The lookup mode is chosen in this order: exact title (title=True),
    then IMDb id (_id given), otherwise a free-text search on movie.

    Args:
        key (str): OMDb API key.
        movie (str): Movie title or search term, used by the title and search modes.
        title (bool): If True, look movie up as a title (the 't' query parameter).
        _id (str): IMDb id such as 'tt0083658' (the 'i' query parameter);
            only used when title is False.

    Returns:
        None. The results are printed.
    '''
    if title:
        query = {'t': movie}
    elif _id:
        query = {'i': _id}
    else:
        query = {'s': movie}
    params = {'apikey': key}
    params.update(query)
    # Passing the query as params lets requests URL-encode the values, so titles
    # containing '&', '#', '+' or spaces do not corrupt the query string.
    # The timeout keeps the cell from hanging forever if the server never answers.
    url_obj = requests.get('http://www.omdbapi.com/', params=params,
                           headers={'user-agent': 'Mozilla/5.0'}, timeout=10)
    print('Result from the API response: \n' + '='*100 + '\n' + url_obj.text + '\n' +'=' * 100)
    json_obj = json.loads(url_obj.text)
    print('Result from the json parser with pprint:')
    print('*' * 100)
    pprint(json_obj)
    print('*' * 100)

In [3]:
# Search for the title 'blade', parse the response with the json package and print it with pprint.
# The OMDb API key is read from the OMDB_API_KEY environment variable so that a personal key
# does not have to be written into the notebook; if the variable is unset, the free key
# originally used in this notebook is the fallback.
OMDB_API_KEY = os.environ.get('OMDB_API_KEY', 'ab010800')
imdb_parser(key=OMDB_API_KEY, movie='blade')

Result from the API response: 
{"Search":[{"Title":"Blade Runner","Year":"1982","imdbID":"tt0083658","Type":"movie","Poster":"https://m.media-amazon.com/images/M/MV5BNzQzMzJhZTEtOWM4NS00MTdhLTg0YjgtMjM4MDRkZjUwZDBlXkEyXkFqcGdeQXVyNjU0OTQ0OTY@._V1_SX300.jpg"},{"Title":"Blade Runner 2049","Year":"2017","imdbID":"tt1856101","Type":"movie","Poster":"https://m.media-amazon.com/images/M/MV5BNzA1Njg4NzYxOV5BMl5BanBnXkFtZTgwODk5NjU3MzI@._V1_SX300.jpg"},{"Title":"Blade","Year":"1998","imdbID":"tt0120611","Type":"movie","Poster":"https://m.media-amazon.com/images/M/MV5BOTk2NDNjZWQtMGY0Mi00YTY2LWE5MzctMGRhZmNlYzljYTg5XkEyXkFqcGdeQXVyMTAyNjg4NjE0._V1_SX300.jpg"},{"Title":"Blade II","Year":"2002","imdbID":"tt0187738","Type":"movie","Poster":"https://m.media-amazon.com/images/M/MV5BOWVjZTIzNDYtNTBlNC00NTJjLTkzOTEtOTE0MjlhYzI2YTcyXkEyXkFqcGdeQXVyNTAyODkwOQ@@._V1_SX300.jpg"},{"Title":"Blade: Trinity","Year":"2004","imdbID":"tt0359013","Type":"movie","Poster":"https://m.media-amazon.com/images/M/MV5BMj

# Part 2. Set up SQL database and table schema, and then insert the data

In [4]:
def connect_to_local(host='localhost', user='root', connect_timeout=5, password=None):
    '''
    Connect to a MySQL server and return the connection.

    connect_timeout defaults to 5 so that a failing connection attempt gives up
    quickly instead of blocking the notebook. If a cell still hangs, restart the
    kernel. Pass a different connect_timeout if the default does not suit you.

    Args:
        host (str): Your local host, 'localhost' as default.
        user (str): Your user name, 'root' as default.
        connect_timeout (int): Passed through to mysql.connector.connect as connect_timeout.
        password (str): Your password. When None (the default) no password is
            sent, which matches the previous behaviour of this function.

    Returns:
        mysql.connector.connect() object.
    '''
    options = {'host': host, 'user': user, 'connect_timeout': connect_timeout}
    # Only forward the password when the caller supplied one, so existing calls
    # keep producing exactly the same connect() arguments as before.
    if password is not None:
        options['password'] = password
    mydb = mysql.connector.connect(**options)
    print('Connect successfully' if mydb else 'Disconnected')
    return mydb

def build_database(cursor, database):
    '''
    Recreate a database from scratch: drop it if it already exists, then create it.

    Args:
        cursor (obj): MySQL cursor used to run the statements.
        database (str): Name of the database to (re)create.

    Returns:
        None
    '''
    # The name is interpolated into the statement text, so it must come from
    # trusted code rather than user input.
    for statement in ('DROP DATABASE IF EXISTS %s', 'CREATE DATABASE %s'):
        cursor.execute(statement % (database))
    print('Build %s database successfully' % (database))

def build_table(cursor, table, database):
    '''
    Recreate the movie table inside the given database.

    Switches to database, drops table if it already exists and creates it again
    with one column per movie attribute stored by this notebook.

    Args:
        cursor (obj): MySQL cursor used to run the statements.
        table (str): Name of the table to (re)create.
        database (str): Name of the database that holds the table.

    Returns:
        None
    '''
    # Both names are interpolated into the statement text, so they must come
    # from trusted code rather than user input.
    cursor.execute('USE %s' % (database))
    cursor.execute('DROP TABLE IF EXISTS %s' % (table))
    sql = ('CREATE TABLE %s (id INT AUTO_INCREMENT PRIMARY KEY, '
           'title VARCHAR(255), '
           # Plain YEAR: the YEAR(4) display-width form is deprecated in recent MySQL.
           'year YEAR, '
           'genre VARCHAR(255), '
           'director VARCHAR(255), '
           # DECIMAL(3,1) so that a rating of 10.0, which the CHECK allows, actually
           # fits; DECIMAL(2,1) can hold at most 9.9.
           'imdb_rating DECIMAL(3,1) CONSTRAINT chk_imdb CHECK (imdb_rating BETWEEN 0 AND 10), '
           'rotten_tomatoes INT CONSTRAINT chk_rotten CHECK (rotten_tomatoes BETWEEN 0 AND 100), '
           'metacritic INT CONSTRAINT chk_meta CHECK (metacritic BETWEEN 0 AND 100), '
           'plot VARCHAR(255), '
           'box_office INT)')
    cursor.execute(sql % table)
    print('Build %s table successfully' % (table))
    return

def imdb_id(key, movie=None, title=False, _id=None):
    '''
    Look a movie up on the OMDb API and return its IMDb id.

    The lookup mode is chosen in this order: title (title=True), then
    IMDb id (_id given), otherwise a free-text search on movie. Only the title
    mode returns a value.

    Args:
        key (str): OMDb API key.
        movie (str): Movie title or search term, used by the title and search modes.
        title (bool): If True, look movie up as a title and return its id.
        _id (str): IMDb id; only used when title is False.

    Returns:
        str: The 'imdbID' field of the response when title is True.
        None: In the other two modes (the request is still sent).

    Raises:
        KeyError: title is True but the response has no 'imdbID' field, for
            example because the API reported an error for this title.
    '''
    if title:
        query = {'t': movie}
    elif _id:
        query = {'i': _id}
    else:
        query = {'s': movie}
    params = {'apikey': key}
    params.update(query)
    # Passing the query as params lets requests URL-encode the values, so titles
    # containing '&', '#', '+' or spaces do not corrupt the query string.
    # The timeout keeps the call from hanging forever if the server never answers.
    url_obj = requests.get('http://www.omdbapi.com/', params=params,
                           headers={'user-agent': 'Mozilla/5.0'}, timeout=10)
    json_obj = json.loads(url_obj.text)
    if not title:
        return None
    if 'imdbID' not in json_obj:
        # Same exception type as a plain json_obj['imdbID'] lookup, but the
        # message now says which title failed and what the API answered.
        raise KeyError('imdbID missing from OMDb response for %r: %s'
                       % (movie, json_obj.get('Error')))
    return json_obj['imdbID']
        
def imdb_id_detail_parser(key, _id):
    '''
    Fetch the OMDb record for one IMDb id and return it as parsed JSON.

    Args:
        key (str): OMDb API key.
        _id (str): IMDb id of the movie, sent as the 'i' query parameter.

    Returns:
        The JSON-decoded response body (whatever json.loads produces for it).
    '''
    # Passing the query as params lets requests URL-encode the values; the
    # timeout keeps the call from hanging forever if the server never answers.
    url_obj = requests.get('http://www.omdbapi.com/',
                           params={'apikey': key, 'i': _id},
                           headers={'user-agent': 'Mozilla/5.0'}, timeout=10)
    json_obj = json.loads(url_obj.text)
    return json_obj
    
def _omdb_rating_value(json_obj, source):
    '''Return the 'Value' of the rating whose 'Source' equals source, or 'N/A' if there is none.'''
    for rating in json_obj.get('Ratings') or []:
        if rating.get('Source') == source:
            return rating.get('Value', 'N/A')
    return 'N/A'


def insert_information(cursor, db, database_name, table, json_obj):
    '''
    Insert one movie record into the table and commit it.

    Note that cursor.execute(sql, val) is a method that could pass string into SQL statement easily.
    Values the API reports as 'N/A' (or ratings it does not list at all) are stored as NULL.

    Args:
        cursor (obj): current MySQL cursor object.
        db (obj): current MySQL connection (db) object.
        database_name (str): current database name.
        table: current table that you want to insert.
        json_obj: json object that contains movie information

    Returns:
        None

    Raises:
        KeyError: json_obj lacks one of the fields 'Title', 'Year', 'Genre',
            'Director', 'imdbRating' or 'Plot'.
        ValueError: a numeric field is neither 'N/A' nor in the expected format.
    '''
    ### Note that the parameter markers used by mysql.connector may look the same as the %s used
    ### in Python string formatting but the relationship is only coincidental. Some database adapters
    ### like oursql and sqlite3 use ? as the parameter marker instead of %s.
    ### The database and table names are concatenated into the statement text, so they
    ### must come from trusted code; only the column values go through parameter binding.
    sql = "INSERT INTO " + database_name + "." + table + ("(title, year, genre, director, imdb_rating, "
                                                          "rotten_tomatoes, metacritic, plot, box_office)"
                                                          " VALUES (%s, %s, %s, %s, %s,"
                                                          " %s, %s, %s, %s)")
    ### Raw data from json object
    title = json_obj['Title']
    year = json_obj['Year']
    genre = json_obj['Genre']
    director = json_obj['Director']
    imdb_rating = json_obj['imdbRating']
    ### Ratings are looked up by their source name instead of by list position, so a movie
    ### that lacks one of the ratings no longer raises IndexError or picks the wrong value.
    rotten_tomatoes = _omdb_rating_value(json_obj, 'Rotten Tomatoes')
    metacritic = _omdb_rating_value(json_obj, 'Metacritic')
    plot = json_obj['Plot']
    ### Treat a missing BoxOffice field the same as an explicit 'N/A'.
    box_office = json_obj.get('BoxOffice', 'N/A')

    ### Change some string into corrected format
    year = None if (year == 'N/A') else int(year)
    imdb_rating = None if (imdb_rating == 'N/A') else float(imdb_rating)
    rotten_tomatoes = None if (rotten_tomatoes == 'N/A') else int(rotten_tomatoes.replace('%', ''))
    metacritic = None if (metacritic == 'N/A') else int(metacritic.split(sep='/')[0])
    box_office = None if (box_office == 'N/A') else int(box_office.replace('$', '').replace(',', ''))

    val = (title, year, genre, director, imdb_rating, rotten_tomatoes, metacritic, plot, box_office,)

    try:
        cursor.execute(sql, val)
        db.commit()
        print('Insert %s information successfully' % (title))
    except mysql.connector.Error as error:
        print("Failed to update record to database rollback: %s" % (error))
        #reverting changes because of exception
        db.rollback()

In [5]:
### Create database/table
mydb = connect_to_local()
mycursor = mydb.cursor()
build_database(mycursor, 'ucdavis')
build_table(mycursor, 'omdb', 'ucdavis')
### Your favorite movies to add in
my_top10 = ['The Conjuring', 'The Conjuring 2', 'Joker', 'Pulp Fiction', 'Django Unchained', 
           'The Hateful Eight', 'Inglourious Basterds', 'The Greatest Showman', 'Shutter Island',
           'A Cure for Wellness']
### The OMDb API key is read from the OMDB_API_KEY environment variable so that a personal key
### does not have to be written into the notebook; if the variable is unset, the free key
### requested on the API website (ab010800) is the fallback.
OMDB_API_KEY = os.environ.get('OMDB_API_KEY', 'ab010800')
### Insert information: resolve each title to its IMDb id, fetch the full record, store it.
for movie in my_top10:
    _id = imdb_id(OMDB_API_KEY, movie, title=True)
    json_obj = imdb_id_detail_parser(OMDB_API_KEY, _id)
    insert_information(cursor=mycursor, db=mydb, database_name='ucdavis', table='omdb', json_obj=json_obj)

Connect successfully
Build ucdavis database successfully
Build omdb table successfully
Insert The Conjuring information successfully
Insert The Conjuring 2 information successfully
Insert Joker information successfully
Insert Pulp Fiction information successfully
Insert Django Unchained information successfully
Insert The Hateful Eight information successfully
Insert Inglourious Basterds information successfully
Insert The Greatest Showman information successfully
Insert Shutter Island information successfully
Insert A Cure for Wellness information successfully
