In [1]:
import mysql.connector
import re
import config
import requests
import json
import time


def convert_to_int(string):
    """Strip every non-word character from ``string`` and parse the rest as an int.

    Used for values such as ``'75%'`` (-> 75) or ``'$53,100,000'`` (-> 53100000).
    Note that a leading ``-`` is a non-word character too, so the sign is dropped.

    Args:
        string (str): text containing digits plus optional punctuation.

    Returns:
        int: the integer formed by the remaining characters.

    Raises:
        ValueError: if what remains after stripping is not a valid integer
            (e.g. ``'N/A'`` or an empty string).
    """
    # Raw string: '\W' in a normal string literal is an invalid escape sequence.
    clean_string = re.sub(r'\W+', '', string)
    return int(clean_string)


def rt_rating(movie):
    """Return the Rotten Tomatoes score of an OMDb movie record as an int.

    Looks through ``movie['Ratings']`` for the first entry whose ``'Source'``
    is "Rotten Tomatoes" and converts its ``'Value'`` with ``convert_to_int``.
    Returns ``None`` when no such entry exists.
    """
    rt_entry = next(
        (entry for entry in movie['Ratings'] if entry['Source'] == "Rotten Tomatoes"),
        None,
    )
    if rt_entry is None:
        return None
    return convert_to_int(rt_entry['Value'])
        

def connect_to_db():
    """Open a MySQL connection from the settings in ``config`` and create a cursor.

    Returns:
        tuple: ``(connection, cursor)``. The caller is responsible for closing both.
    """
    connection = mysql.connector.connect(
        host=config.host,
        user=config.user,
        passwd=config.password,
        database=config.DB_NAME,
    )
    return connection, connection.cursor()


def convert_list_from_tuples(tuple_list):
    """Flatten a list of tuples into a list of their first elements.

    The movie titles are retrieved as a list of tuples; this returns a plain
    list holding only the first item of each tuple.
    """
    return [row[0] for row in tuple_list]
    

def get_all_movie_info():
    """Fetch every row and column of the ``movies`` table.

    Returns:
        list: one tuple per row, as returned by ``cursor.fetchall()``.
    """
    select_q = """
                SELECT *
                FROM movies
                """
    cnx, c = connect_to_db()
    try:
        # Run the query defined above (the name must match ``select_q``).
        c.execute(select_q)
        return c.fetchall()
    finally:
        # Release the cursor before its connection, even if the query fails.
        c.close()
        cnx.close()

def get_movie_names():
    """Return the title of every row in the ``movies`` table.

    Returns:
        list: the titles as a flat list (not the one-element tuples the
        cursor yields).
    """
    name_q = """
            SELECT title
            FROM movies"""
    cnx, c = connect_to_db()
    try:
        c.execute(name_q)
        results = c.fetchall()
    finally:
        # Release the cursor before its connection, even if the query fails.
        c.close()
        cnx.close()
    # Each fetched row is a tuple; keep only its first element (the title).
    return convert_list_from_tuples(results)

# Load every title from the movies table and preview the first five.
titles = get_movie_names()
titles[:5]

['Lock, Stock and Two Smoking Barrels',
 'Taxi Driver',
 'Perfect Blue',
 'Star Wars',
 'Wolf Children']

In [2]:
def check_data_fields(data, list_of_fields):
    """Report whether ``data`` contains every key in ``list_of_fields``.

    Returns True when all the listed keys are present (including when the
    list is empty) and False as soon as any one of them is missing.
    """
    return all(field in data for field in list_of_fields)

def omdb_api(movies):
    """Retrieve movie details for a list of titles using the OMDb API.

    Sends one request per title. A response is kept only if it contains all
    of the 'Director', 'BoxOffice', 'Ratings' and 'Title' keys; any other
    response is skipped silently.

    Args:
        movies (list): movie titles to look up.

    Returns:
        list: one dict per usable response with the keys 'director',
        'boxoffice', 'rt_rating' and 'title', in that order.
    """
    # Every key that must be present before a response is used.
    necessary_fields = ['Director', 'BoxOffice', 'Ratings', 'Title']
    movies_details = []
    for title in movies:
        # Pass the query via ``params`` so requests URL-encodes the title
        # (spaces, '&', '#', ...). str.replace returns a new string, so
        # calling it without using the result would leave the title unencoded.
        response = requests.get(
            'http://www.omdbapi.com/',
            params={'apikey': config.omdb_api, 't': title},
        )
        movie = response.json()
        if check_data_fields(movie, necessary_fields):
            # 4 necessary fields; the title is needed by the later UPDATE query.
            # NOTE(review): this is the title OMDb returns, which can differ
            # from the one queried (e.g. 'Star Wars' ->
            # 'Star Wars: Episode IV - A New Hope'); a later
            # ``WHERE title = ...`` would then match nothing. Confirm whether
            # the queried title should be stored instead.
            movies_details.append({
                'director': movie['Director'],
                'boxoffice': movie['BoxOffice'],
                'rt_rating': rt_rating(movie),
                'title': movie['Title'],
            })
        # Brief pause between requests, presumably to stay under the API rate limit.
        time.sleep(.2)

    return movies_details
      

In [3]:
# Smoke-test the OMDb lookup on the first two titles only.
two = omdb_api(titles[0:2])
two


[{'director': 'Guy Ritchie',
  'boxoffice': 'N/A',
  'rt_rating': 75,
  'title': 'Lock, Stock and Two Smoking Barrels'},
 {'director': 'Martin Scorsese',
  'boxoffice': 'N/A',
  'rt_rating': 98,
  'title': 'Taxi Driver'}]

In [4]:

    


def add_omdb_values_to_movies():
    """Collect box office, director and Rotten Tomatoes rating from OMDb
    for every title in our pre-existing movies DB.

    Returns the list of dictionaries produced by ``omdb_api``. Nothing is
    written to the database here; the values are only fetched and returned.
    """
    # Retrieve all movie names and hand them to omdb_api, which yields a list
    # of dictionaries holding director, box office, rating and title.
    return omdb_api(get_movie_names())
#     for value_set in omdb_values:
#         insert_omdb_values(list(value_set.values()))

# Fetch OMDb details for every title in the movies table
# (one HTTP request per title, with a 0.2 s pause after each).
omdb_values = add_omdb_values_to_movies()
omdb_values

[{'director': 'Guy Ritchie',
  'boxoffice': 'N/A',
  'rt_rating': 75,
  'title': 'Lock, Stock and Two Smoking Barrels'},
 {'director': 'Martin Scorsese',
  'boxoffice': 'N/A',
  'rt_rating': 98,
  'title': 'Taxi Driver'},
 {'director': 'Satoshi Kon',
  'boxoffice': 'N/A',
  'rt_rating': 77,
  'title': 'Perfect Blue'},
 {'director': 'George Lucas',
  'boxoffice': 'N/A',
  'rt_rating': 93,
  'title': 'Star Wars: Episode IV - A New Hope'},
 {'director': 'Mamoru Hosoda',
  'boxoffice': 'N/A',
  'rt_rating': 94,
  'title': 'Wolf Children'},
 {'director': 'Kar-Wai Wong',
  'boxoffice': 'N/A',
  'rt_rating': 87,
  'title': 'Chungking Express'},
 {'director': 'Christopher Nolan',
  'boxoffice': '$53,100,000',
  'rt_rating': 76,
  'title': 'The Prestige'},
 {'director': 'Martin Scorsese',
  'boxoffice': '$125,001,000',
  'rt_rating': 68,
  'title': 'Shutter Island'},
 {'director': 'Akira Kurosawa',
  'boxoffice': 'N/A',
  'rt_rating': 96,
  'title': 'Ran'},
 {'director': 'Akira Kurosawa',
  'bo

In [7]:
def insert_omdb_values(values):
    """Write one movie's OMDb values into its row of the ``movies`` table.

    Args:
        values (list): positional values in the order
            ``[director, box_office, rt_rating, title]``. The row to update
            is selected by ``title``.

    Side effects:
        Prints the director and box office value, runs an UPDATE and commits it.
    """
    director = values[0]
    box_office = values[1]
    rt_score = values[2]
    title = values[3]
    print(director)
    print(box_office)
    # %s placeholders let the driver quote the values, so names containing
    # quotes (e.g. "O'Brien") neither break the statement nor inject SQL.
    # A rating of None is sent as SQL NULL rather than the text 'None'.
    insert_q = """UPDATE movies
                    SET
                        director = %s,
                        box_office = %s,
                        rt_ratings = %s
                    WHERE title = %s;"""
    cnx, c = connect_to_db()
    try:
        c.execute(insert_q, (director, box_office, rt_score, title))
        # Connector/Python does not autocommit by default; without this the
        # UPDATE is discarded when the connection closes.
        cnx.commit()
    finally:
        # close must be *called*; a bare ``c.close`` is a no-op expression.
        c.close()
        cnx.close()

In [8]:
# Write each movie's OMDb values back to the movies table, one UPDATE per movie.
for value_set in omdb_values:
    # The dict was built in the order director, boxoffice, rt_rating, title,
    # which is the positional order insert_omdb_values reads.
    values = list(value_set.values())
    insert_omdb_values(values)

Guy Ritchie
N/A
Martin Scorsese
N/A
Satoshi Kon
N/A
George Lucas
N/A
Mamoru Hosoda
N/A
Kar-Wai Wong
N/A
Christopher Nolan
$53,100,000
Martin Scorsese
$125,001,000
Akira Kurosawa
N/A
Akira Kurosawa
N/A
Akira Kurosawa
N/A
Peter Jackson
$314,000,000
Peter Jackson
$339,700,000
Peter Jackson
$364,000,000
David Yates
$381,000,185
Akira Kurosawa
N/A
Hayao Miyazaki, Kirk Wise
$9,855,615
Robert Zemeckis
$330,000,000
Andrei Tarkovsky
$5,233
Andrei Tarkovsky
$268,101
Masaki Kobayashi
N/A
Kenji Mizoguchi
$6,154
Isao Takahata
$408,718
Orson Welles
N/A
Christopher Nolan
$533,316,061
Christopher Nolan
$158,737,441
Hwan-kyung Lee
N/A
Stanley Kubrick
N/A
Aditya Chopra
N/A
Michael Powell, Emeric Pressburger
N/A
Francis Ford Coppola
N/A
Billy Wilder
N/A
Francis Ford Coppola
N/A
Dino Risi
N/A
Mario Monicelli
N/A
Hirokazu Koreeda
$491,773
Elem Klimov
N/A
Christopher Nolan
$292,568,851
Jonathan Demme
N/A
Frank Darabont
N/A
Sion Sono
N/A
Charles Chaplin
N/A
Luis Buñuel
N/A
Anthony Russo, Joe Russo
$664,987,8

In [64]:

###TESTING PURPOSES####
#get_5 titles to check their type and formatting
def get_5_titles():
    """Fetch up to five titles from the ``movies`` table (testing helper).

    Returns:
        list: the rows as one-element tuples, exactly as ``fetchall()``
        returns them, so their type and formatting can be inspected.
    """
    q = """SELECT title
            FROM movies
            LIMIT 5"""
    cnx, c = connect_to_db()
    try:
        c.execute(q)
        return c.fetchall()
    finally:
        # Release the cursor and connection instead of leaking them.
        c.close()
        cnx.close()

# First column of the first returned row, i.e. the bare title string.
get_5_titles()[0][0]
    

'Lock, Stock and Two Smoking Barrels'

In [9]:

##TESTING: see if title = {title} actually matches anything
def check_title_match(values):
    """Print the ``(title, movie_id)`` rows whose title equals ``values[3]``.

    Testing helper to see whether a title obtained from OMDb matches any row
    in the ``movies`` table.

    Args:
        values (list): values in the order
            ``[director, box_office, rt_rating, title]``; only the title
            (index 3) is used.
    """
    title = values[3]
    # %s placeholder: the driver quotes the title, so quotes inside it are safe.
    select_q = """SELECT title, movie_id
            FROM movies
            WHERE title = %s;"""
    cnx, c = connect_to_db()
    try:
        c.execute(select_q, (title,))
        print(c.fetchall())
    finally:
        # close must be *called*; a bare ``c.close`` is a no-op expression.
        c.close()
        cnx.close()
    


In [10]:
# Check whether the first movie's OMDb title matches a row in the movies table.
values = list(omdb_values[0].values())
check_title_match(values)

[('Lock, Stock and Two Smoking Barrels', '100')]
