In [1]:
import pandas as pd
import urllib.request, urllib.parse, urllib.error
import json

import time # importing time module

In [2]:
# Read the OMDb API key from the local credentials file so that the key
# itself never appears in the notebook.
with open('APIkeys.json') as f:
    keys = json.load(f)
omdbapi = keys['OMDBapi']

In [3]:
# Base URL of the OMDb endpoint. HTTPS is used because the API key is sent as
# a query parameter on every request and must not travel in clear text.
serviceurl = 'https://www.omdbapi.com/?'
# Query-string suffix that authenticates each request.
apikey = '&apikey='+omdbapi

## Function for printing JSON dataset

In [4]:
def print_json(json_data):
    """Print selected fields of a movie record between two dashed rulers.

    Each field that is present in ``json_data`` is printed as one
    ``key: value`` line, in the fixed order given below; fields that are
    absent are skipped silently.
    """
    wanted_fields = (
        'Title', 'Year', 'Rated', 'Released', 'Runtime', 'Genre',
        'Director', 'Writer', 'Actors', 'Plot', 'Language', 'Country',
        'Awards', 'Ratings', 'Metascore', 'imdbRating', 'imdbVotes', 'imdbID',
    )
    ruler = "-" * 50

    print(ruler)
    for field in wanted_fields:
        if field not in json_data.keys():
            continue
        print(f"{field}: {json_data[field]}")
    print(ruler)

## Function to download poster

In [5]:
# def save_poster(json_data):
#     import os
#     title = json_data['Title']
#     poster_url = json_data['Poster']
#     # Splits the poster url by '.' and picks up the last string as file extension
#     poster_file_extension=poster_url.split('.')[-1]
#     # Reads the image file from web
#     poster_data = urllib.request.urlopen(poster_url).read()
        
#     savelocation=os.getcwd()+'\\'+'Posters'+'\\'
#     # Creates new directory if the directory does not exist. Otherwise, just use the existing path.
#     if not os.path.isdir(savelocation):
#         os.mkdir(savelocation)
    
#     filename=savelocation+str(title)+'.'+poster_file_extension
#     f=open(filename,'wb')
#     f.write(poster_data)
#     f.close()

## Function to create/update the local movie database with the data retrieved from the web


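Saves the movie data (title, year, rating, release date, runtime, genre, director, writer, actors, language, country, awards, Metascore, IMDb rating, IMDb votes and IMDb ID) into the `MovieInfo` table of a local SQLite database, 'output/IMDB_bb.sqlite'.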

In [6]:
#this is the query string
#http://www.omdbapi.com/?t=Toy+Story&y=1995

In [7]:
def _text_field(json_data, key, missing='NA'):
    """Return ``json_data[key]``, or ``missing`` when the key is absent or holds 'N/A'."""
    value = json_data.get(key, 'N/A')
    return missing if value == 'N/A' else value


def _number_field(json_data, key, convert, missing=-1):
    """Return ``convert(json_data[key])``, or ``missing`` when absent, 'N/A' or unparseable."""
    value = json_data.get(key, 'N/A')
    if value == 'N/A':
        return missing
    try:
        return convert(value)
    except (TypeError, ValueError, IndexError):
        return missing


def save_in_database(json_data, filename='output/IMDB_bb.sqlite'):
    """Insert one movie record into the ``MovieInfo`` table of a SQLite file.

    The table is created on first use. A record is inserted only when no row
    with the same ``Title`` exists yet; otherwise a message is printed and the
    database is left unchanged.

    Parameters
    ----------
    json_data : dict
        Movie record. ``'Title'`` is required. Every other field may be
        missing or hold the string ``'N/A'``; text fields are then stored as
        ``'NA'`` and ``Runtime``, ``Metascore``, ``imdbRating`` and
        ``imdbVotes`` as ``-1``.
    filename : str, optional
        Path of the SQLite database file.

    Raises
    ------
    KeyError
        If ``json_data`` has no ``'Title'`` entry.
    """
    import sqlite3

    # Build the full row before opening the database, so that a malformed
    # record cannot leave a connection open.
    title = json_data['Title']
    record = (
        title,
        _text_field(json_data, 'Year'),
        _text_field(json_data, 'Rated'),
        _text_field(json_data, 'Released'),
        # Runtime arrives as e.g. "78 min"; keep the leading number only.
        _number_field(json_data, 'Runtime',
                      lambda text: int(text.split()[0].replace(',', ''))),
        _text_field(json_data, 'Genre'),
        _text_field(json_data, 'Director'),
        _text_field(json_data, 'Writer'),
        _text_field(json_data, 'Actors'),
        _text_field(json_data, 'Language'),
        _text_field(json_data, 'Country'),
        _text_field(json_data, 'Awards'),
        _number_field(json_data, 'Metascore', float),
        _number_field(json_data, 'imdbRating', float),
        # Votes stay as text (e.g. "2,668"); -1 marks a missing value.
        _text_field(json_data, 'imdbVotes', missing=-1),
        # Previously left unbound when the ID was 'N/A', which crashed the INSERT.
        _text_field(json_data, 'imdbID'),
    )

    conn = sqlite3.connect(str(filename))
    try:
        cur = conn.cursor()
        cur.execute('''CREATE TABLE IF NOT EXISTS MovieInfo
        (Title TEXT, Year TEXT, Rated TEXT, Released TEXT, Runtime INTEGER,
         Genre TEXT, Director TEXT, Writer TEXT, Actors TEXT, Language TEXT,
         Country TEXT, Awards TEXT, Metascore REAL, IMDBRating REAL,
         imdbVotes TEXT, imdbID TEXT)''')

        # Duplicate detection is by title only.
        cur.execute('SELECT Title FROM MovieInfo WHERE Title = ? ', (title,))
        if cur.fetchone() is None:
            cur.execute('''INSERT INTO MovieInfo
                (Title, Year, Rated, Released, Runtime, Genre, Director, Writer,
                 Actors, Language, Country, Awards, Metascore, IMDBRating,
                 imdbVotes, imdbID)
                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)''', record)
        else:
            print("Record already found. No update made.")

        conn.commit()
    finally:
        # Close the connection even when a statement above fails.
        conn.close()

## Function to print contents of the local database

In [8]:
def print_database(database):
    """Print every row of the ``MovieInfo`` table, one tuple per line.

    Parameters
    ----------
    database : str
        Path of the SQLite database file.

    Raises
    ------
    sqlite3.OperationalError
        If the query fails, e.g. because the table does not exist. The
        connection is closed before the error propagates.
    """
    import sqlite3

    conn = sqlite3.connect(str(database))
    try:
        cur = conn.cursor()
        for row in cur.execute('SELECT * FROM MovieInfo'):
            print(row)
    finally:
        # Close the connection even when the query fails.
        conn.close()

## Function to save the database content in a CSV file

In [9]:
def save_in_csv(filename, database):
    """Export the ``MovieInfo`` table of a SQLite file to a CSV file.

    Parameters
    ----------
    filename : str
        Destination path; the text after its last ``.`` must be ``csv``.
        Otherwise a message is printed and nothing is written.
    database : str
        Path of the SQLite database file to read from.

    Returns
    -------
    None
    """
    import sqlite3

    if filename.split('.')[-1]!='csv':
        print ("Filename does not have correct extension. Please try again")
        return None

    conn = sqlite3.connect(str(database))
    try:
        movies = pd.read_sql_query("SELECT * FROM MovieInfo", conn)
    finally:
        # Close the connection even when the query fails.
        conn.close()

    # Write a header row but no index column.
    movies.to_csv(filename, index=False, header=True)

## Function to search for information about a movie

In [10]:
def _fetch_movie(title, year):
    """Query OMDb for ``title`` and ``year`` and return the decoded JSON response."""
    query = urllib.parse.urlencode({'t': title, 'y': year})
    url = serviceurl + query + apikey
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read())


def search_movie(m_title, year, retry_delay=5):
    """Look a movie up on OMDb, print it and store it in the local database.

    If the lookup by full title fails and the title has more than two words,
    the lookup is repeated once with only the first two words (lower-cased).
    A successful response is printed with ``print_json`` and stored with
    ``save_in_database``; otherwise the error reported by the service is
    printed.

    Parameters
    ----------
    m_title : str
        Movie title. An empty string or ``'quit'`` ends the call immediately.
    year : int or str
        Release year sent along with the title.
    retry_delay : int or float, optional
        Seconds to wait before the second attempt.

    Returns
    -------
    None
    """
    # Use the parameter itself; the body used to read a global named `title`.
    if len(m_title) < 1 or m_title == 'quit':
        print("Goodbye now...")
        return None

    try:
        print(f'Retrieving the data of "{m_title}" now... ')
        json_data = _fetch_movie(m_title, year)

        if json_data['Response'] != 'True':
            words = m_title.lower().split()
            # With one or two words the shortened title would repeat the
            # same query, so a retry would only waste an API call.
            if len(words) > 2:
                short_title = ' '.join(words[:2])
                print('--------2nd try: 2 words title --------')
                print(short_title)
                print(year)
                time.sleep(retry_delay)
                # Send the second request; previously the URL was built but
                # never fetched and the failed response was saved instead.
                json_data = _fetch_movie(short_title, year)

        if json_data['Response'] == 'True':
            print_json(json_data)
            save_in_database(json_data)
        else:
            print("Error: ", json_data.get('Error', 'Unknown error'))

    except urllib.error.URLError as e:
        print(f"ERROR: URL Error {e.reason}")

## Load Disney titles to get all the information through function search_movie

In [11]:
# Load the Disney titles to look up. The active line reads the "delta" file;
# the commented-out line reads the FINAL file instead.
# NOTE(review): presumably the delta file holds only titles not yet fetched — confirm.
#disney_df = pd.read_csv('output/FINAL_2021_disney_movies_total_gross.csv')
disney_df = pd.read_csv('output/delta_2021_disney_movies_total_gross.csv')
disney_df.head()

Unnamed: 0,movie_title,year,release_date,total_gross,inflation_adjusted_gross
0,America's Heart and Soul,2004,"Jul 2, 2004",314000,450421
1,Blame it on the Bellboy,1992,"Mar 6, 1992",2891055,5583671
2,Born in China,2017,"Apr 21, 2017",13873211,15336259
3,Bound by Honor,1993,"Apr 16, 1993",4496583,8432104
4,Endurance,1998,"May 14, 1999",229128,372669


In [12]:
# Non-null count per column: shows how many titles were loaded.
disney_df.count()

movie_title                 32
year                        32
release_date                32
total_gross                 32
inflation_adjusted_gross    32
dtype: int64

## === Test only STARTS here ===

In [13]:
# Two-title sample for a trial run: the free API tier allows only 1000 calls per day.
sample_titles = ['101 Dalmatians','1492: Conquest of Paradise']
is_sample = disney_df['movie_title'].isin(sample_titles)
test_df = disney_df[is_sample]
test_df

Unnamed: 0,movie_title,year,release_date,total_gross,inflation_adjusted_gross


In [14]:
# #TEST BLOCK: For smaller DF due to 1000 triggers a day only
# #query for all the movies in our disney list and get the latest information from IMDB

# for i in test_df.index: 
#     title = test_df['movie_title'][i].lower()
#     year = test_df['year'][i]
#     print (test_df['movie_title'][i])
#     print (test_df['year'][i])
#     print ('=====================================')
#     search_movie(title,year)
#     time.sleep(10)

## === Test only ENDS here ===

In [15]:
# Look up every movie in the Disney list and store the latest IMDB information.
# `title` and `year` are kept as notebook-level names on purpose.

for i in disney_df.index:
    original_title = disney_df.loc[i, 'movie_title']
    year = disney_df.loc[i, 'year']
    title = original_title.lower()
    print(original_title)
    print(year)
    print('=====================================')
    search_movie(title, year)
    # Pause between titles.
    time.sleep(10)

America's Heart and Soul
2004
Retrieving the data of "america's heart and soul" now... 
--------2nd try: 2 words title --------
america's heart
2004
Error:  Movie not found!
Blame it on the Bellboy
1992
Retrieving the data of "blame it on the bellboy" now... 
--------------------------------------------------
Title: Blame It on the Bellboy
Year: 1992
Rated: PG-13
Released: 06 Mar 1992
Runtime: 78 min
Genre: Comedy
Director: Mark Herman
Writer: Mark Herman
Actors: Bronson Pinchot, Dudley Moore, Bryan Brown
Plot: Several strangers converge on a hotel in Venice, Italy, where a bellboy's bad English sets them up to clash against each other in a hilarious case of mistaken identity.
Language: English
Country: United Kingdom, United States
Awards: N/A
Ratings: [{'Source': 'Internet Movie Database', 'Value': '5.5/10'}, {'Source': 'Rotten Tomatoes', 'Value': '19%'}]
Metascore: N/A
imdbRating: 5.5
imdbVotes: 2,668
imdbID: tt0103827
--------------------------------------------------
Record alread

I'll Be Home For Christmas
1998
Retrieving the data of "i'll be home for christmas" now... 
--------------------------------------------------
Title: I'll Be Home for Christmas
Year: 1998
Rated: PG
Released: 13 Nov 1998
Runtime: 86 min
Genre: Comedy, Family
Director: Arlene Sanford
Writer: Tom Nursall, Harris Goldberg, Michael Allin
Actors: Jonathan Taylor Thomas, Jessica Biel, Adam LaVorgna
Plot: A college student faces an impossible journey when he is left stranded in the desert, thousands of miles from home, with no money and only a few days left until Christmas.
Language: English
Country: United States, Canada
Awards: N/A
Ratings: [{'Source': 'Internet Movie Database', 'Value': '5.5/10'}, {'Source': 'Rotten Tomatoes', 'Value': '23%'}]
Metascore: N/A
imdbRating: 5.5
imdbVotes: 10,550
imdbID: tt0155753
--------------------------------------------------
Record already found. No update made.
Jane Austen's Mafia!
1998
Retrieving the data of "jane austen's mafia!" now... 
--------2nd try

Step Up 3D
2010
Retrieving the data of "step up 3d" now... 
--------2nd try: 2 words title --------
step up
2010
Error:  Movie not found!
The Hand That Rocks the Cradle
1992
Retrieving the data of "the hand that rocks the cradle" now... 
--------------------------------------------------
Title: The Hand that Rocks the Cradle
Year: 1992
Rated: R
Released: 10 Jan 1992
Runtime: 110 min
Genre: Drama, Thriller
Director: Curtis Hanson
Writer: Amanda Silver
Actors: Annabella Sciorra, Rebecca De Mornay, Matt McCoy
Plot: After her humiliated husband kills himself, an embittered pregnant widow loses her child, and embarks on a mission of vengeance against a woman and her family.
Language: English
Country: United States, United Kingdom
Awards: 6 wins & 6 nominations
Ratings: [{'Source': 'Internet Movie Database', 'Value': '6.7/10'}, {'Source': 'Rotten Tomatoes', 'Value': '64%'}, {'Source': 'Metacritic', 'Value': '64/100'}]
Metascore: 64
imdbRating: 6.7
imdbVotes: 42,637
imdbID: tt0104389
--------

In [16]:
# Check whether this specific title is present in the loaded list.
disney_df[disney_df['movie_title'].eq('Who Framed Roger Rabbit')]

Unnamed: 0,movie_title,year,release_date,total_gross,inflation_adjusted_gross


In [17]:
# Resume the lookup from row 607 and continue to the end of the list.
# NOTE(review): the count output earlier in this notebook shows 32 rows; with
# that many rows this range is empty and the loop does nothing — confirm the
# start offset against the file actually loaded.

for i in range(607, len(disney_df)):
    title = disney_df.loc[i, 'movie_title']
    year = disney_df.loc[i, 'year']
    print(title)
    print(year)
    print('=====================================')
    search_movie(title, year)
    # Pause between titles.
    time.sleep(10)

In [18]:
# #Show downloaded poster of Titanic
# from IPython.display import Image
# Image("Posters/Toy Story.jpg")

## Print the content of the local database

In [19]:
# Dump every stored row of the local movie database.
print_database('output/IMDB_bb.sqlite')

('101 Dalmatians', '1996', 'G', '27 Nov 1996', 103, 'Adventure, Comedy, Crime', 'Stephen Herek', 'Dodie Smith, John Hughes', 'Glenn Close, Jeff Daniels, Joely Richardson', 'English, Spanish', 'United States, United Kingdom', 'Nominated for 1 BAFTA Film Award3 wins & 10 nominations total', 49.0, 5.7, '109,762', 'tt0115433')
('102 Dalmatians', '2000', 'G', '22 Nov 2000', 100, 'Adventure, Comedy, Family', 'Kevin Lima', 'Dodie Smith, Kristen Buckley, Brian Regan', 'Glenn Close, Gérard Depardieu, Ioan Gruffudd', 'English', 'United States, United Kingdom', 'Nominated for 1 Oscar. 1 win & 8 nominations total', 35.0, 4.9, '37,045', 'tt0211181')
('1492: Conquest of Paradise', '1992', 'PG-13', '09 Oct 1992', 154, 'Adventure, Biography, Drama', 'Ridley Scott', 'Rose Bosch', 'Gérard Depardieu, Armand Assante, Sigourney Weaver', 'English, Spanish, Latin, Arabic', 'United Kingdom, France, Spain', '2 nominations', 47.0, 6.4, '29,990', 'tt0103594')
('20,000 Leagues Under the Sea', '1954', 'G', '20 Jul

('The Lion King', '1994', 'G', '24 Jun 1994', 88, 'Animation, Adventure, Drama', 'Roger Allers, Rob Minkoff', 'Irene Mecchi, Jonathan Roberts, Linda Woolverton', 'Matthew Broderick, Jeremy Irons, James Earl Jones', 'English, Swahili, Xhosa, Zulu', 'United States', 'Won 2 Oscars. 39 wins & 35 nominations total', 88.0, 8.5, '1,014,686', 'tt0110357')
('The Little Mermaid', '1989', 'G', '17 Nov 1989', 83, 'Animation, Adventure, Comedy', 'Ron Clements, John Musker', 'John Musker, Ron Clements, Hans Christian Andersen', 'Jodi Benson, Samuel E. Wright, Rene Auberjonois', 'English, French', 'United States', 'Won 2 Oscars. 14 wins & 8 nominations total', 88.0, 7.6, '254,101', 'tt0097757')
('The Lizzie McGuire Movie', '2003', 'PG', '02 May 2003', 94, 'Adventure, Comedy, Family', 'Jim Fall', 'Susan Estelle Jansen, Ed Decter, John J. Strauss', 'Hilary Duff, Adam Lamberg, Clayton Snyder', 'English, Italian', 'United States', '2 wins & 3 nominations', 56.0, 5.5, '39,048', 'tt0306841')
('The Lone Ran

## Save the database content into a CSV file

In [20]:
# Export the MovieInfo table of the local database to a CSV file.
save_in_csv('output/IMDB_movies.csv','output/IMDB_bb.sqlite')

In [21]:
# Read the exported CSV back to check that it was written correctly.
df=pd.read_csv('output/IMDB_movies.csv')
df

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Language,Country,Awards,Metascore,IMDBRating,imdbVotes,imdbID
0,101 Dalmatians,1996,G,27 Nov 1996,103,"Adventure, Comedy, Crime",Stephen Herek,"Dodie Smith, John Hughes","Glenn Close, Jeff Daniels, Joely Richardson","English, Spanish","United States, United Kingdom",Nominated for 1 BAFTA Film Award3 wins & 10 no...,49.0,5.7,109762,tt0115433
1,102 Dalmatians,2000,G,22 Nov 2000,100,"Adventure, Comedy, Family",Kevin Lima,"Dodie Smith, Kristen Buckley, Brian Regan","Glenn Close, Gérard Depardieu, Ioan Gruffudd",English,"United States, United Kingdom",Nominated for 1 Oscar. 1 win & 8 nominations t...,35.0,4.9,37045,tt0211181
2,1492: Conquest of Paradise,1992,PG-13,09 Oct 1992,154,"Adventure, Biography, Drama",Ridley Scott,Rose Bosch,"Gérard Depardieu, Armand Assante, Sigourney We...","English, Spanish, Latin, Arabic","United Kingdom, France, Spain",2 nominations,47.0,6.4,29990,tt0103594
3,"20,000 Leagues Under the Sea",1954,G,20 Jul 1955,127,"Adventure, Drama, Family",Richard Fleischer,"Earl Felton, Jules Verne","Kirk Douglas, James Mason, Paul Lukas",English,United States,Won 2 Oscars. 5 wins & 3 nominations total,83.0,7.2,33152,tt0046672
4,25th Hour,2002,R,10 Jan 2003,135,Drama,Spike Lee,David Benioff,"Edward Norton, Barry Pepper, Philip Seymour Ho...",English,United States,4 wins & 17 nominations,68.0,7.6,176763,tt0307901
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604,Endurance,1998,G,14 May 1999,83,"Biography, Drama, Sport","Leslie Woodhead, Bud Greenspan",,"Haile Gebrselassie, Shawananness Gebrselassie,...",English,"United States, United Kingdom, Germany",,-1.0,6.3,419,tt0120659
605,Firelight,1997,R,04 Sep 1998,103,"Drama, Romance",William Nicholson,William Nicholson,"Sophie Marceau, Stephen Dillane, Dominique Bel...","English, French","United Kingdom, United States",2 wins & 2 nominations,-1.0,7.3,4397,tt0119125
606,Mars Needs Moms,2010,PG,11 Mar 2011,88,"Animation, Action, Adventure",Simon Wells,"Simon Wells, Wendy Wells, Berkeley Breathed","Seth Green, Joan Cusack, Dan Fogler",English,United States,2 nominations,49.0,5.4,22234,tt1305591
607,The Jungle Book 2,2002,G,14 Feb 2003,72,"Animation, Adventure, Comedy",Steve Trenbirth,"Karl Geurs, Carter Crocker, Evan Spiliotopoulos","John Goodman, Haley Joel Osment, Tony Jay",English,"United States, Australia",1 win & 9 nominations,38.0,5.4,16161,tt0283426


In [22]:
# Non-null count per column of the exported data.
df.count()

Title         609
Year          609
Rated         601
Released      608
Runtime       609
Genre         608
Director      605
Writer        600
Actors        608
Language      609
Country       609
Awards        502
Metascore     609
IMDBRating    609
imdbVotes     609
imdbID        609
dtype: int64

In [23]:
# Spot check: rows whose title starts with "Blood In".
df.loc[df['Title'].str.startswith('Blood In')]

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Language,Country,Awards,Metascore,IMDBRating,imdbVotes,imdbID
573,"Blood In, Blood Out",1993,R,30 Apr 1993,180,"Crime, Drama",Taylor Hackford,"Ross Thomas, Jimmy Santiago Baca, Jeremy Iacone","Damian Chapa, Jesse Borrego, Benjamin Bratt","English, Spanish, Arabic",United States,2 nominations,47.0,7.9,30773,tt0106469
