In [None]:
def scrape_imdb():
    import csv
    import re
    from requests import get
    from bs4 import BeautifulSoup
    from IPython.core.display import clear_output
    from warnings import warn
    from time import time, sleep
    from random import gauss, shuffle, randint


    start_time = time()
    html_soup = BeautifulSoup(response.text, 'html.parser')
    page_count = 10

    years = [i for i in range(1935, 2020)]
    year_pages = [(year, page) for year in years for page in range(page_count)]
    # shuffle(year_pages)

    headers = {"Accept-Language": "en-US, en;q=0.5"}

    # Preparing the monitoring of the loop
    start_time = time()
    requests = 0

    # Write the header to file before looping
    with open('output/movie_ratings.csv', 'w', newline='') as output_file:
        csv_headers = ['imdb_ids', 'movie', 'year', 'imdb', 'metascore', 'votes']
        writer = csv.DictWriter(output_file, fieldnames=csv_headers)
        writer.writeheader()

    for year, page in year_pages:

        # Redeclaring the lists to store data in
        names = []
        years = []
        imdb_ratings = []
        metascores = []
        votes = []
        imdb_ids = []

        # Make a get request
        start = str((page * 50) + 1)
        url = 'http://www.imdb.com/search/title?release_date=' + str(year) + '&sort=num_votes,desc&start=' + start 
        while(requests / elapsed_time > 0.1):
            sleep(randint(1, 10))
            elapsed_time = time() - start_time
        response = get(url, headers = headers)

        # Pause the loop
        sleep_time = gauss(10, 4)
        sleep(abs(sleep_time))

        # Monitor the requests
        requests += 1
        elapsed_time = time() - start_time
        print('Request:{}; Frequency: {} requests/s\nLast URL: {}'.format(requests, requests/elapsed_time, url))
        clear_output(wait = True)

        # Throw a warning for non-200 status codes
        if response.status_code != 200:
            warn('Request: {}; Status code: {}'.format(requests, response.status_code))
            continue

        # Break the loop if the number of requests is greater than expected
        if requests > 2000:
            warn('Number of requests was greater than expected.')
            break

        # Parse the content of the request with BeautifulSoup
        page_html = BeautifulSoup(response.text, 'html.parser')

        # Select all the 50 movie containers from a single page
        mv_containers = page_html.find_all('div', class_ = 'lister-item mode-advanced')

        # For every movie of these 50
        for container in mv_containers:
            # Scrape the name
            title_html = container.h3.a
            imdb_id = re.search('/title/tt(\d+)/', container.h3.a['href']).group(1)
            imdb_ids.append(imdb_id)
            names.append(title_html.text)

            # Scrape the year
            # year = container.h3.find('span', class_ = 'lister-item-year').text
            years.append(year)

            # Scrape the IMDB rating
            imdb = float(container.strong.text)
            imdb_ratings.append(imdb)

            # Scrape the Metascore
            if container.find('div', class_ = 'ratings-metascore') is not None:
                m_score = container.find('span', class_ = 'metascore').text
                metascores.append(int(m_score))
            else:
                metascores.append(None)

            # Scrape the number of votes
            vote = container.find('span', attrs = {'name':'nv'})['data-value']
            votes.append(int(vote))

        movie_ratings_df = pd.DataFrame({'imdb_ids': imdb_ids,
                                         'movie': names,
                                         'year': years,
                                         'imdb': imdb_ratings,
                                         'metascore': metascores,
                                         'votes': votes})
        # Checkpoint data as you go
        with open('output/movie_ratings.csv', 'a', newline='') as output_file:
            movie_ratings_df.to_csv(output_file, header=False, index=False)

    sleep(1)
    clear_output(wait = True)

    normalized_movie_ratings_df = pd.read_csv('output/movie_ratings.csv', encoding='ISO-8859-1')
    normalized_movie_ratings_df = normalized_movie_ratings_df[['imdb_ids', 'movie', 'year', 'imdb', 'metascore', 'votes']]
    normalized_movie_ratings_df['imdb'] = normalized_movie_ratings_df['imdb'] * 10
    normalized_movie_ratings_df['imdb'] = normalized_movie_ratings_df['imdb'].astype(int)
    normalized_movie_ratings_df.to_csv('output/norm_movie_ratings.csv')

    display(normalized_movie_ratings_df)
scrape_imdb()

In [2]:
def scrape_thenumbers():
    import bs4
    import requests
    import json
    import os
    import re
    import time
    # There are 5860 movies at the time this program was written
    PAGE_COUNT = 59


    class SimpleLogger(object):
        def __init__(self, filename):
            self.file = open(filename, 'w')

        def __del__(self):
            self.file.close()

        def log(self, msg):
            print(msg)
            print(msg, file=self.file)

    def parse_dollars(dollar_str):
        return int(dollar_str.replace('$', '').replace(',', ''))


    def clean_input(input_str):
        return input_str.encode("ascii", errors="ignore").decode()


    def scrape_movie(url_ext):
        page_str = 'https://www.the-numbers.com' + url_ext
        movie_rsp = requests.get(page_str)
        
        try:
            movie_rsp.raise_for_status()
        except requests.HTTPError:
            logger.log('Request for ' + page_str + ' failed!')
            return None
        parsed_movie_rsp = bs4.BeautifulSoup(clean_input(movie_rsp.text), features='html.parser')
        table = parsed_movie_rsp.find(text='Movie Details').parent.find_next('table')
        mpaa_rating = None
        running_time = None
        franchises = None
        keywords = None
        source = None
        genre = None
        production_method = None
        creative_type = None
        production_companies = None
        production_countries = None
        languages = None
        temp = table.find(text='MPAA Rating:')
        if temp:
            result = re.search('/market/mpaa-rating/([\w\d\(\)-]+)', temp.parent.parent.parent.select('a')[0]['href'])
            mpaa_rating = {
                'slug': result.group(1) if result else None,
                'pretty': temp.parent.parent.parent.select('a')[0].text
            }
        temp = table.find(text='Running Time:')
        if temp:
            temp = temp.parent.parent.find_next('td').getText()
            running_time = int(temp.split(' ')[0])
        temp = table.find(text='Franchise:')
        if temp:
            franchises = list()
            for franchise in temp.parent.find_next('td').find_all('a'):
                result = re.search('/movies/franchise/([\w\d\(\)-]+)', franchise['href'])
                franchises.append({
                    'slug': result.group(1) if result else None,
                    'pretty': franchise.text
                })
        temp = table.find(text='Keywords:')
        if temp:
            keywords = list()
            for keyword in temp.parent.find_next('td').find_all('a'):
                result = re.search('/movies/keywords/([\w\d\(\)-]+)', keyword['href'])
                keywords.append({
                    'slug': result.group(1) if result else None,
                    'pretty': keyword.text
                })
        temp = table.find(text='Source:')
        if temp:
            result = re.search('/market/source/([\w\d\(\)-]+)', temp.parent.parent.parent.select('a')[0]['href'])
            source = {
                'slug': result.group(1) if result else None,
                'pretty': temp.parent.parent.parent.select('a')[0].getText()
            }
        temp = table.find(text='Genre:')
        if temp:
            result = re.search('/market/genre/([\w\d\(\)-]+)', temp.parent.parent.parent.select('a')[0]['href'])
            genre = {
                'slug': result.group(1) if result else None,
                'pretty': temp.parent.parent.parent.select('a')[0].getText()
            }
        temp = table.find(text='Production Method:')
        if temp:
            result = re.search('/market/production-method/([\w\d\(\)-]+)', temp.parent.parent.parent.select('a')[0]['href']) 
            production_method = {
                'slug': result.group(1) if result else None,
                'pretty': temp.parent.parent.parent.select('a')[0].getText()
            }
        temp = table.find(text='Creative Type:')
        
        print(table)
        print(temp)
        if temp:
            result = re.search('/market/creative-type/([\w\d\(\)-]+)', temp.parent.parent.parent.select('a')[0]['href'])
            creative_type = {
                'slug': result.group(1) if result else None,
                'pretty': temp.parent.parent.parent.select('a')[0].getText()
            }
        temp = table.find(text='Production Companies:')
        if temp:
            production_companies = list()
            for production_company in temp.parent.find_next('td').find_all('a'):
                result = re.search('/movies/production-company/([\w\d\(\)-]+)', production_company['href'])
                production_companies.append({
                    'slug': result.group(1) if result else None,
                    'pretty': production_company.text
                })
        temp = table.find(text='Production Countries:')
        if temp:
            production_countries = list()
            for production_country in temp.parent.find_next('td').find_all('a'):
                result = re.search('/([\w\d\(\)-]+)/movies', production_country['href'])
                production_countries.append({
                    'slug': result.group(1) if result else None, 
                    'pretty': production_country.text
                })
        temp = table.find(text='Languages:')
        if temp:
            languages = list()
            for language in temp.parent.find_next('td').find_all('a'):
                result = re.search('/language/([\w\d\(\)-]+)/movies', language['href'])
                languages.append({
                    'slug': result.group(1) if result else None,
                    'pretty': language.text
                })
        movie_dict = {
            'mpaa-rating': mpaa_rating,
            'running-time': running_time,
            'franchises': franchises,
            'keywords': keywords,
            'source': source,
            'genre': genre,
            'production_method': production_method,
            'creative_type': creative_type,
            'production_companies': production_companies,
            'production_countries': production_countries,
            'languages': languages
        }
        return movie_dict['creative_type']

    print(scrape_movie('/movie/Star-Wars-Ep-VIII-The-Last-Jedi'))
    return

    def scrape_list_page(url_ext):
        count = 0
        page_str = 'https://www.the-numbers.com/movie/budgets/all' + url_ext
        movie_lst_rsp = requests.get(page_str)
        # Halt if there was an issue with the request
        try:
            movie_lst_rsp.raise_for_status()
        except requests.HTTPError:
            logger.log('Request for ' + page_str + ' failed!')
            return None
        else:
            logger.log('Request for ' + page_str + ' succeeded')
        parsed_movie_lst_rsp = bs4.BeautifulSoup(clean_input(movie_lst_rsp.text), features="html.parser")
        for table_row in parsed_movie_lst_rsp.find_all('tr')[1:]:
            table_cells = table_row.find_all('td')
            i = 0
            movie_dict = dict()
            result = re.search('/box-office-chart/daily/(\d{4}/\d{1,2}/\d{1,2})', table_cells[i + 1].a['href'])
            movie_dict['release_date'] = {
                'slug': result.group(1) if result else None,
                'pretty': table_cells[i + 1].a.text
            }
            movie_dict['title'] = {
                'slug': re.search('/movie/([\w\d\(\)-]+)#tab=summary', table_cells[i + 2].b.a['href']).group(1),
                'pretty': table_cells[i + 2].b.a.text
            }
            # Scrape the movie page
            movie_dict['production_budget'] = parse_dollars(table_cells[i + 3].getText())
            movie_dict['domestic_gross'] = parse_dollars(table_cells[i + 4].getText())
            movie_dict['worldwide_gross'] = parse_dollars(table_cells[i + 5].getText())
            movie_dict.update(scrape_movie(table_cells[i + 2].select('a')[0]['href']))
            yield movie_dict
            count += 1
        logger.log('Read ' + str(count) + ' entries from page ' + page_str)


    def scrape_the_numbers_generator():
        url_ext_lst = [''] + ['/' + str(x) + '01' for x in range(1, PAGE_COUNT)]
        for url_ext in url_ext_lst:
            for page in scrape_list_page(url_ext):
                if page is None:
                    logger.log('Page {} returned None'.format(url_ext))
                    break
                else:
                    yield page


    def store_the_numbers(filename):
        start_total = time.time()
        i = 1
        start_row = time.time()
        entries = list()
        for entry in scrape_the_numbers_generator():
            entries.append(entry)
            exec_time_row = time.time() - start_row
            logger.log('In {:.3f} seconds - {:0>4} {}'.format(exec_time_row, i, entry['title']['pretty']))
            start_row = time.time()
            i += 1
            if i % 100 == 0:
                with open(filename, 'w', newline='') as outfile:
                    json.dump(entries, outfile)
        with open(filename, 'w', newline='') as outfile:
                    json.dump(entries, outfile)
        exec_time_total = time.time() - start_total
        logger.log('{} total entries in {:.3f} seconds'.format(i - 1, exec_time_total))


    if __name__ == '__main__':
        filename = 'output/the_numbers.json'        
        # We really don't want to overwrite any existing good DB file, it takes a long time to scrape all of the data
        if os.path.isfile(filename):
            exit()
        logger = SimpleLogger('log.txt')
        store_the_numbers(filename)

scrape_thenumbers()

<table>
<tr><td><b>Production Budget:</b></td><td>$317,000,000</td></tr>
<meta content="2017-12-13" itemprop="dateCreated"/>
<tr><td><b>Domestic Releases:</b></td>
<td>December 15th, 2017 (Wide) by <a href="/market/distributor/Walt-Disney">Walt Disney</a><br/>December 15th, 2017 (IMAX) by <a href="/market/distributor/Walt-Disney">Walt Disney</a></td></tr>
<tr><td><b>International Releases:</b></td>
<td>December 13th, 2017 (Wide) (<a href="/movie/Star-Wars-Ep-VIII-The-Last-Jedi/Belgium#tab=summary" rel="nofollow">Belgium</a>)
<br/>December 13th, 2017 (Wide) (<a href="/movie/Star-Wars-Ep-VIII-The-Last-Jedi/Brazil#tab=summary">Brazil</a>)
<br/>December 13th, 2017 (Wide) (<a href="/movie/Star-Wars-Ep-VIII-The-Last-Jedi/Colombia#tab=summary" rel="nofollow">Colombia</a>)
<br/>December 13th, 2017 (Wide) (<a href="/movie/Star-Wars-Ep-VIII-The-Last-Jedi/Denmark#tab=summary">Denmark</a>)
<br/>December 13th, 2017 (Wide) (<a href="/movie/Star-Wars-Ep-VIII-The-Last-Jedi/Finland#tab=summary" rel="no

In [91]:
def normalize_movies():
    import json
    import re
    import time
    import tmdbsimple as tmdb
    from requests import HTTPError
    import sys
    
    tmdb.API_KEY = 'af3f6fb737c9085bd424c403dc83c196'
    
    def simple_test():
        search = tmdb.Search()
        results = search.movie(**{'query': 'Star Wars Episode VIII The Last Jedi', 
                                  'year': '2017'})
        movie = tmdb.Movies(results['results'][0]['id'])
        results = movie.external_ids()
        print(movie.external_ids()['imdb_id'])
        sys.exit()
    # simple_test()
    
    class SimpleLogger(object):
        def __init__(self, filename):
            self.file = open(filename, 'w')

        def __del__(self):
            self.file.close()

        def log(self, msg):
            print(msg)
            print(msg, file=self.file)
            
    def clean_string(in_str):
        return re.sub('[^A-Za-z0-9 \-]+', '', in_str)
    
    def normalize_movie_title(in_str):
        in_str = clean_string(in_str)
        in_str = re.sub(' ([Ee]p) ', ' \g<1>isode ', in_str)
        return in_str
            
    def search_imdb_id(title, year):
        # print('\"{}\" ({})'.format(title, year))
        search = tmdb.Search()
        query_dict = {'query': title}
        if year is not None:
            query_dict['year'] = str(year)
        results = search.movie(**query_dict)
        if (not 'results' in results) or (not results['results']) or (not 'id' in results['results'][0]):
            return None
        movie = tmdb.Movies(results['results'][0]['id'])
        results = movie.external_ids()
        if not 'imdb_id' in results or results['imdb_id'] is  None:
            return None
        result = re.search('tt(\d+)', results['imdb_id'])
        return result.group(1) if result else None
    
    
    def normalize_the_numbers():
        with open('data/the_numbers/the_numbers.json', 'r') as input_file:
            the_numbers = json.load(input_file)
        for i, movie in enumerate(the_numbers):
            year = None
            if movie['release_date']['slug'] is not None:
                result = re.search('^(\d{4}).*', movie['release_date']['slug'])
                year = int(result.group(1)) if result is not None else None
                if 1900 < year < 2030:
                    year = str(year)
                else:
                    year = None
            try:
                imdb_id = search_imdb_id(normalize_movie_title(movie['title']['slug']), year)
                if imdb_id is None:
                    time.sleep(0.26)
                    imdb_id = search_imdb_id(normalize_movie_title(movie['title']['pretty']), year)
                the_numbers[i]['imdb_id'] = imdb_id
            except HTTPError:
                print('HTTPError: {} ({})'.format(title, year))
            # API Limited to 40 calls every 10 seconds, search_imdb_id calls API twice for each movie
            logger.log('{:0>4} tt{: <10} {} ({})'.format(i, imdb_id if imdb_id is not None else '', movie['title']['pretty'], year))
            time.sleep(0.51)
        with open('output/the_numbers_norm.json', 'w', newline='') as outfile:
            json.dump(the_numbers, outfile)
    
    logger = SimpleLogger('normalize_log.txt')
    normalize_the_numbers()

normalize_movies()

0000 tt0499549    Avatar (2009)
0001 tt1298650    Pirates of the Caribbean: On Stranger Tides (2011)
0002 tt4154796    Avengers: Endgame (2019)
0003 tt2395427    Avengers: Age of Ultron (2015)
0004 tt2527336    Star Wars Ep. VIII: The Last Jedi (2017)
0005 tt2488496    Star Wars Ep. VII: The Force Awakens (2015)
0006 tt4154756    Avengers: Infinity War (2018)
0007 tt0449088    Pirates of the Caribbean: At Worlds End (2007)
0008 tt0974015    Justice League (2017)
0009 tt2379713    Spectre (2015)
0010 tt1345836    The Dark Knight Rises (2012)
0011 tt3778644    Solo: A Star Wars Story (2018)
0012 tt1210819    The Lone Ranger (2013)
0013 tt0401729    John Carter (2012)
0014 tt0398286    Tangled (2010)
0015 tt0413300    Spider-Man 3 (2007)
0016 tt3498820    Captain America: Civil War (2016)
0017 tt2975590    Batman v Superman: Dawn of Justice (2016)
0018 tt0903624    The Hobbit: An Unexpected Journey (2012)
0019 tt0417741    Harry Potter and the Half-Blood Prince (2009)
0020 tt1170358    Th

0181 tt0242653    The Matrix Revolutions (2003)
0182 tt1677720    Ready Player One (2018)
0183 tt0371606    Chicken Little (2005)
0184 tt0317919    Mission: Impossible III (2006)
0185 tt0808151    Angels & Demons (2009)
0186 tt0332452    Troy (2004)
0187 tt0938283    The Last Airbender (2010)
0188 tt0389790    Bee Movie (2007)
0189 tt0436339    G-Force (2009)
0190 tt0397892    Bolt (2008)
0191 tt1646987    Wrath of the Titans (2012)
0192 tt0442933    Beowulf (2007)
0193 tt1077368    Dark Shadows (2012)
0194 tt2334879    White House Down (2013)
0195 tt0780653    The Wolfman (2010)
0196 tt2034800    The Great Wall (2016)
0197 tt3332064    Pan (2015)
0198 tt1305591    Mars Needs Moms (2011)
0199 tt5834426    Moonfall (None)
0200 tt0424095    Flushed Away (2006)
0201 tt1277953    Madagascar 3: Europe's Most Wanted (2012)
0202 tt1229238    Mission: ImpossibleGhost Protocol (2011)
0203 tt1646971    How to Train Your Dragon 2 (2014)
0204 tt0120855    Tarzan (1999)
0205 tt0864835    Mr. Peabod

0370 tt0112462    Batman Forever (1995)
0371 tt0133152    Planet of the Apes (2001)
0372 tt0368891    National Treasure (2004)
0373 tt1853728    Django Unchained (2012)
0374 tt0970866    Little Fockers (2010)
0375 tt0111503    True Lies (1994)
0376 tt0765429    American Gangster (2007)
0377 tt0844471    Cloudy with a Chance of Meatballs (2009)
0378 tt1386588    The Other Guys (2010)
0379 tt0339291    Lemony Snicket's A Series of Unfortunate Events (2004)
0380 tt0993846    The Wolf of Wall Street (2013)
0381 tt0848537    Epic (2013)
0382 tt0116213    Eraser (1996)
0383 tt0116583    The Hunchback of Notre Dame (1996)
0384 tt0120917    The Emperor's New Groove (2000)
0385 tt1320253    The Expendables 2 (2012)
0386 tt0386117    Where the Wild Things Are (2009)
0387 tt0449010    Eragon (2006)
0388 tt1267297    Hercules (2014)
0389 tt1243957    The Tourist (2010)
0390 tt0146675    End of Days (1999)
0391 tt0327162    The Stepford Wives (2004)
0392 tt0423294    Surf's Up (2007)
0393 tt0450259

0561 tt0337741    Something's Gotta Give (2003)
0562 tt0369436    Four Christmases (2008)
0563 tt0119094    Face/Off (1997)
0564 tt0960731    Bedtime Stories (2008)
0565 tt0257044    Road to Perdition (2002)
0566 tt1564367    Just Go With It (2011)
0567 tt0287978    Daredevil (2003)
0568 tt1059786    Eagle Eye (2008)
0569 tt0118880    Con Air (1997)
0570 tt1302067    Yogi Bear (2010)
0571 tt0159365    Cold Mountain (2003)
0572 tt1037705    The Book of Eli (2010)
0573 tt0119137    Flubber (1997)
0574 tt0171363    The Haunting (1999)
0575 tt0117705    Space Jam (1996)
0576 tt2713180    Fury (2014)
0577 tt6182908    Smallfoot (2018)
0578 tt0383216    The Pink Panther (2006)
0579 tt1222817    Zookeeper (2011)
0580 tt0043456    The Day the Earth Stood Still (2008)
0581 tt0118883    Conspiracy Theory (1997)
0582 tt0120828    Six Days, Seven Nights (1998)
0583 tt0166813    Spirit: Stallion of the Cimarron (2002)
0584 tt0244244    Swordfish (2001)
0585 tt           Fantasia 2000 (Theatrical Re

0764 tt1325004    The Twilight Saga: Eclipse (2010)
0765 tt0164184    The Sum of All Fears (2002)
0766 tt2637276    Ted 2 (2015)
0767 tt0227445    The Score (2001)
0768 tt0480687    Hall Pass (2011)
0769 tt0113845    Money Train (1995)
0770 tt0285531    Dreamcatcher (2003)
0771 tt0132347    Mystery Men (1999)
0772 tt0140352    The Insider (1999)
0773 tt1389072    Downsizing (2017)
0774 tt1482459    Doctor Seuss' The Lorax (2012)
0775 tt0758774    Body of Lies (2008)
0776 tt1611224    Abraham Lincoln: Vampire Hunter (2012)
0777 tt0116830    Last Man Standing (1996)
0778 tt0462396    The Last Legion (2007)
0779 tt0137494    Entrapment (1999)
0780 tt0365474    The X Files: Fight the Future (1998)
0781 tt2369135    Need for Speed (2014)
0782 tt0120815    Saving Private Ryan (1998)
0783 tt0207201    What Women Want (2000)
0784 tt0443272    Lincoln (2012)
0785 tt0268380    Ice Age (2002)
0786 tt0112384    Apollo 13 (1995)
0787 tt0133093    The Matrix (1999)
0788 tt5220122    Hotel Transylvan

0967 tt2296777    Sherlock Gnomes (2018)
0968 tt6811018    The Kid Who Would Be King (2019)
0969 tt1431045    Deadpool (2016)
0970 tt2179136    American Sniper (2014)
0971 tt1051904    Goosebumps (2015)
0972 tt0095956    Rambo III (1988)
0973 tt0114319    Sabrina (1995)
0974 tt0485985    Red Tails (2012)
0975 tt0425123    Just Like Heaven (2005)
0976 tt2234155    The Internship (2013)
0977 tt0158622    The Flintstones in Viva Rock Vegas (2000)
0978 tt0379865    Leatherheads (2008)
0979 tt5442430    Life (2017)
0980 tt1314228    Did You Hear About the Morgans? (2009)
0981 tt           Che, Part 1: The Argentine (2008)
0982 tt1000774    Sex and the City (2008)
0983 tt1220634    Resident Evil: Afterlife (2010)
0984 tt1232200    That's My Boy (2012)
0985 tt0118971    Devil's Advocate (1997)
0986 tt1071875    Ghost Rider: Spirit of Vengeance (2012)
0987 tt0116136    Dragonheart (1996)
0988 tt0367479    After the Sunset (2004)
0989 tt0238112    Captain Corelli's Mandolin (2001)
0990 tt218041

1167 tt0110725    On Deadly Ground (1994)
1168 tt1186367    Ninja Assassin (2009)
1169 tt0350028    Raising Helen (2004)
1170 tt3799694    The Nice Guys (2016)
1171 tt0126916    For Love of the Game (1999)
1172 tt0141109    Jack Frost (1998)
1173 tt1392197    Marmaduke (2010)
1174 tt0117765    Striptease (1996)
1175 tt1212419    Hereafter (2010)
1176 tt0264935    Murder by Numbers (2002)
1177 tt0112401    Assassins (1995)
1178 tt1608290    Zoolander 2 (2016)
1179 tt1034303    Defiance (2009)
1180 tt0367959    Hannibal Rising (2007)
1181 tt0160916    The Story of Us (1999)
1182 tt1517260    The Host (2013)
1183 tt0264395    Basic (2003)
1184 tt2199571    Run All Night (2015)
1185 tt0309377    Blood Work (2002)
1186 tt0963178    The International (2009)
1187 tt0116225    Escape from L.A. (1996)
1188 tt0315297    Twisted (2004)
1189 tt0362270    The Life Aquatic with Steve Zissou (2004)
1190 tt0129167    The Iron Giant (1999)
1191 tt2274648    Hellboy (2019)
1192 tt1240982    Your Highnes

1371 tt1486185    Red Riding Hood (2011)
1372 tt0265349    The Mothman Prophecies (2002)
1373 tt0097742    Licence to Kill (1989)
1374 tt1255919    Holmes & Watson (2018)
1375 tt1598828    One for the Money (2012)
1376 tt0179626    15 Minutes (2001)
1377 tt0399295    Lord of War (2005)
1378 tt0108255    Super Mario Bros. (1993)
1379 tt0104412    Hero (1992)
1380 tt1568338    Man on a Ledge (2012)
1381 tt1032751    The Warrior's Way (2010)
1382 tt0119640    McHale's Navy (1997)
1383 tt1149361    Micmacs (2010)
1384 tt           Les Bronzs 3: amis pour la vie (2006)
1385 tt1220911    Pourquoi j'ai pas mang mon pre (2015)
1386 tt0298203    8 Mile (2002)
1387 tt2066051    Rocketman (2019)
1388 tt0183790    A Knights Tale (2001)
1389 tt0288045    The Medallion (2003)
1390 tt1053810    The Big Year (2011)
1391 tt0167404    The Sixth Sense (1999)
1392 tt0396269    Wedding Crashers (2005)
1393 tt1155076    The Karate Kid (2010)
1394 tt6146586    John Wick: Chapter 3  Parabellum (2019)
1395 tt2

1570 tt0379786    Serenity (2005)
1571 tt0465602    Shoot 'Em Up (2007)
1572 tt0312329    Against the Ropes (2004)
1573 tt1462900    Yi dai zong shi (2013)
1574 tt0232500    The Fast and the Furious (2001)
1575 tt0256415    Sweet Home Alabama (2002)
1576 tt1142988    The Ugly Truth (2009)
1577 tt0287717    Spy Kids 2: The Island of Lost Dreams (2002)
1578 tt0111280    Star Trek: Generations (1994)
1579 tt2091256    Captain Underpants: The First Epic Movie (2017)
1580 tt3450650    Paul Blart: Mall Cop 2 (2015)
1581 tt0119715    Mouse Hunt (1997)
1582 tt4501244    Why Him? (2016)
1583 tt1067583    Water for Elephants (2011)
1584 tt1055292    Life as We Know It (2010)
1585 tt0174856    The Hurricane (1999)
1586 tt1411250    Riddick (2013)
1587 tt0125022    Heartbreakers (2001)
1588 tt0278435    Enough (2002)
1589 tt2304933    The 5th Wave (2016)
1590 tt0465580    Push (2009)
1591 tt1979320    Rush (2013)
1592 tt0225071    Angel Eyes (2001)
1593 tt0279889    Joe Somebody (2001)
1594 tt0142

1775 tt1674784    Trespass (2011)
1776 tt0416496    Bandidas (2006)
1777 tt1321869    The Lovers (2015)
1778 tt           Black Water Transit (2008)
1779 tt1093357    The Darkest Hour (2011)
1780 tt3890160    Baby Driver (2017)
1781 tt1790864    The Maze Runner (2014)
1782 tt2726560    The Longest Ride (2015)
1783 tt0106226    The Age of Innocence (1993)
1784 tt0876563    Gake no ue no Ponyo (2009)
1785 tt4073790    The Darkest Minds (2018)
1786 tt0163579    Chill Factor (1999)
1787 tt0138304    The Astronaut's Wife (1999)
1788 tt0167203    I Dreamed of Africa (2000)
1789 tt3148834    Thunder and the House of Magic (2014)
1790 tt0082979    Reds (1981)
1791 tt0104257    A Few Good Men (1992)
1792 tt0208003    Big Momma's House (2000)
1793 tt0242445    Exit Wounds (2001)
1794 tt2823054    Mike and Dave Need Wedding Dates (2016)
1795 tt1800741    Step Up Revolution (2012)
1796 tt0417148    Snakes on a Plane (2006)
1797 tt0330793    The Punisher (2004)
1798 tt0204626    The Watcher (2000)


1975 tt2364841    Runner Runner (2013)
1976 tt0120008    The Replacement Killers (1998)
1977 tt4849438    Baahubali 2: The Conclusion (2017)
1978 tt0370032    Ultraviolet (2006)
1979 tt0395972    North Country (2005)
1980 tt1438176    Fright Night (2011)
1981 tt5109784    mother! (2017)
1982 tt2080374    Steve Jobs (2015)
1983 tt0271367    Eight Legged Freaks (2002)
1984 tt0116320    Fled (1996)
1985 tt1017460    Splice (2010)
1986 tt0411061    88 Minutes (2008)
1987 tt0327247    The Whole Ten Yards (2004)
1988 tt0091225    Howard the Duck (1986)
1989 tt0482572    Pride and Glory (2008)
1990 tt0402901    The Cave (2005)
1991 tt0335121    Godsend (2004)
1992 tt0318283    Alex & Emma (2003)
1993 tt0324554    Wicker Park (2004)
1994 tt0402399    The New World (2005)
1995 tt0765447    Evening (2007)
1996 tt1549920    The Last Stand (2013)
1997 tt0120710    In Dreams (1999)
1998 tt0131646    Wing Commander (1999)
1999 tt0218817    Antitrust (2001)
2000 tt2101341    Dead Man Down (2013)
2001

2173 tt0256524    The Curse of the Jade Scorpion (2001)
2174 tt1837703    The Fifth Estate (2013)
2175 tt0115710    Blood and Wine (1997)
2176 tt1747958    Blood Ties (2013)
2177 tt0119227    The Grimm Brothers' Snow White (1997)
2178 tt1224378    Viy (2015)
2179 tt1137470    Accidental Love (2015)
2180 tt2967008    The Face of an Angel (2015)
2181 tt0116322    Flipper (1996)
2182 tt0387131    The Constant Gardener (2005)
2183 tt1175491    W. (2008)
2184 tt0335345    The Passion of the Christ (2004)
2185 tt0107614    Mrs. Doubtfire (1993)
2186 tt0095953    Rain Man (1988)
2187 tt1454029    The Help (2011)
2188 tt4846340    Hidden Figures (2016)
2189 tt1205489    Gran Torino (2008)
2190 tt0936501    Taken (2009)
2191 tt1408253    Ride Along (2014)
2192 tt0103855    The Bodyguard (1992)
2193 tt0108052    Schindlers List (1993)
2194 tt0333780    Legally Blonde 2: Red, White & Blonde (2003)
2195 tt0331632    Scooby-Doo 2: Monsters Unleashed (2004)
2196 tt0357413    Anchorman: The Legend of

2371 tt0122459    Return to Me (2000)
2372 tt1234548    The Men Who Stare at Goats (2009)
2373 tt1007028    Zack and Miri Make a Porno (2008)
2374 tt0238948    Double Take (2001)
2375 tt0172493    Girl, Interrupted (1999)
2376 tt0171580    Nurse Betty (2000)
2377 tt0464154    Piranha 3D (2010)
2378 tt1433108    Faster (2010)
2379 tt0078504    The Wiz (1978)
2380 tt0335559    Win a Date with Tad Hamilton! (2004)
2381 tt0158811    Muppets From Space (1999)
2382 tt0217756    Ready to Rumble (2000)
2383 tt1742650    I Don't Know How She Does It (2011)
2384 tt0196857    Play it to the Bone (1999)
2385 tt1833673    Dhoom 3 (2013)
2386 tt0363473    Beyond the Sea (2004)
2387 tt0347149    Hauru no ugoku shiro (2005)
2388 tt0120645    Meet the Deedles (1998)
2389 tt0112389    The Thief and the Cobbler (1995)
2390 tt0356443    The Bridge of San Luis Rey (2005)
2391 tt0790665    Flood (2007)
2392 tt1156398    Zombieland (2009)
2393 tt0080684    Star Wars Ep. V: The Empire Strikes Back (1980)
2394

2572 tt3531824    Nerve (2016)
2573 tt0082198    Conan the Barbarian (1982)
2574 tt0120703    How Stella Got Her Groove Back (1998)
2575 tt0101452    Bill & Ted's Bogus Journey (1991)
2576 tt0483726    Man of the Year (2006)
2577 tt3231054    Risen (2016)
2578 tt1666186    Vampires Suck (2010)
2579 tt0078869    The Black Hole (1979)
2580 tt1179947    The American (2010)
2581 tt0120094    Selena (1997)
2582 tt1268799    A Very Harold & Kumar 3D Christmas (2011)
2583 tt0338013    Eternal Sunshine of the Spotless Mind (2004)
2584 tt1572315    Texas Chainsaw 3D (2013)
2585 tt0449467    Babel (2006)
2586 tt1371150    This is Where I Leave You (2014)
2587 tt0918927    Doubt (2008)
2588 tt0372588    Team America: World Police (2004)
2589 tt0112722    Copycat (1995)
2590 tt0795461    Scary Movie V (2013)
2591 tt0815236    She's Out of My League (2010)
2592 tt1013753    Milk (2008)
2593 tt0064782    Paint Your Wagon (1969)
2594 tt1226753    The Debt (2011)
2595 tt0288477    Ghost Ship (2002)
25

2773 tt0056264    Mutiny on The Bounty (1962)
2774 tt0245429    Sen to Chihiro no Kamikakushi (2002)
2775 tt1135985    Sex Drive (2008)
2776 tt0120670    Firestorm (1998)
2777 tt0810922    Take Me Home Tonight (2011)
2778 tt1870529    Won't Back Down (2012)
2779 tt6495770    Action Point (2018)
2780 tt0058085    The Fall of the Roman Empire (1964)
2781 tt0107719    The Nutcracker (1993)
2782 tt           George Balanchine\'s The Nutcracker (2011)
2783 tt0116745    Kansas City (1996)
2784 tt           Out of the Inferno (2016)
2785 tt0418819    George A. Romero's Land of the Dead (2005)
2786 tt           Indignes (2006)
2787 tt0384806    The Amityville Horror (2005)
2788 tt0268126    Adaptation (2002)
2789 tt1058017    The Invention of Lying (2009)
2790 tt0120669    Fear and Loathing in Las Vegas (1998)
2791 tt0362225    Ne le dis  personne (2008)
2792 tt           Left Behind (2001)
2793 tt0099810    The Hunt (None)
2794 tt2004420    Neighbors (2014)
2795 tt0443453    Borat (2006)
2796

2971 tt1265990    The Roommate (2011)
2972 tt0425430    The Messengers (2007)
2973 tt0250720    See Spot Run (2001)
2974 tt0101764    Double Impact (1991)
2975 tt0255819    Baby Boy (2001)
2976 tt0245686    Joe Dirt (2001)
2977 tt0430770    The Women (2008)
2978 tt0425112    Hot Fuzz (2007)
2979 tt0497465    Vicky Cristina Barcelona (2008)
2980 tt0204175    Boys and Girls (2000)
2981 tt7690670    Superfly (2018)
2982 tt0110413    Lon (1994)
2983 tt1403981    Remember Me (2010)
2984 tt0283139    White Oleander (2002)
2985 tt1131734    Jennifer's Body (2009)
2986 tt0186045    Drowning Mona (2000)
2987 tt0093818    Radio Days (1987)
2988 tt0319524    How to Deal (2003)
2989 tt2467046    Left Behind (2014)
2990 tt0367085    Soul Plane (2004)
2991 tt0095687    My Stepmother Is an Alien (1988)
2992 tt1716777    People Like Us (2012)
2993 tt0139809    The Thirteenth Floor (1999)
2994 tt0380277    The Cookout (2004)
2995 tt0079550    Meteor (1979)
2996 tt0091203    Highlander (1986)
2997 tt162

3173 tt0293416    Metropolis (2002)
3174 tt           Ck Ni Yinning (2015)
3175 tt0114660    Things to Do in Denver when You're Dead (1995)
3176 tt2032557    The Reluctant Fundamentalist (2013)
3177 tt0167260    The Return (2004)
3178 tt0252299    Buffalo Soldiers (2003)
3179 tt           Steal (Canadian Release) (2003)
3180 tt1020558    Centurion (2010)
3181 tt0785035    Ong-Bak 2 (2009)
3182 tt0117653    Silent Trigger (1996)
3183 tt0805570    Midnight Meat Train (2008)
3184 tt1551641    Winnie Mandela (2013)
3185 tt1535612    The Son of No One (2011)
3186 tt           All the Queen's Men (2002)
3187 tt0484111    The Good Night (2007)
3188 tt1487931    Khumba (2013)
3189 tt1640711    A Few Best Men (2012)
3190 tt1971325    Autmata (2014)
3191 tt3856124    Chiamatemi Francesco - Il papa della gente (2015)
3192 tt0469640    Bathory (2008)
3193 tt           San suk si gin (2010)
3194 tt0406728    Dungeons & Dragons 2: The Elemental Might (2005)
3195 tt4411618    In Dubious Battle (2017)

3373 tt           A Little Bit of Heaven (2012)
3374 tt2857458    Mortadelo y Filemn contra Jimmy el Cachondo (2014)
3375 tt0095489    The Land Before Time (1988)
3376 tt0073195    Jaws (1975)
3377 tt0070047    The Exorcist (1973)
3378 tt2582846    The Fault in Our Stars (2014)
3379 tt0163651    American Pie (1999)
3380 tt1621045    Think Like a Man (2012)
3381 tt1929263    Heaven is for Real (2014)
3382 tt0091129    The Golden Child (1986)
3383 tt0084726    Star Trek II: The Wrath of Khan (1982)
3384 tt0303714    Barbershop (2002)
3385 tt0086567    WarGames (1983)
3386 tt0109040    Ace Ventura: Pet Detective (1994)
3387 tt1591479    Act of Valor (2012)
3388 tt0090329    Witness (1985)
3389 tt0462590    Step Up (2006)
3390 tt1706593    Chronicle (2012)
3391 tt0115641    Beavis and Butt-Head Do America (1996)
3392 tt0066206    Patton (1970)
3393 tt1636826    Project X (2012)
3394 tt5027774    Three Billboards Outside Ebbing, Missouri (2017)
3395 tt3741834    Lion (2016)
3396 tt1563742  

3575 tt0473700    September Dawn (2007)
3576 tt0077651    Halloween (2018)
3577 tt0119217    Good Will Hunting (1997)
3578 tt0391198    The Grudge (2004)
3579 tt4160708    Dont Breathe (2016)
3580 tt0083131    Stripes (1981)
3581 tt0489270    Saw III (2006)
3582 tt4094724    The Purge: Election Year (2016)
3583 tt6857166    Book Club (2018)
3584 tt0204946    Bring it On (2000)
3585 tt5726086    Insidious: The Last Key (2018)
3586 tt0160862    She's All That (1999)
3587 tt0890870    Saw IV (2007)
3588 tt0455612    Madea's Family Reunion (2006)
3589 tt0375210    White Noise (2005)
3590 tt0090863    The Color of Money (1986)
3591 tt3195644    Insidious Chapter 3 (2015)
3592 tt0104868    The Mighty Ducks (1992)
3593 tt4925292    Lady Bird (2017)
3594 tt0929632    Precious (Based on the Novel Push by Sapphire) (2009)
3595 tt1412386    The Best Exotic Marigold Hotel (2012)
3596 tt7784604    Hereditary (2018)
3597 tt5164432    Love, Simon (2018)
3598 tt0096928    Bill & Ted's Excellent Advent

3774 tt           The Oxford Murders (2010)
3775 tt3202120    Barbecue (2014)
3776 tt1320291    The Reef (2010)
3777 tt           White Noise 2: The Light (2008)
3778 tt2300975    Jessabelle (2014)
3779 tt2923316    American Heist (2015)
3780 tt1319716    It's a Wonderful Afterlife (2010)
3781 tt1568139    You Got Served: Beat The World (2011)
3782 tt1097643    Fifty Dead Men Walking (2009)
3783 tt2556874    Plastic (2014)
3784 tt           Jungle Shuffle (2015)
3785 tt1712192    Message from the King (2017)
3786 tt1725969    Ethel & Ernest (2016)
3787 tt0479341    Adam Resurrected (2009)
3788 tt1147687    The Devil's Tomb (2009)
3789 tt           Good Intentions (2009)
3790 tt0213985    Partition (2007)
3791 tt1701990    Detention (2012)
3792 tt1913166    Nurse 3D (2014)
3793 tt1694021    Grizzly (2015)
3794 tt3352390    Friend Request (2016)
3795 tt1376195    Gunless (2010)
3796 tt1091722    Adventureland (2009)
3797 tt1602620    Amour (2012)
3798 tt0343996    The Lost City (2006)
37

3976 tt0119494    The Land Girls (1998)
3977 tt           The Wendell Baker Story (2007)
3978 tt1235189    Wild Target (2010)
3979 tt0964539    Pathology (2008)
3980 tt1181614    Wuthering Heights (2012)
3981 tt3606888    A Street Cat Named Bob (2016)
3982 tt0360323    10th & Wolf (2006)
3983 tt2494384    Aloft (2015)
3984 tt0961108    Fireflies in the Garden (2011)
3985 tt0342272    Dear Wendy (2005)
3986 tt           Akira (2001)
3987 tt0475783    Welcome to Dongmakgol (2005)
3988 tt2102502    Feichang Xingyun (2013)
3989 tt4057916    Don Gato, el inicio de la pandilla (2015)
3990 tt0473188    The Death and Life of Bobby Z (2007)
3991 tt5153288    Backstabbing For Beginners (2018)
3992 tt2112277    Swelter (2014)
3993 tt           Maurice Richard (2007)
3994 tt2628316    Green Street Hooligans: Underground (2015)
3995 tt           Christmas in Beverly Hills (2009)
3996 tt           Henry Joseph Church (2015)
3997 tt4060866    Code of Honor (2016)
3998 tt4882548    Burn Your Maps (201

4176 tt0250274    The Brothers (2001)
4177 tt1125849    The Wrestler (2008)
4178 tt0097216    Do the Right Thing (1989)
4179 tt0082340    Escape from New York (1981)
4180 tt0161100    The Wood (1999)
4181 tt0114814    The Usual Suspects (1995)
4182 tt0097981    A Nightmare On Elm Street: The Dream Child (1989)
4183 tt0283111    National Lampoons Van Wilder (2002)
4184 tt0038499    Duel in the Sun (1946)
4185 tt9024106    Unplanned (2019)
4186 tt0218839    Best in Show (2000)
4187 tt0455590    The Last King of Scotland (2006)
4188 tt0310281    A Mighty Wind (2003)
4189 tt4504044    The Prodigy (2019)
4190 tt5734576    The Possession of Hannah Grace (2018)
4191 tt0096054    School Daze (1988)
4192 tt0462244    Daddy Day Camp (2007)
4193 tt0095690    Mystic Pizza (1988)
4194 tt0117786    Mr. Nice Guy (1998)
4195 tt0049934    War and Peace (1956)
4196 tt0120148    Sliding Doors (1998)
4197 tt0114609    Tales from the Hood (1995)
4198 tt0097474    Halloween 5: The Revenge of Michael Myers (

4374 tt0053290    Solomon and Sheba (1959)
4375 tt0083630    The Beastmaster (1982)
4376 tt0795438    Not Easily Broken (2009)
4377 tt3014666    Moms Night Out (2014)
4378 tt1082807    The Belko Experiment (2017)
4379 tt4871980    The Perfect Match (2016)
4380 tt0259974    Digimon: The Movie (2000)
4381 tt0332375    Saved! (2004)
4382 tt0338135    Les invasions barbares (2003)
4383 tt0075147    Robin and Marian (1976)
4384 tt0077572    Force 10 from Navarone (1978)
4385 tt0245120    The Forsaken (2001)
4386 tt8632862    Fahrenheit 11/9 (2018)
4387 tt0098546    UHF (1989)
4388 tt1529567    Sea Rex 3D: Journey to a Prehistoric World (2010)
4389 tt0456554    Grandmas Boy (2006)
4390 tt0120831    Slums of Beverly Hills (1998)
4391 tt0064116    Once Upon a Time in the West (1969)
4392 tt0227005    Made (2001)
4393 tt           La mala educacin (2004)
4394 tt1182345    Moon (2009)
4395 tt4337690    90 Minutes in Heaven (2015)
4396 tt3216348    Incarnate (2016)
4397 tt0415949    Keeping Up wi

4575 tt0188863    Love Stinks (1999)
4576 tt0770802    Samsara (2012)
4577 tt0796375    You Kill Me (2007)
4578 tt           Cafarnam (2018)
4579 tt1645080    The Art of Getting By (2011)
4580 tt0318761    Thumbsucker (2005)
4581 tt0873886    Red State (2011)
4582 tt0366780    MirrorMask (2005)
4583 tt0092615    The Barbarians (1987)
4584 tt2076220    Holy Motors (2012)
4585 tt2910814    The Signal (2014)
4586 tt0273982    Poolhall Junkies (2003)
4587 tt0126859    The Loss of Sexual Innocence (1999)
4588 tt1839642    The Face of Love (2013)
4589 tt2382396    Joe (2014)
4590 tt0095904    Prison (1988)
4591 tt0120122    Shooting Fish (1998)
4592 tt1074929    Adoration (2009)
4593 tt0206226    Psycho Beach Party (2000)
4594 tt           The Big Tease (2000)
4595 tt4383288    Polina danser sa vie (2016)
4596 tt2403393    Desert Dancer (2014)
4597 tt2876428    Guten Tag, Ramon (2015)
4598 tt0319769    Mondays in the Sun (2003)
4599 tt2893490    Manglehorn (2015)
4600 tt           Tau ming c

4778 tt0108517    Warlock: The Armageddon (1993)
4779 tt0118541    8 Heads in a Duffel Bag (1997)
4780 tt0077405    Days of Heaven (1978)
4781 tt0268690    Thirteen Conversations About One Thing (2002)
4782 tt3367294    Compadres (2016)
4783 tt0155776    Jawbreaker (1999)
4784 tt0115632    Basquiat (1996)
4785 tt0468565    Tsotsi (2006)
4786 tt1462054    Letters to God (2010)
4787 tt0147612    Happiness (1998)
4788 tt0337996    DysFunkTional Family (2003)
4789 tt3099498    Tusk (2014)
4790 tt4218696    The Wall (2017)
4791 tt0363589    Elephant (2003)
4792 tt1667307    Damsels in Distress (2012)
4793 tt0364569    Oldboy (2003)
4794 tt1640459    Hobo with a Shotgun (2011)
4795 tt2690138    The D Train (2015)
4796 tt1920849    Bachelorette (2012)
4797 tt1232776    Fish Tank (2010)
4798 tt           Everybody Wants to Be Italian (2008)
4799 tt1686018    Creature (2011)
4800 tt3253232    The Bounce Back (2016)
4801 tt0116361    Freeway (1996)
4802 tt0785025    Love's Abiding Joy (2006)
480

4977 tt0379557    Touching the Void (2004)
4978 tt1645089    Inside Job (2010)
4979 tt0029852    Alexander's Ragtime Band (1938)
4980 tt0415978    Me and You and Everyone We Know (2005)
4981 tt3787590    We Are Your Friends (2015)
4982 tt0433387    Harsh Times (2006)
4983 tt0165798    Ghost Dog: The Way of the Samurai (2000)
4984 tt2326612    Captive (2015)
4985 tt0290212    Full Frontal (2002)
4986 tt7158430    Hearts Beat Loud (2018)
4987 tt5816564    The Resurrection of Gavin Stone (2017)
4988 tt1185616    Waltz with Bashir (2008)
4989 tt0369994    Strangers with Candy (2006)
4990 tt0102536    Night on Earth (1992)
4991 tt0107668    Nemesis (1993)
4992 tt0367027    Shortbus (2006)
4993 tt           Son of Rambow: A Home Movie (2008)
4994 tt0349159    The Book of Mormon Movie, Volume 1: The Journey (2003)
4995 tt0171359    Hamlet (2000)
4996 tt3172532    The Diary of a Teenage Girl (2015)
4997 tt0912593    No End In Sight (2007)
4998 tt2692904    Locke (2014)
4999 tt0162973    Get Re

5176 tt           Mutant World (2015)
5177 tt3526286    #Horror (2015)
5178 tt3781616    Checkmate (2015)
5179 tt0029583    Snow White and the Seven Dwarfs (1937)
5180 tt0063592    Shalako (1968)
5181 tt4285496    El abrazo de la serpiente (2016)
5182 tt5311542    Werk ohne Autor (2018)
5183 tt3966404    Mustang (2015)
5184 tt0300270    The Holy Girl (2005)
5185 tt3230082    La chambre bleue (2014)
5186 tt0374639    Incident at Loch Ness (2004)
5187 tt1480658    Eddie: The Sleepwalking Cannibal (2013)
5188 tt1447793    My Girlfriend's Boyfriend (2010)
5189 tt2640460    Kurmanjan datka (2014)
5190 tt0464054    House at the End of the Drive (2014)
5191 tt0060153    Batman - The Movie (1966)
5192 tt0120735    Lock, Stock and Two Smoking Barrels (1999)
5193 tt3551840    The Ballad of Gregorio Cortez (1983)
5194 tt0028216    San Francisco (1936)
5195 tt0154420    Festen (1998)
5196 tt0117958    Trees Lounge (1996)
5197 tt1432078    The Girlfriend Experience (2009)
5198 tt0433398    Journey 

5372 tt           The Poker House (2009)
5373 tt0448090    Proud (2005)
5374 tt           Steppin: The Movie (2008)
5375 tt1134674    Zombies of Mass Destruction (2010)
5376 tt2149137    Snow White: A Deadly Summer (2012)
5377 tt1838722    Truth or Die (2012)
5378 tt1930294    Black Rock (2013)
5379 tt3289362    Hidden Away (2014)
5380 tt           My Last Day Without You (2013)
5381 tt2446502    Zombie Hunter (2013)
5382 tt3359872    Doc Holliday's Revenge (2014)
5383 tt1604100    A Fine Step (2014)
5384 tt2165765    Fear Clinic (2015)
5385 tt4676372    The Pet (2015)
5386 tt2655734    Bang Bang Baby (2014)
5387 tt5022702    Hush (2016)
5388 tt           1982 (2016)
5389 tt3034146    The Horror Network (2015)
5390 tt3406296    Chemical Cut (2016)
5391 tt           Por amor en el caserio (2015)
5392 tt8361028    Cam (2018)
5393 tt8327962    Alien Psychosis (2018)
5394 tt0825346    And Then Came Love (2007)
5395 tt0424136    Hard Candy (2006)
5396 tt0330136    Charly (2002)
5397 tt20112

5570 tt2112209    Mutual Friends (2014)
5571 tt4273494    Rise Of the Entrepreneur - The Search For A Bet… (2014)
5572 tt0024034    42nd Street (1933)
5573 tt0100142    Metropolitan (1990)
5574 tt0456149    Moartea domnului Lazarescu (2006)
5575 tt           Gory Gory Hallelujah (2005)
5576 tt3843282    Twenty-Two (2017)
5577 tt2359024    Blue Ruin (2014)
5578 tt0374900    Napoleon Dynamite (2004)
5579 tt0071853    Monty Python and the Holy Grail (1975)
5580 tt0451176    Quinceanera (2006)
5581 tt           Dogtown and Z-Boys (2002)
5582 tt1259998    Heroes (2008)
5583 tt           Tarnation (2004)
5584 tt1560957    I Want Your Money (2010)
5585 tt0082307    E tu vivrai nel terrore - L'aldil (1983)
5586 tt0079302    Home Movies (1980)
5587 tt0261755    Jackpot (2001)
5588 tt           Fabled (2004)
5589 tt0402249    The Dark Hours (2005)
5590 tt           Viskningar och rop (1973)
5591 tt0091578    My Beautiful Laundrette (1986)
5592 tt           Show Me (2005)
5593 tt3867652    Pancak

5767 tt3109200    Dude, Where's My Dog (2014)
5768 tt2343473    Echo Dr. (2014)
5769 tt           Closure (2015)
5770 tt           Lunchtime Heroes (2015)
5771 tt3677412    Open Secret (2015)
5772 tt2797642    The Night Visitor (2015)
5773 tt           Tiger Orange (2015)
5774 tt0068833    The Last House on the Left (1972)
5775 tt3470600    Sing (None)
5776 tt           The Foot Fist Way (2008)
5777 tt           Dawn of the Crescent Moon (2015)
5778 tt2319456    Queen Crab (2015)
5779 tt2955096    Happy Christmas (2014)
5780 tt0428959    Peace, Propaganda and the Promised Land (2005)
5781 tt0431830    Absentia (2013)
5782 tt0138704    Pi (1998)
5783 tt           I Love You  Don't Touch Me! (1998)
5784 tt0138987    20 Dates (1999)
5785 tt0390521    Super Size Me (2004)
5786 tt           Supporting Characters (2013)
5787 tt           The FP (2012)
5788 tt3672742    Turbo Kid (2015)
5789 tt1861343    Hayride (2012)
5790 tt0052646    The Brain That Wouldn't Die (1962)
5791 tt2334896    The

In [108]:
def rectify_the_numbers():
    import csv
    import json
    import os
    import pandas as pd
    import re
    
    def rectify_single(the_numbers, output_dir):
        single_columns = \
        ['release_date', 'release_date_norm', 'title', 'title_norm', 'mpaa-rating', 
         'mpaa-rating_norm', 'running-time', 'source', 'source_norm', 'genre', 
         'genre_norm', 'production_method', 'production_method_norm', 
         'creative_type', 'creative_type_norm', 'production_budget', 'domestic_gross', 
         'worldwide_gross', 'imdb_id']
        
        with open(os.path.join(output_dir, 'the_numbers_rectified.csv'), 'w', newline='') as outfile:
            header_writer = csv.writer(outfile)
            header_writer.writerow(columns)
        i = 0
        for entry in the_numbers:
            rectified_entry = dict()
            for key, value in entry.items():
                if key is None:
                    continue
                if type(value) == dict and 'slug' in value and 'pretty' in value:
                    rectified_entry[key]  = value['pretty']
                    rectified_entry[key + '_norm'] = value['slug']
                else:
                    rectified_entry[key] = value
            with open('output/the_numbers_rectified.csv', 'a', newline='') as outfile:
                row_writer = csv.DictWriter(outfile, columns)
                row_writer.writerow(rectified_entry)
            if rectified_entry['release_date_norm'] is not None:
                result = re.search('^(\d{4}).*', rectified_entry['release_date_norm'])
                year = result.group(1) if result is not None else None
            log = '{:0>4} {}'.format(i, rectified_entry['title'])
            if year is not None:
                log += ' ({})'.format(year)
            print(log)
            i += 1
    
    def rectify_lst(df, imdb_id, lst):
        return pd.concat([pd.DataFrame([imdb_id, el], columns=['imdb']) for el in lst])
    
    def rectify_lsts(the_numbers, output_dir):
        lst_columns = ['franchises', 'keywords', 'production_companies', 'production_countries', 'languages']
        pretty_columns = [(col, 'pretty', col) for col in lst_columns]
        slug_columns = [(col, 'slug', col + '_norm') for col in lst_columns]
        all_columns = pretty_columns + slug_columns
        dataframe_dict = {dest_col: pd.DataFrame([], columns=['imdb_id', dest_col]) 
                          for source_col, specifier, dest_col in all_columns}
        for entry in the_numbers:
            if 'imdb_id' not in entry:
                continue
            imdb_id = entry['imdb_id'] 
            for source_col, specifier, dest_col in all_columns:
                if source_col in entry:
                    # print(source_col, ' ', specifier, ' ', dest_col)
                    # dataframe_dict[dest_col] = dataframe_dict[dest_col].append(pd.concat([pd.DataFrame([imdb_id, el], columns=dataframe_dict[dest_col].columns) for el in entry[source_col][specifier]]))
                    dataframe_dict[dest_col].append(pd.concat([pd.DataFrame([[imdb_id, el[specifier]]], columns=dataframe_dict[dest_col].columns) for el in entry[source_col]]))
        display(dataframe_dict['franchises'])
        
    def rectify():
        with open('output/the_numbers_norm.json', 'r') as infile:
            the_numbers = json.load(infile)
        output_dir = os.path.join('output', 'rectified')
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        # rectify_single(the_numbers, output_dir)
        rectify_lsts(the_numbers, output_dir)
        
    rectify()
    
rectify_the_numbers()

TypeError: 'NoneType' object is not iterable

In [94]:
def merge_data():
    import pandas as pd
    
    df_movie_ratings = pd.read_csv('output/norm_movie_ratings.csv', encoding = "ISO-8859-1")
    df_the_numbers = pd.read_csv('output/the_numbers_rectified.csv', encoding = "ISO-8859-1")
    display(df_movie_ratings.head())
    display(df_the_numbers.head())
    df_merged = pd.merge(df_movie_ratings, df_the_numbers, left_on='imdb_ids', right_on='imdb_id')
    df_merged.to_csv('output/merged.csv', index=False)
    
merge_data()

Unnamed: 0.1,Unnamed: 0,imdb_ids,movie,year,imdb,metascore,votes
0,0,26029,The 39 Steps,1935,77,,47653
1,1,26138,Bride of Frankenstein,1935,78,,40030
2,2,26778,A Night at the Opera,1935,79,,28959
3,3,26752,Mutiny on the Bounty,1935,77,87.0,19293
4,4,27125,Top Hat,1935,78,,16019


Unnamed: 0,release_date,release_date_norm,title,title_norm,mpaa-rating,mpaa-rating_norm,running-time,franchises,franchises_norm,keywords,...,production_companies,production_companies_norm,production_countries,production_countries_norm,languages,languages_norm,production_budget,domestic_gross,worldwide_gross,imdb_id
0,"Dec 17, 2009",2009/12/17,Avatar,Avatar,PG-13,PG-13-(US),162.0,Avatar,Avatar,3-D;Invented Language;Visual Effects;Alien Inv...,...,Dune Entertainment;20th Century Fox;Ingenious ...,Dune-Entertainment;20th-Century-Fox;Ingenious-...,United States,United-States,English;Na'vi,English;Navi,425000000,760507625,2789705275,499549.0
1,"May 20, 2011",2011/05/20,Pirates of the Caribbean: On Stranger Tides,Pirates-of-the-Caribbean-On-Stranger-Tides,PG-13,PG-13-(US),136.0,Pirates of the Caribbean,Pirates-of-the-Caribbean,Pirates;Zombies;Visual Effects;Romance;Epilogu...,...,Walt Disney Pictures,Walt-Disney-Pictures,United States,United-States,English,English,410600000,241063875,1045663875,1298650.0
2,"Apr 23, 2019",2019/04/23,Avengers: Endgame,Avengers-Endgame-(2019),PG-13,PG-13-(US),181.0,Marvel Cinematic Universe;Avengers,Marvel-Cinematic-Universe;Avengers,Ensemble;Marvel Comics;Animal Lead;Non-Chronol...,...,Marvel Studios,Marvel-Studios,United States,United-States,English,English,400000000,858373000,2795473000,4154796.0
3,"Apr 22, 2015",2015/04/22,Avengers: Age of Ultron,Avengers-Age-of-Ultron,PG-13,PG-13-(US),141.0,Marvel Cinematic Universe;Avengers,Marvel-Cinematic-Universe;Avengers,Marvel Comics;Robot;End of the World;Hallucina...,...,Marvel Studios,Marvel-Studios,United States,United-States,English,English,330600000,459005868,1403013963,2395427.0
4,"Dec 13, 2017",2017/12/13,Star Wars Ep. VIII: The Last Jedi,Star-Wars-Ep-VIII-The-Last-Jedi,PG-13,PG-13-(US),150.0,Star Wars,Star-Wars,Space Opera;Robot;Good vs. Evil;Outer Space;Dy...,...,Lucasfilm;Walt Disney Pictures,Lucasfilm;Walt-Disney-Pictures,United States,United-States,English,English,317000000,620181382,1316721747,2527336.0
