In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException

import numpy as np
import pandas as pd
import warnings
import unidecode


warnings.filterwarnings('ignore')
import time, sys

def timeit(method):
    """
    Decorator: report the wall-clock execution time of a function on stdout.

    :param method: the function to time
    :return: a wrapper that calls ``method`` with the same arguments, writes
        the elapsed time to ``sys.stdout`` and returns whatever ``method``
        returned
    """
    import functools  # local import so this cell does not depend on another edit

    # functools.wraps keeps the decorated function's __name__ and __doc__,
    # so help() and tracebacks still show the real function.
    @functools.wraps(method)
    def timed(*arguments, **kw):
        ts = time.time()
        result = method(*arguments, **kw)
        te = time.time()

        # Leading/trailing underscores are stripped from the reported name.
        sys.stdout.write('Time:  %r %2.2f sec\n' % (method.__name__.strip("_"), te - ts))
        sys.stdout.write('------------------------------------\n')
        sys.stdout.flush()
        return result

    return timed

In [2]:
def remove_non_ascii(text):
    """Return ``text`` without the characters whose code point is 128 or higher."""
    kept_chars = [char for char in text if ord(char) < 128]
    return ''.join(kept_chars)

In [3]:
# Module-level list of game titles whose release data could not be scraped;
# get_releasetable_data appends to it.
wrong_list = []

In [4]:
# Shared Chrome WebDriver used by the scraping functions in this notebook.
# NOTE(review): the chromedriver location is an absolute path on one user's
# machine; it has to be adjusted before the notebook can run elsewhere.
driver = webdriver.Chrome('/Users/federico/Documents/datascience/jupyter/chromedriver')

In [5]:
@timeit
def get_games(games_url):
    """
    Collect the game titles listed on an index page.

    :param games_url: URL of the page to open in the shared ``driver``
    :return: list with the first text line of every ``<td class="name">``
        cell, in page order
    """
    driver.get(games_url)
    name_cells = driver.find_elements_by_xpath('//td[@class="name"]')
    return [cell.text.split('\n')[0] for cell in name_cells]

In [6]:
def _gametable_field(rows, position):
    """
    Return line ``position`` of a game data table without its first word.

    The first whitespace-separated word is dropped (presumably the row label);
    ``None`` is returned when the table has no such line.
    """
    try:
        return ' '.join(rows[position].split()[1:])
    except IndexError:
        return None


@timeit
def get_gametable_data(system_url, game_list):
    """
    Scrape developer, genre and series for every game of a system page.

    For each title the system page is reloaded, the link with that exact text
    is clicked and the text of the first ``gameDataTable`` is split into lines;
    lines 1, 2 and 3 provide developer, genre and series respectively.

    :param system_url: URL of the index page that links to the game pages
    :param game_list: game titles, used as link texts
    :return: ``(dev_list, gen_list, ser_list)``, three lists with exactly one
        entry per game; an entry is ``None`` when the link, the table or the
        line is missing
    """
    dev_list, gen_list, ser_list = [], [], []

    for game in game_list:
        driver.get(system_url)
        try:
            driver.find_element_by_link_text(game).click()
            gametable = driver.find_elements_by_xpath('//table[@class="gameDataTable"]')
            # find_elements_* returns an empty list when nothing matches, so a
            # page without the table must not be indexed blindly.
            rows = gametable[0].text.split('\n') if gametable else []
        except NoSuchElementException:
            rows = []

        dev_list.append(_gametable_field(rows, 1))
        gen_list.append(_gametable_field(rows, 2))
        ser_list.append(_gametable_field(rows, 3))

    return dev_list, gen_list, ser_list
    

In [7]:
# Region codes handled by get_releasetable_data, in the order of its result.
_RELEASE_REGIONS = ('jp', 'us', 'eu')

# Titles whose Japanese release date is hard-coded instead of scraped.
_MANUAL_JP_RELEASES = {
    'Family BASIC': '21 Jun 1984',
    'Family BASIC V3': '21 Feb 1985',
}


def _parse_release_table(release_table, countries):
    """
    Pair the text lines of a release table with the regions of its flags.

    Every flag consumes two consecutive lines (title, then date), in flag
    order. When the table has an odd number of lines, one extra line is
    skipped directly before the Japanese entry; with an even number
    ("F-Zero-like" pages) nothing is skipped.

    :param release_table: text lines of the release table, header removed
    :param countries: two-letter codes taken from the flag images (at most 3)
    :return: dict mapping 'jp', 'us' and 'eu' to a ``(title, date)`` tuple,
        ``(None, None)`` for regions without a flag
    :raises ValueError: if there is no flag or the first flag is not a known region
    :raises IndexError: if the table has fewer lines than the flags require
    """
    if not countries or countries[0] not in _RELEASE_REGIONS:
        raise ValueError('unrecognised release flags: %r' % (countries,))

    skip_line_before_jp = len(release_table) % 2 != 0

    releases = {code: (None, None) for code in _RELEASE_REGIONS}
    unassigned = list(_RELEASE_REGIONS)
    cursor = 0
    for code in countries[:len(_RELEASE_REGIONS)]:
        # Legacy rule: a later flag that is unknown, or repeats a region that
        # is already filled, is stored under the last still-unassigned region.
        target = code if code in unassigned else unassigned[-1]
        unassigned.remove(target)
        if target == 'jp' and skip_line_before_jp:
            cursor += 1
        releases[target] = (release_table[cursor], release_table[cursor + 1])
        cursor += 2
    return releases


@timeit
def get_releasetable_data(system_url, game_list, region):
    """
    Scrape the per-region release title and date of every game of a system page.

    For each title the system page is reloaded, the link with that exact text
    is clicked, and the first ``gameRelease`` table plus the first three
    ``<td class="flag">`` cells of the page are read. Exactly one entry per game
    is appended to each of the six result lists, so they always stay aligned
    with ``game_list``.

    When a game cannot be scraped (missing link, table or image, too few table
    lines, no or unknown flags) its title is appended to the module-level
    ``wrong_list``, a message is printed, and the row holds the title in the
    name column of ``region`` and ``None`` everywhere else.

    :param system_url: URL of the index page that links to the game pages
    :param game_list: game titles, used as link texts
    :param region: 'jp', 'us' or 'eu'; any other value gives an all-``None``
        row for games that cannot be scraped
    :return: ``(jap_name, jap_date, us_name, us_date, eu_name, eu_date)``
    """
    columns = {code: ([], []) for code in _RELEASE_REGIONS}

    def add_row(releases):
        # Append one (title, date) pair per region; missing regions get None.
        for code in _RELEASE_REGIONS:
            name, date = releases.get(code, (None, None))
            columns[code][0].append(name)
            columns[code][1].append(date)

    for game in game_list:
        if game in _MANUAL_JP_RELEASES:
            add_row({'jp': (game, _MANUAL_JP_RELEASES[game])})
            continue

        driver.get(system_url)
        try:
            driver.find_element_by_link_text(game).click()
            table = driver.find_elements_by_xpath('//table[@class="gameRelease"]')[0]
            release_table = table.text.split('\n')[1:]  # drop the header line
            release_flags = driver.find_elements_by_xpath('//td[@class="flag"]')[:3]
            # The region code is the two characters before the 4-character
            # suffix of the flag image URL.
            countries = [
                flag.find_element_by_tag_name('img').get_attribute('src')[-6:-4]
                for flag in release_flags
            ]
            # Parse completely before appending anything, so a failure can
            # never leave the six lists with different lengths.
            releases = _parse_release_table(release_table, countries)
        except (IndexError, ValueError, NoSuchElementException):
            wrong_list.append(game)
            print("Something wrong with game:", game)
            releases = {region: (game, None)}

        add_row(releases)

    jap_name, jap_date = columns['jp']
    us_name, us_date = columns['us']
    eu_name, eu_date = columns['eu']
    return jap_name, jap_date, us_name, us_date, eu_name, eu_date

In [8]:
# Base URL of the fallback credits source.
_KYOTO_REPORT_URL = "http://kyoto-report.wikidot.com/"


def _join_credit_names(text):
    """
    Turn the text of a kyoto-report credit cell into a comma-separated list.

    The text is split on the ``'\\n⌂ '`` marker; the first line of every chunk
    after the first one is kept. Text without the marker yields its own first
    line.
    """
    chunks = text.split('\n⌂ ')
    entries = chunks[1:] or chunks
    return ', '.join(chunk.split('\n')[0] for chunk in entries)


def _kyoto_credit(page, xpath):
    """
    Load a kyoto-report page and read one credit cell from it.

    :param page: page name appended to the kyoto-report base URL
    :param xpath: absolute XPath of the cell holding the names
    :return: the joined names, or ``None`` when the XPath matches nothing
    """
    driver.get(_KYOTO_REPORT_URL + page)
    cells = driver.find_elements_by_xpath(xpath)
    if not cells:
        return None
    return _join_credit_names(str(cells[0].text))


def getDev (game, df, w):
    """
    Fill the 'Producer' and 'Director' cells of row ``w`` for one game.

    The MobyGames NES credits page is tried first: the first credit row that
    mentions 'Producer' (respectively 'Director') without any of the qualified
    roles listed in ``roles`` is used. For whatever is still missing, the
    kyoto-report page of the game and then its ``-credits`` page are tried.
    Cells for which nothing is found are left untouched.

    :param game: URL slug of the game; for MobyGames, non-ASCII characters
        and a leading "the-" are removed first
    :param df: DataFrame with 'Producer' and 'Director' columns; modified in place
    :param w: index label of the row to fill
    :return: ``df``
    """
    roles = ['Executive', 'Sub', 'Design', 'Level', 'Programming', 'Assistant', 'Associate', 
             'Program', 'General', 'Chief']
    producer = None
    director = None

    title = remove_non_ascii(game)

    if title.startswith("the-"):
        title = title[4:]

    print ("Mobygames:", title)
    driver.get("https://www.mobygames.com/game/nes/" + title + "/credits")
    for row in driver.find_elements_by_xpath('//tr[@class="crln"]'):
        text = str(row.text)
        if any(role in text for role in roles):
            continue  # qualified role such as "Executive Producer"

        if producer is None and 'Producer' in text:
            producer = text.replace('Producers', '').replace('Producer', "").replace('Lead', "")
        if director is None and 'Director' in text:
            director = text.replace('Game', '').replace('Directors', '').replace('Director', "").replace('Lead', "")

        if producer is not None and director is not None:
            break

    # Fallbacks keep the original request order: producer pages first, then
    # director pages.
    if producer is None:
        producer = _kyoto_credit(
            game, '/html/body/div[1]/div/div[1]/div/div[2]/div/div[3]/table[2]/tbody/tr/td[2]/p[1]')
    if producer is None:
        producer = _kyoto_credit(
            game + "-credits", '/html/body/div[1]/div/div[1]/div/div[2]/div/div[3]/p[2]')

    if director is None:
        director = _kyoto_credit(
            game, '/html/body/div[1]/div/div[1]/div/div[2]/div/div[3]/table[2]/tbody/tr/td[1]/p[1]')
    if director is None:
        director = _kyoto_credit(
            game + "-credits", '/html/body/div[1]/div/div[1]/div/div[2]/div/div[3]/p[3]')

    # .loc writes straight into the frame; chained df[col][w] = ... may write
    # to a temporary copy instead.
    if producer is not None:
        df.loc[w, 'Producer'] = producer
    if director is not None:
        df.loc[w, 'Director'] = director

    return df

In [9]:
def _credits_slug(title):
    """Turn a game title into the lowercase, dash-separated slug passed to getDev."""
    slug = title.lower()
    # The replacements are applied in this exact order.
    for old, new in ((' & ', '-'), (' ', '-'), (':-', '-'), ("'", ''),
                     ("ū", ''), (".", ''), ("!", ''), ("ō", '')):
        slug = slug.replace(old, new)
    return slug


@timeit
def main(scrapeDev=False):
    """
    Scrape the nindb.net NES/Famicom game lists into one DataFrame.

    The Japanese list is scraped first. The North American list is then
    scraped only for titles not already found as a US title, and the European
    list only for titles not already found as an EU title. Duplicate rows are
    dropped before the optional credits lookup.

    :param scrapeDev: when true, 'Producer' and 'Director' columns are added
        and filled through getDev (one or more page loads per game)
    :return: DataFrame with the columns 'JP Title', 'JP Release Date',
        'US Title', 'US Release Date', 'EU Title', 'EU Release Date',
        'System', 'Developer', 'Genre', 'Series', plus 'Producer' and
        'Director' when ``scrapeDev`` is true
    """
    jp_url = 'https://nindb.net/nes/games-jp.html' #Japan
    pw_url = "https://nindb.net/nes/np-jp.html" #Additional Japanese games (Nintendo Power)
    us_url = 'https://nindb.net/nes/games-us.html' #North America
    eu_url = 'https://nindb.net/nes/games-eu.html' #Europe

    print('Getting game titles...')
    game_list = get_games(jp_url)
    game_list_pw = []  # the Nintendo Power list is currently not scraped

    print('Getting release data...')
    jap_name, jap_date, us_name, us_date, eu_name, eu_date = get_releasetable_data(jp_url, game_list, 'jp')
    jap_name_pw, jap_date_pw, us_name_pw, us_date_pw, eu_name_pw, eu_date_pw = get_releasetable_data(pw_url, game_list_pw, 'jp')

    print('Getting game data...')
    dev_list, gen_list, ser_list = get_gametable_data(jp_url, game_list)
    dev_list_pw, gen_list_pw, ser_list_pw = get_gametable_data(pw_url, game_list_pw)

    # North American titles that the Japanese pass did not already yield.
    known_us_titles = set(us_name)
    game_list_us = [g for g in get_games(us_url) if g not in known_us_titles]
    jap_name_us, jap_date_us, us_name_us, us_date_us, eu_name_us, eu_date_us = get_releasetable_data(us_url, game_list_us, 'us')
    dev_list_us, gen_list_us, ser_list_us = get_gametable_data(us_url, game_list_us)

    # European titles that neither earlier pass yielded (set built once).
    known_eu_titles = set(eu_name + eu_name_us)
    game_list_eu = [g for g in get_games(eu_url) if g not in known_eu_titles]
    jap_name_eu, jap_date_eu, us_name_eu, us_date_eu, eu_name_eu, eu_date_eu = get_releasetable_data(eu_url, game_list_eu, 'eu')
    dev_list_eu, gen_list_eu, ser_list_eu = get_gametable_data(eu_url, game_list_eu)

    print('Building dataframe...')
    jp_titles = jap_name + jap_name_pw + jap_name_us + jap_name_eu
    df = pd.DataFrame({
        'JP Title': jp_titles,
        'JP Release Date': jap_date + jap_date_pw + jap_date_us + jap_date_eu,
        'US Title': us_name + us_name_pw + us_name_us + us_name_eu,
        'US Release Date': us_date + us_date_pw + us_date_us + us_date_eu,
        'EU Title': eu_name + eu_name_pw + eu_name_us + eu_name_eu,
        'EU Release Date': eu_date + eu_date_pw + eu_date_us + eu_date_eu,
        'System': ["FC"] * len(jp_titles),
        'Developer': dev_list + dev_list_pw + dev_list_us + dev_list_eu,
        'Genre': gen_list + gen_list_pw + gen_list_us + gen_list_eu,
        'Series': ser_list + ser_list_pw + ser_list_us + ser_list_eu,
    })

    df = df.drop_duplicates().reset_index(drop=True)

    if scrapeDev:
        print('Getting developers data...')

        df['Producer'] = [None] * len(df)
        df['Director'] = [None] * len(df)
        for w in range(len(df)):
            # Manual title corrections, written with .loc so they reach the frame.
            if df.loc[w, 'JP Title'] == 'Mother':
                df.loc[w, 'US Title'] = "EarthBound Beginnings"
                df.loc[w, 'EU Title'] = df.loc[w, 'US Title']
            if df.loc[w, 'JP Title'] == 'Yume Koujou: Dokidoki Panic':
                # This was a no-op comparison (==) before; it is now an assignment.
                df.loc[w, 'JP Title'] = 'Yume Kōjō: Dokidoki Panic'

            us_title = df.loc[w, 'US Title']
            eu_title = df.loc[w, 'EU Title']
            jp_title = df.loc[w, 'JP Title']

            # Look the credits up under the US title, else the EU one, else the JP one.
            if pd.notna(us_title):
                title = _credits_slug(us_title)
                if title == 'star-fox':
                    title = 'star-fox_'
                elif title == 'battleclash':
                    title = 'battle-clash'
                df = getDev (title, df, w)
            elif pd.notna(eu_title):
                df = getDev (_credits_slug(eu_title), df, w)
            elif pd.notna(jp_title):
                df = getDev (_credits_slug(jp_title), df, w)

    print('Done!')
    return df


In [10]:
df  = main(False) 
# True = also fetch the developer credits. Default = False
# drop_duplicates always runs before the developer credits are fetched
# All regions are scraped

# SIDE EFFECTS:
# 1. Games released "in episodes" on the Disk System appear only once, with two release dates
# 2. Games in the wrong list that were released in several regions appear once per region, with the other regions set to "None"


Getting game titles...
Time:  'get_games' 1.95 sec
------------------------------------
Getting release data...
Time:  'get_releasetable_data' 47.59 sec
------------------------------------
Time:  'get_releasetable_data' 0.00 sec
------------------------------------
Getting game data...
Time:  'get_gametable_data' 11.02 sec
------------------------------------
Time:  'get_gametable_data' 0.00 sec
------------------------------------
Time:  'get_games' 0.60 sec
------------------------------------
Something wrong with game: 10-Yard Fight
Something wrong with game: Slalom
Something wrong with game: Rad Racer
Something wrong with game: R.C. Pro-Am
Something wrong with game: World Class Track Meet
Something wrong with game: Anticipation
Something wrong with game: Super Mario Bros. + Duck Hunt
Something wrong with game: Super Team Games
Something wrong with game: Dance Aerobics
Something wrong with game: Cobra Triangle
Something wrong with game: Dragon Warrior
Something wrong with game: Fax

In [11]:
# Display the frame returned by main().
df

Unnamed: 0,JP Title,JP Release Date,US Title,US Release Date,EU Title,EU Release Date,System,Developer,Genre,Series
0,Donkey Kong,15 Jul 1983,Donkey Kong,Jun 1986,Donkey Kong,1986,FC,Nintendo,Platformer,Donkey Kong Series
1,Donkey Kong Jr.,15 Jul 1983,Donkey Kong Jr.,Jun 1986,Donkey Kong Jr.,1987,FC,Nintendo,Platformer,Donkey Kong Series
2,Popeye,15 Jul 1983,Popeye,Jun 1986,Popeye,1986,FC,Nintendo,Action,Popeye Series
3,Gomoku Narabe Renju,27 Aug 1983,,,,,FC,Nintendo,Tabletop Game,
4,Mahjong,27 Aug 1983,,,,,FC,Nintendo,Mahjong,Mahjong Series
...,...,...,...,...,...,...,...,...,...,...
137,,,,,Battletoads & Double Dragon,,FC,,,
138,,,,,Mario Bros.,1993,FC,Nintendo,Platformer,
139,,,,,Mega Man 4,,FC,,,
140,,,,,Mega Man 5,,FC,,,


In [12]:
# First table of the nesdir page: one row per title, Nintendo-published only.
mapper = (
    pd.read_html("https://nesdir.github.io/")[0]
    .drop_duplicates(subset=['Title'])
    .loc[lambda table: table['Publisher'] == 'Nintendo']
    .reset_index(drop=True)
)


In [13]:
# Titles that get a fixed name regardless of their mapper number; the first
# substring found in the title wins, in this order.
_TITLE_MAPPER_OVERRIDES = {
    'StarTropics': 'MMC6',
    'Gumshoe': 'GNROM',
    'Super Mario Bros. / Duck Hunt': 'MHROM',
}

# Mapper number -> name used in this notebook.
_MAPPER_NAMES = {
    0: 'NROM',
    1: 'MMC1', 105: 'MMC1',
    2: 'UNROM',
    3: 'CNROM',
    4: 'MMC3', 37: 'MMC3', 47: 'MMC3', 118: 'MMC3', 119: 'MMC3',
    5: 'MMC5',
    7: 'AxROM',
    9: 'MMC2',
    10: 'MMC4',
    66: 'GxROM',
}


def _mapper_name(title, code):
    """Return the mapper name for one row; values without a mapping are returned unchanged."""
    if isinstance(title, str):  # a missing title cannot match an override
        for fragment, name in _TITLE_MAPPER_OVERRIDES.items():
            if fragment in title:
                return name
    return _MAPPER_NAMES.get(code, code)


# Replace the whole column at once instead of writing cell by cell through
# chained indexing (mapper['Mapper'].iloc[i] = ...), which may not reach the frame.
mapper['Mapper'] = [
    _mapper_name(title, code)
    for title, code in zip(mapper['Title'], mapper['Mapper'])
]


In [14]:
# Media type label: the text "Cartridge (<mapper>)" for every row.
mapper['Media Type'] = mapper['Mapper'].map('Cartridge ({})'.format)

In [15]:
# Display the mapper table, including the 'Media Type' column.
mapper

Unnamed: 0,CRC32,Title,Region,Catalog ID,Publisher,Board,Mapper,Media Type
0,D91104F1,4-nin Uchi Mahjong,Japan,HVC-FJ,Nintendo,HVC-NROM-128,NROM,Cartridge (NROM)
1,99A9F57E,Anticipation,USA,NES-AP-USA,Nintendo,NES-SEROM,MMC1,Cartridge (MMC1)
2,2B462010,Balloon Fight,Japan,HVC-BF,Nintendo,HVC-RROM,NROM,Cartridge (NROM)
3,4318A2F8,Barker Bill's Trick Shooting,USA,NES-ZT-USA,Nintendo,NES-SCROM,MMC1,Cartridge (MMC1)
4,78A48B23,Baseball,Japan,HVC-BA,Nintendo,HVC-NROM-128,NROM,Cartridge (NROM)
...,...,...,...,...,...,...,...,...
102,1E4D3831,Yoshi no Tamago,Japan,HVC-YO,Nintendo,HVC-SFROM,MMC1,Cartridge (MMC1)
103,52B58732,Yoshi's Cookie,Canada,NES-CH-CAN,Nintendo,NES-TLROM,MMC3,Cartridge (MMC3)
104,BA322865,Zelda II: The Adventure of Link,USA,NES-AL-USA,Nintendo,NES-SKROM,MMC1,Cartridge (MMC1)
105,7AE0BF3C,Zelda no Densetsu 1: The Hyrule Fantasy,Japan,HVC-ZL,Nintendo,HVC-SNROM,MMC1,Cartridge (MMC1)


In [28]:
# Scratch check of the MobyGames credit parsing done in getDev: same logic for
# a single title, printing the result instead of writing it to a DataFrame.
game = 'legend-of-zelda'

# Qualified roles; a credit row mentioning any of them is skipped.
roles = ['Executive', 'Sub', 'Design', 'Level', 'Programming', 'Assistant', 'Associate', 
         'Program', 'General', 'Chief']
producer_flag = False
director_flag = False

title = remove_non_ascii(game)

if title.startswith("the-"):
    title = title[4:]

driver.get("https://www.mobygames.com/game/nes/" + title + "/credits")
devs = driver.find_elements_by_xpath('//tr[@class="crln"]')
for d in devs:
    row_text = str(d.text)
    plain_role = not any(role in row_text for role in roles)

    if not producer_flag and 'Producer' in row_text and plain_role:
        print ("Producer", row_text.replace('Producers', '').replace('Producer', "").replace('Lead', ""))
        producer_flag = True

    if not director_flag and 'Director' in row_text and plain_role:
        # Use the director clean-up from getDev; the producer one left the
        # word "Director" in the printed value.
        print ("Director", row_text.replace('Game', '').replace('Directors', '').replace('Director', "").replace('Lead', ""))
        director_flag = True

    if producer_flag and director_flag:
        break

Producer  Shigeru Miyamoto (S. Miyahon)
Director Director Shigeru Miyamoto (S. Miyahon), Takashi Tezuka (Ten Ten)


In [46]:
# Display rows 50-99 of the mapper table.
# NOTE(review): this cell's execution count is out of sequence and its saved
# output shows numeric mapper codes and no 'Media Type' column, so it was
# presumably run before the renaming cells — re-run or remove.
mapper[50:100]

Unnamed: 0,CRC32,Title,Region,Catalog ID,Publisher,Board,Mapper
50,92A2185C,Mike Tyson's Punch-Out!!,Japan,HVC-PT,Nintendo,HVC-PEEOROM,9
51,20CC079D,Mother,Japan,HVC-MX,Nintendo,HVC-TKROM,4
52,F6B9799C,NES Open Tournament Golf,USA,NES-UG-USA,Nintendo,NES-SNROM,1
53,B9B4D9E0,NES Play Action Football,USA,NES-NB-USA,Nintendo,NES-TLSROM,118
54,E149E0B2,NTF2 System Cartridge,USA,NTC-NTF2-SYS,Nintendo,HVC-SIROM,1
55,0B0E128F,Nintendo World Championships 1990,USA,NES-90-USA,Nintendo,NES-EVENT,105
56,A22657FA,Nintendo World Cup,USA,NES-XZ-USA,Nintendo,NES-TLROM,4
57,D19ADDEB,Pin-Bot,USA,NES-IO-USA,Nintendo,NES-TQROM,119
58,035DC2E9,Pinball,Japan,HVC-PN,Nintendo,HVC-NROM-128,0
59,5581E835,Popeye,Japan,HVC-PP,Nintendo,HVC-NROM-128,0
