In [62]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException

import pandas as pd
import warnings
import unidecode


warnings.filterwarnings('ignore')
import time, sys

def timeit(method):
    """
    Decorator: report the execution time of a function on stdout.

    Arguments and the return value of the decorated function are passed
    through untouched. After every call a ``Time:`` line with the function name
    (surrounding underscores stripped) and the elapsed seconds is written,
    followed by a separator line.

    :param method: the function to time
    :return: a wrapper that calls ``method`` and prints its runtime
    """
    # Imported locally so this cell does not need an extra top-level import.
    import functools

    @functools.wraps(method)  # keep __name__ / __doc__ of the decorated function
    def timed(*arguments, **kw):
        # perf_counter is monotonic, so the measurement is not affected by
        # wall-clock adjustments happening during the call.
        ts = time.perf_counter()
        result = method(*arguments, **kw)
        te = time.perf_counter()

        sys.stdout.write('Time:  %r %2.2f sec\n' % (method.__name__.strip("_"), te - ts))
        sys.stdout.write('------------------------------------\n')
        sys.stdout.flush()
        return result

    return timed

In [63]:
def remove_non_ascii(text):
    """
    Drop every character whose code point is 128 or above.

    :param text: the string to filter
    :return: ``text`` with only its ASCII characters, in their original order
    """
    ascii_only = filter(lambda char: ord(char) < 128, text)
    return ''.join(ascii_only)

In [64]:
# Shared Chrome session used by every scraping function in this notebook.
# NOTE(review): the chromedriver path is absolute and machine-specific; it has
# to be adapted before the notebook can run on another machine.
driver = webdriver.Chrome('/Users/federico/Documents/datascience/jupyter/chromedriver')

In [65]:
@timeit
def get_games(games_url):
    """
    Load a game-list page and collect the titles it shows.

    :param games_url: URL of the list page, opened with the shared ``driver``
    :return: list with the first text line of every ``td`` cell of class ``name``
    """
    driver.get(games_url)
    name_cells = driver.find_elements_by_xpath('//td[@class="name"]')
    # Only the first line of each cell is kept; text after a line break is dropped.
    return [cell.text.split('\n')[0] for cell in name_cells]

In [66]:
@timeit
def get_gametable_data(system_url, game_list):
    """
    Collect developer, genre and series for every game of a list page.

    For each title the list page is reloaded, the link whose text equals the
    title is followed, and the text of the first ``gameDataTable`` table is
    split into lines. Lines 1, 2 and 3 are used for developer, genre and
    series respectively; the first word of each line is dropped.

    :param system_url: URL of the list page that links to the game pages
    :param game_list: link texts (titles) to look up
    :return: ``(dev_list, gen_list, ser_list)``, three lists with one entry per
        game; an entry is ``None`` when the link, the table or the line is missing
    """

    def _row_value(rows, index):
        # Everything after the first word of the line, or None if the line is absent.
        try:
            return ' '.join(rows[index].split()[1:])
        except IndexError:
            return None

    dev_list, gen_list, ser_list = [], [], []

    for game in game_list:
        driver.get(system_url)
        try:
            driver.find_element_by_link_text(game).click()
            tables = driver.find_elements_by_xpath('//table[@class="gameDataTable"]')
            # A page without the table yields no lines instead of an uncaught
            # IndexError, so the three lists always stay aligned with game_list.
            rows = tables[0].text.split('\n') if tables else []
        except NoSuchElementException:
            rows = []

        dev_list.append(_row_value(rows, 1))
        gen_list.append(_row_value(rows, 2))
        ser_list.append(_row_value(rows, 3))

    return dev_list, gen_list, ser_list
    

In [67]:
# Region codes handled by the release scraper, in the order of the returned columns.
_RELEASE_REGIONS = ('jp', 'us', 'eu')


def _release_region_order(countries):
    """
    Decide which region each release entry of a page belongs to.

    The first flag code must be ``jp``, ``us`` or ``eu``. The second flag is
    only compared with a single code (``us`` after ``jp``, otherwise ``jp``);
    any other value selects the alternative region. A third entry is assigned
    to whichever region is left.

    :param countries: flag codes found on the page, at most three
    :return: list of regions, one per entry, or ``None`` when there is no flag
        or the first flag is not a known region
    """
    if not countries or countries[0] not in _RELEASE_REGIONS:
        return None

    first = countries[0]
    order = [first]
    if len(countries) >= 2:
        others = [code for code in _RELEASE_REGIONS if code != first]
        preferred = 'us' if first == 'jp' else 'jp'
        fallback = next(code for code in others if code != preferred)
        second = preferred if countries[1] == preferred else fallback
        order.append(second)
        if len(countries) >= 3:
            order.append(next(code for code in others if code != second))
    return order


def _parse_release_rows(release_table, countries):
    """
    Extract ``(name, date)`` per region from the lines of a release table.

    Entries are read sequentially. Each one normally spans two lines (name,
    date). When the table has an odd number of lines, one entry spans three
    lines (name, an extra line, date): the ``jp`` entry, or the only entry of
    a single-region table.

    :param release_table: text lines of the release table without its first line
    :param countries: flag codes found on the page, at most three
    :return: dict mapping region to ``(name, date)``, or ``None`` when the
        flags cannot be mapped to regions
    :raises IndexError: when the table has fewer lines than the entries need
    """
    order = _release_region_order(countries)
    if order is None:
        return None

    odd_length = len(release_table) % 2 != 0
    parsed = {}
    cursor = 0
    for code in order:
        three_lines = odd_length and (code == 'jp' or len(order) == 1)
        date_offset = 2 if three_lines else 1
        parsed[code] = (release_table[cursor], release_table[cursor + date_offset])
        cursor += date_offset + 1
    return parsed


@timeit
def get_releasetable_data(system_url, game_list, region):
    """
    Collect the regional release names and dates of every game of a list page.

    For each title the list page is reloaded and the link whose text equals
    the title is followed. The text of the first ``gameRelease`` table (minus
    its first line) and the first three ``td`` cells of class ``flag`` are then
    parsed. The flag code is the two characters preceding the four-character
    extension of the flag image ``src``.

    Exactly one row is appended to all six lists for every game, so the lists
    always have the same length as ``game_list``. When a page cannot be parsed
    (missing link, table or lines, unknown flags) a message is printed and the
    row contains only the title itself, stored as the name of ``region``.

    :param system_url: URL of the list page that links to the game pages
    :param game_list: link texts (titles) to look up
    :param region: ``'jp'``, ``'us'`` or ``'eu'``, the region that receives the
        title in the fallback row; any other value gives an all-``None`` row
    :return: ``(jap_name, jap_date, us_name, us_date, eu_name, eu_date)``
    """
    columns = {code: ([], []) for code in _RELEASE_REGIONS}

    for game in game_list:
        driver.get(system_url)
        try:
            driver.find_element_by_link_text(game).click()
            table = driver.find_elements_by_xpath('//table[@class="gameRelease"]')[0]
            release_table = table.text.split('\n')[1:]
            release_flags = driver.find_elements_by_xpath('//td[@class="flag"]')[:3]
            countries = [
                flag.find_element_by_tag_name('img').get_attribute('src')[-6:-4]
                for flag in release_flags
            ]
            # Parsed into a temporary dict first: a failure half-way through
            # can no longer leave some of the result lists one item longer.
            parsed = _parse_release_rows(release_table, countries)
        except (IndexError, NoSuchElementException):
            parsed = None

        if parsed is None:
            print("Something wrong with game:", game)
            parsed = {region: (game, None)} if region in _RELEASE_REGIONS else {}

        for code in _RELEASE_REGIONS:
            name, date = parsed.get(code, (None, None))
            columns[code][0].append(name)
            columns[code][1].append(date)

    jap_name, jap_date = columns['jp']
    us_name, us_date = columns['us']
    eu_name, eu_date = columns['eu']
    return jap_name, jap_date, us_name, us_date, eu_name, eu_date

In [68]:
#@timeit

def _credited_names(credit_text):
    """
    Join the names listed in a kyoto-report credit paragraph.

    The text is split on the marker made of a line break, the ``⌂`` sign and a
    space. The piece before the first marker is skipped and only the first
    line of every following piece is kept.

    :param credit_text: text of the credit paragraph
    :return: the names joined with ``", "``; empty string when no marker is present
    """
    entries = credit_text.split('\n⌂ ')[1:]
    return ", ".join(entry.split('\n')[0] for entry in entries)


def _wiki_credit(page_url, xpath):
    """
    Open ``page_url`` and read the credit names of the first node matching ``xpath``.

    :return: the joined names, or ``None`` when the node is missing or lists no name
    """
    driver.get(page_url)
    nodes = driver.find_elements_by_xpath(xpath)
    if not nodes:
        return None
    return _credited_names(nodes[0].text) or None


def getDev (game, df, w, platform="wii"):
    """
    Look up producer and director of a game and store them in row ``w`` of ``df``.

    The MobyGames credits page is tried first: the first credit row mentioning
    ``Producer`` (respectively ``Director``) without any of the qualified role
    words is used, with the role words removed from its text. Whatever is
    still missing is then looked up on kyoto-report, first on the game page and
    then on its ``-credits`` page. Cells stay untouched when nothing is found.

    :param game: URL slug of the game
    :param df: DataFrame with ``Producer`` and ``Director`` columns
    :param w: index label of the row to fill
    :param platform: platform segment of the MobyGames URL
    :return: ``df`` (modified in place)
    """
    print (game)
    # Rows containing one of these words are qualified roles and are skipped.
    roles = ['Executive', 'Sub', 'Design', 'Level', 'Programming', 'Assistant', 'Associate',
             'Program', 'General', 'Chief']

    title = remove_non_ascii(game)
    if title.startswith("the-"):
        title = title[4:]

    producer = None
    director = None

    driver.get("https://www.mobygames.com/game/" + platform + "/" + title + "/credits")
    for row in driver.find_elements_by_xpath('//tr[@class="crln"]'):
        text = row.text  # read once: every access is a round trip to the browser
        plain_role = not any(role in text for role in roles)

        if producer is None and plain_role and 'Producer' in text:
            producer = text.replace('Producers', '').replace('Producer', "").replace('Lead', "")
        if director is None and plain_role and 'Director' in text:
            director = text.replace('Game', '').replace('Directors', '').replace('Director', "").replace('Lead', "")

        if producer is not None and director is not None:
            break

    wiki_page = "http://kyoto-report.wikidot.com/" + game
    content = '/html/body/div[1]/div/div[1]/div/div[2]/div/div[3]'

    if producer is None:
        producer = _wiki_credit(wiki_page, content + '/table[2]/tbody/tr/td[2]/p[1]')
    if producer is None:
        producer = _wiki_credit(wiki_page + "-credits", content + '/p[2]')

    if director is None:
        director = _wiki_credit(wiki_page, content + '/table[2]/tbody/tr/td[1]/p[1]')
    if director is None:
        director = _wiki_credit(wiki_page + "-credits", content + '/p[3]')

    # Single-step label access: chained indexing (df[col][w] = ...) may write to
    # a temporary copy and leave df unchanged.
    if producer is not None:
        df.at[w, 'Producer'] = producer
    if director is not None:
        df.at[w, 'Director'] = director

    return df

In [69]:
def _title_slug(title):
    """
    Turn a game title into the lower-case, dash-separated slug used in credit URLs.

    ``' & '`` and spaces become dashes, ``':-'`` collapses to a dash, and
    apostrophes, dots, exclamation marks, ``ū`` and ``ō`` are removed.
    """
    slug = title.lower().replace(' & ', '-').replace(' ', '-').replace(':-', '-')
    for unwanted in ("'", "ū", ".", "!", "ō"):
        slug = slug.replace(unwanted, '')
    return slug


@timeit
def main():
    """
    Scrape two nindb.net list pages and build the combined games table.

    Titles, release data and game data are collected for the first 15 games of
    each list (games of the second list already present in the first are
    skipped), merged into one DataFrame, and completed with producer and
    director through ``getDev``.

    :return: the DataFrame with one row per game
    """
    # NOTE(review): the variable is called super_famicom_url and the System
    # column is filled with "SFC", but this URL is the Wii list — confirm intent.
    super_famicom_url = 'https://nindb.net/wii/games-jp.html'
    pw_url = "https://nindb.net/snes/np-jp.html"
    # The US list (https://nindb.net/snes/games-us.html) is not scraped at the moment.

    print('Getting game titles...')
    game_list = get_games(super_famicom_url)[0:15]
    game_list_pw = [g for g in get_games(pw_url) if g not in game_list][0:15]

    print('Getting release data...')
    _, jap_date, us_name, us_date, eu_name, eu_date = get_releasetable_data(super_famicom_url, game_list, 'jp')
    _, jap_date_pw, us_name_pw, us_date_pw, eu_name_pw, eu_date_pw = get_releasetable_data(pw_url, game_list_pw, 'jp')

    print('Getting game data...')
    dev_list, gen_list, ser_list = get_gametable_data(super_famicom_url, game_list)
    dev_list_pw, gen_list_pw, ser_list_pw = get_gametable_data(pw_url, game_list_pw)

    print('Building dataframe...')
    titles = game_list + game_list_pw
    df = pd.DataFrame({
        'JP Title': titles,
        'JP Release Date': jap_date + jap_date_pw,
        'US Title': us_name + us_name_pw,
        'US Release Date': us_date + us_date_pw,
        'EU Title': eu_name + eu_name_pw,
        'EU Release Date': eu_date + eu_date_pw,
        'System': ["SFC"] * len(titles),
        'Developer': dev_list + dev_list_pw,
        'Genre': gen_list + gen_list_pw,
        'Series': ser_list + ser_list_pw,
    })

    print('Getting developers data...')

    df['Producer'] = [None] * len(df)
    df['Director'] = [None] * len(df)
    for w in range(len(df)):
        # df.at writes straight into df; chained indexing (df[col][w] = ...)
        # may modify a temporary copy instead.
        if df.at[w, 'JP Title'] == 'Mario no Super Picross':
            df.at[w, 'US Title'] = "Mario's Super Picross"
            df.at[w, 'EU Title'] = df.at[w, 'US Title']

        us_title = df.at[w, 'US Title']
        eu_title = df.at[w, 'EU Title']
        jp_title = df.at[w, 'JP Title']

        # The first available title wins: US, then EU, then JP.
        if us_title is not None:
            title = _title_slug(us_title)
            # Slugs that differ from the generated one (applied to US titles only).
            if title == 'star-fox':
                title = 'star-fox_'
            elif title == 'battleclash':
                title = 'battle-clash'
        elif eu_title is not None:
            title = _title_slug(eu_title)
        elif jp_title is not None:
            title = _title_slug(jp_title)
        else:
            continue

        df = getDev (title, df, w)

    print('Done!')
    return df


In [70]:
#'famicom-tantei' in'famicom-tantei-club-ushiro-ni-tatsu-shjo'

In [71]:
# Run the whole scraping pipeline and keep the resulting DataFrame.
df  = main()

Getting game titles...
Time:  'get_games' 1.31 sec
------------------------------------
Time:  'get_games' 0.69 sec
------------------------------------
Getting release data...
Time:  'get_releasetable_data' 6.98 sec
------------------------------------
Time:  'get_releasetable_data' 6.00 sec
------------------------------------
Getting game data...
Time:  'get_gametable_data' 2.08 sec
------------------------------------
Time:  'get_gametable_data' 2.15 sec
------------------------------------
Building dataframe...
Getting developers data...
wii-play
warioware-smooth-moves
wii-sports
the-legend-of-zelda-twilight-princess
pokémon-battle-revolution
excite-truck
fire-emblem-radiant-dawn
eyeshield-21-field-saiky-no-senshi-tachi
super-paper-mario
big-brain-academy-wii-degree
donkey-kong-barrel-blast
mario-party-8
endless-ocean
mario-strikers-charged
super-mario-galaxy
f-zero
fire-emblem-monsh-no-nazo
fire-emblem-seisen-no-keifu
kirbys-dream-course
marios-super-picross
earthbound
panel-de-p

In [72]:
df

Unnamed: 0,JP Title,JP Release Date,US Title,US Release Date,EU Title,EU Release Date,System,Developer,Genre,Series,Producer,Director
0,Hajimete no Wii,02 Dec 2006,Wii Play,12 Feb 2007,Wii Play,08 Dec 2006,SFC,Nintendo,Minigames,Wii Play Series,Katsuya Eguchi,Motoi Okamoto
1,Odoru Made in Wario,02 Dec 2006,WarioWare: Smooth Moves,15 Jan 2007,WarioWare: Smooth Moves,12 Jan 2007,SFC,"Nintendo, Intelligent Systems",Minigames,Super Mario Series,"Yoshio Sakamoto, Ryouichi Kitanishi",Taku Sugioka
2,Wii Sports,02 Dec 2006,Wii Sports,19 Nov 2006,Wii Sports,08 Dec 2006,SFC,Nintendo,Sports,Wii Sports Series,Katsuya Eguchi,"Keizo Ohta, Takayuki Shimamura, Yoshikazu Yam..."
3,Zelda no Densetsu: Twilight Princess,02 Dec 2006 02 Dec 2006,The Legend of Zelda: Twilight Princess,19 Nov 2006 11 Dec 2006,The Legend of Zelda: Twilight Princess,08 Dec 2006 15 Dec 2006,SFC,Nintendo,Action Adventure,The Legend of Zelda Series,Shigeru Miyamoto,Eiji Aonuma
4,Pokémon Battle Revolution,14 Dec 2006,Pokémon Battle Revolution,26 Jun 2007,Pokémon Battle Revolution,07 Dec 2007,SFC,Genius Sonority,RPG Battle,Pokémon Series,"Hiroyuki Jinnai, Hiroaki Tsuru",Manabu Yamana
5,Excite Truck,18 Jan 2007,Excite Truck,19 Nov 2006,Excite Truck,16 Feb 2007,SFC,Monster Games,Racing,Excitebike Series,"Kensuke Tanabe, Keisuke Terasaki",
6,Fire Emblem: Akatsuki no Megami,22 Feb 2007,Fire Emblem: Radiant Dawn,05 Nov 2007,Fire Emblem: Radiant Dawn,14 Mar 2008,SFC,Intelligent Systems,Strategy RPG,Fire Emblem Series,"Toru Narihiro, Hitoshi Yamagami",Taeko Kaneda
7,Eyeshield 21: Field Saikyō no Senshi-Tachi,08 Mar 2007,,,,,SFC,Nintendo,Sport (American Football),Eyeshield 21 Series,,
8,Super Paper Mario,19 Apr 2007,Super Paper Mario,09 Apr 2007,Super Paper Mario,14 Sep 2007,SFC,Intelligent Systems,Action RPG,Super Mario Series,"Kensuke Tanabe, Ryouichi Kitanishi",Art Chie Kawabe
9,Wii de Yawaraka Atama Juku,26 Apr 2007,Big Brain Academy: Wii Degree,11 Jun 2007,Big Brain Academy for Wii,20 Jul 2007,SFC,Nintendo,"Education, Minigames",Big Brain Academy Series,Hiroyuki Kimura,Tomoaki Yoshinobu


In [21]:
# Scratch cell: re-run the release scraping against the SNES list page.
# NOTE(review): `game_list` is bound to `df` here, which is only meaningful
# while `df` holds a plain list of titles rather than the DataFrame returned
# by main() — confirm before re-running.
super_famicom_url = 'https://nindb.net/snes/games-jp.html'
game_list = df
jap_name, jap_date, us_name, us_date, eu_name, eu_date = get_releasetable_data(super_famicom_url, game_list, 'jp')


Time:  'get_releasetable_data' 0.34 sec
------------------------------------


ValueError: too many values to unpack (expected 6)

In [22]:
df

['Super Mario World',
 'F-Zero',
 'Pilotwings',
 'SimCity',
 'Zelda no Densetsu: Kamigami no Triforce',
 'Mario Paint',
 'Super Mario Kart',
 'Star Fox',
 'Super Scope 6',
 'Super Scope Space Bazooka']

In [None]:
release_table

In [49]:
release_table

['F-Zero', '21 Nov 1990', 'F-Zero', 'Aug 1991', 'F-Zero', 'Jun 1992']

In [25]:
df

Unnamed: 0,JP Title,JP Release Date,US Title,US Release Date,EU Title,EU Release Date,System,Developer,Genre,Series,Producer,Director
0,Super Mario World,21 Nov 1990,Super Mario World,Aug 1991,Super Mario World,Apr 1992,SFC,Nintendo,Platformer,Super Mario Series,Shigeru Miyamoto,Takashi Tezuka
1,F-Zero,,F-Zero,,F-Zero,,SFC,Nintendo,Racing,F-Zero Series,Shigeru Miyamoto,
2,Pilotwings,21 Nov 1990,Pilotwings,Aug 1991,Pilotwings,Jan 1993,SFC,Nintendo,Flight Simulation,Pilotwings Series,Shigeru Miyamoto,Tadashi Sugiyama
3,SimCity,26 Apr 1991,Sim City,Aug 1991,Sim City,Sep 1992,SFC,"Nintendo, Maxis",Simulation,SimCity Series,Shigeru Miyamoto,"Hideki Konno, Tadashi Sugiyama"
4,Zelda no Densetsu: Kamigami no Triforce,21 Nov 1991,The Legend of Zelda: A Link to the Past,Apr 1992,The Legend of Zelda: A Link to the Past,Sep 1992,SFC,Nintendo,Action / Adventure,The Legend of Zelda Series,Shigeru Miyamoto,Takashi Tezuka
5,Mario Paint,14 Jul 1992,Mario Paint,Aug 1992,Mario Paint,Dec 1992,SFC,Nintendo,Art Utility,Super Mario Series,(uncredited) Gunpei Yokoi,Hirofumi Matsuoka
6,Super Mario Kart,27 Aug 1992,Super Mario Kart,Sep 1992,Super Mario Kart,Jan 1993,SFC,Nintendo,Racing,Super Mario Series,Shigeru Miyamoto,"Tadashi Sugiyama, Hideki Konno"
7,Star Fox,21 Feb 1993,Star Fox,Mar 1993,Starwing,Jun 1993,SFC,Nintendo / Argonaut,Shooting,Star Fox Series,Shigeru Miyamoto,Katsuya Eguchi
8,Super Scope 6,18 Mar 1993,Super Scope 6,Feb 1992,Nintendo Scope 6,1992,SFC,Nintendo,Light Gun,,Gunpei Yokoi,"Makoto Kano, Masao Yamamoto"
9,Super Scope Space Bazooka,21 Jun 1993,BattleClash,Oct 1992,BattleClash,Mar 1993,SFC,Nintendo / Intelligent Systems,Light Gun,BattleClash Series,Gunpei Yokoi,"Makoto Kanoh, Masao Yamamoto"


In [None]:
df

In [None]:
# Read the sales file and split its lines in two halves: the first half is
# kept as `games`, the second half as `sales`.
# The `with` block closes the file handle, which was previously left open.
with open('snes_sales.txt', 'r') as snes_sales:
    games_and_sales = snes_sales.readlines()

half = len(games_and_sales) // 2
games = games_and_sales[0:half]
sales = games_and_sales[half:]


In [None]:
games_and_sales

In [None]:
# Attach a sales figure to every row whose US title contains the name part
# (text before ' - ') of a line of `games`; when several lines match, the last
# one wins. The figure is the first line of the matching `sales` entry with
# commas replaced by dots.
df['WW Sales'] = [None] * len(df)
for i in range(len(df)):
    us_title = df.at[i, 'US Title']
    if us_title is None:
        continue
    us_title = us_title.lower()  # hoisted: does not change inside the inner loop
    for game_line, sales_line in zip(games, sales):
        if game_line.split(' - ')[0].lower() in us_title:
            # df.at writes straight into df; chained indexing
            # (df['WW Sales'][i] = ...) may modify a temporary copy instead.
            df.at[i, 'WW Sales'] = sales_line.split('\n')[0].replace(",", ".")

In [None]:
df

In [None]:
block

In [None]:
# Scrape titles and release data for the complete SNES list page.
super_famicom_url = 'https://nindb.net/snes/games-jp.html'
game_list = get_games(super_famicom_url)
# get_releasetable_data requires the fallback region as third argument; 'jp'
# matches the other calls made for Japanese list pages in this notebook.
jap_name, jap_date, us_name, us_date, eu_name, eu_date = get_releasetable_data(super_famicom_url, game_list, 'jp')

In [None]:
release_text 


In [None]:
# Print the position of every image name that refers to a jp/us/eu flag file.
# list.index returns the first occurrence, so repeated names print the same position.
flag_files = ('jp.png', 'us.png', 'eu.png')
for name in image_names:
    if any(flag_file in name for flag_file in flag_files):
        print(image_names.index(name))

In [None]:
# Open the page of the ninth game of the list and print the source URL of
# every image on it that has one.
game = game_list[8]
driver.get(super_famicom_url)
driver.find_element_by_link_text(game).click()
flags = driver.find_elements_by_xpath('//td[@class="flag"]')

image_links = driver.find_elements_by_xpath("//img")
for element in image_links:
    flg = element.get_attribute("src")
    if flg is None:
        continue
    print(flg)

In [None]:
df

In [None]:
# Scratch cell: build the director string from the credit paragraph in `devs`.
# The original draft stopped in the middle of the concatenation and did not
# parse; it is completed with the join used for the producer parsing in this
# notebook: the first line of every piece after the first marker, comma-separated.
director_list = str(devs[0].text).split('\n⌂ ')
director_value = ", ".join(entry.split('\n')[0] for entry in director_list[1:])
print(director_value)
    

In [None]:
producer_value

In [53]:
# Page slug used by the kyoto-report scratch cell further down.
game = "sim-city"

In [None]:
# Scratch cell: reset the two search flags and display `devs`.
# NOTE(review): at top level `devs` is only assigned by a later cell, so this
# cell fails on a fresh kernel run from top to bottom.
producer_flag = False
director_flag = False

devs

In [83]:
# Scratch cell: read the producer paragraph of the "-credits" page of `game`.
driver.get("http://kyoto-report.wikidot.com/" + game + "-credits")
devs = driver.find_elements_by_xpath('/html/body/div[1]/div/div[1]/div/div[2]/div/div[3]/p[2]')

producer_list = devs[0].text.split('\n⌂ ')
# First line of every piece after the first marker, comma-separated. The former
# loop appended the last name on every iteration, which repeated it as soon as
# three or more pieces were present.
producer_value = ", ".join(entry.split('\n')[0] for entry in producer_list[1:])

In [84]:
producer_list

['Producer', 'Shigeru Miyamoto\nNintendo Co. Ltd.']

In [85]:
producer_value

'Shigeru Miyamoto'

In [100]:
# Parse every HTML table of the page into a list of DataFrames.
colors = pd.read_html('https://www.nintendo.co.jp/n02/dmg/index.html')

In [106]:
colors[0]

Unnamed: 0,0,1,2,3,4
0,ソフト名,発売日,カートリッジの分類,ゲームジャンル,NINTENDOPOWER
1,モバイルゴルフ＋モバイルアダプタGBセット,2001.5.11,,ゴルフゲーム,
2,,,,,
3,とっとこハム太郎２ ハムちゃんず大集合でちゅ,2001.4.21,,おはなしアドベンチャー,
4,ゼルダの伝説 ふしぎの木の実 〜時空の章〜,2001.2.27,,アクションアドベンチャー,
...,...,...,...,...,...
62,テニス,1989.5.29,,スポーツ,
63,アレイウェイ,1989.4.21,,アクション,
64,役満,1989.4.21,,テーブル,
65,ベースボール,1989.4.21,,スポーツ,


In [108]:
colors[1]

Unnamed: 0,0,1,2,3
0,カートリッジの分類,カートリッジの分類,カートリッジの分類,カートリッジの分類
1,カートリッジの種類,製品概要,ゲームボーイ等,ゲームボーイカラー
2,ゲームボーイカラー専用カートリッジ,ゲームボーイカラーおよびゲームボーイアドバンス専用ソフトです。カラー画面（32000色中最大...,使用不可,
3,ゲームボーイ＆カラー共通カートリッジ,ゲームボーイ＆カラー共通ソフトです。ゲームボーイカラーおよびゲームボーイアドバンスはカラー画...,,
4,［無印］は従来のゲームボーイ用ソフトです。全てのゲームボーイ本体で使用でき、ゲームボーイカラ...,［無印］は従来のゲームボーイ用ソフトです。全てのゲームボーイ本体で使用でき、ゲームボーイカラ...,,


In [26]:
def remove_non_ascii(text):
    """
    Return ``text`` without the characters whose code point is 128 or above.

    Same definition as the one near the top of the notebook.
    """
    kept = []
    for char in text:
        if ord(char) < 128:
            kept.append(char)
    return ''.join(kept)

In [27]:
remove_non_ascii("Pùppò")

'Ppp'