In [100]:
import pandas as pd
# Load the sampled games table and discard the 'Unnamed: 0' column in one
# chained expression (presumably a saved index column -- confirm against the
# code that wrote games_sample.csv).
games_sample = pd.read_csv('games_sample.csv').drop(columns=['Unnamed: 0'])

games_sample.head()

Unnamed: 0,Title,platform,release_date,summary,meta_score,user_review
0,Pokemon Mystery Dungeon: Explorers of Darkness,DS,2008-04-20,"In this pair of action-packed adventures, play...",59,8.5
1,Z.H.P. Unlosing Ranger vs Darkdeath Evilman,PSP,2010-10-25,"Known as ZettaiHero Keikakuin Japan, Z.H.P. is...",81,9.7
2,Elemental Gearbolt,PlayStation,1998-06-30,In a fantastic world divided by class and race...,76,tbd
3,King Arthur,Xbox,2004-11-16,Live the true story behind the epic legend of ...,61,tbd
4,King Arthur,GameCube,2004-11-18,Live the true story behind the epic legend of ...,60,tbd


In [101]:
import requests

def search_wikipedia(game, timeout=10):
    """Return the title of the top English-Wikipedia search hit for ``game``.

    Sends a ``list=search`` query to the MediaWiki API and picks the first
    entry of the returned result list.

    Args:
        game: Search string, sent as the ``srsearch`` parameter.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        The first result's page title, or ``''`` when the search produced
        no results.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    search_url = 'https://en.wikipedia.org/w/api.php'
    search_params = {
        'action': 'query',
        'format': 'json',
        'list': 'search',
        'srsearch': game,
    }

    search_response = requests.get(search_url, params=search_params, timeout=timeout)
    search_response.raise_for_status()
    search_data = search_response.json()

    # Inspect the result list itself instead of ``searchinfo.totalhits``:
    # indexing ``search[0]`` is only safe when the list is non-empty, and the
    # ``searchinfo`` metadata may be absent from the response.
    results = search_data.get('query', {}).get('search', [])
    if results:
        return results[0]['title']
    return ''

In [102]:
def get_wikipedia_extract(game, timeout=10):
    """Fetch the plain-text intro extract of an English-Wikipedia page.

    Queries the MediaWiki API with ``prop=extracts`` and the ``exintro`` and
    ``explaintext`` options set, then reads the first page entry of the
    response.

    Args:
        game: Page title, sent as the ``titles`` parameter.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        The page's ``extract`` text, or ``''`` when the response contains no
        pages, the first page id is negative, or the page carries no
        ``extract`` field.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    summary_url = 'https://en.wikipedia.org/w/api.php'
    summary_params = {
        'action': 'query',
        'format': 'json',
        'titles': game,
        'prop': 'extracts',
        'exintro': True,
        'explaintext': True,
    }

    response = requests.get(summary_url, params=summary_params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    pages = data.get('query', {}).get('pages', {})
    if not pages:
        return ''
    # Only the first page entry is considered; a negative id is treated as
    # "no such page".
    page_id, page = next(iter(pages.items()))
    if int(page_id) < 0:
        return ''
    # An existing page may still lack an 'extract' field; fall back to ''.
    return page.get('extract', '')


In [103]:
from bs4 import BeautifulSoup

def get_genre(raw_html):
    """Pull the first linked genre out of a page's rendered HTML.

    Within the first ``<tbody>`` of the document, locates the ``<a>`` whose
    ``title`` attribute is ``'Video game genre'``, climbs two parents up from
    it (presumably to the table row holding the label -- confirm against the
    page markup), takes the first ``<td>`` under that ancestor and returns
    the text of the first ``<a>`` inside it.

    Args:
        raw_html: HTML string to parse; ``''`` short-circuits to ``''``.

    Returns:
        The genre link text, or ``''`` if any step of the lookup finds
        nothing.
    """
    if raw_html == '':
        return ''

    table_body = BeautifulSoup(raw_html, 'html.parser').find('tbody')
    if table_body is None:
        return ''

    node = table_body.find('a', {'title': 'Video game genre'})
    # Walk up two levels, bailing out as soon as a level is missing.
    for _ in range(2):
        if node is None:
            return ''
        node = node.find_parent()
    if node is None:
        return ''

    value_cell = node.find('td')
    if value_cell is None:
        return ''

    first_link = value_cell.find('a')
    return '' if first_link is None else first_link.text

def get_modes(raw_html):
    """Collect the linked game modes from a page's rendered HTML.

    Within the first ``<tbody>`` of the document, locates the text node equal
    to ``'Mode(s)'``, climbs two parents up from it (presumably to the table
    row holding the label -- confirm against the page markup), takes the
    first ``<td>`` under that ancestor and gathers the text of every ``<a>``
    that is a direct child of that cell.

    Args:
        raw_html: HTML string to parse; ``''`` short-circuits to ``[]``.

    Returns:
        A list of link texts (possibly empty); ``[]`` if any step of the
        lookup finds nothing.
    """
    if raw_html == '':
        return []

    tbody = BeautifulSoup(raw_html, 'html.parser').find('tbody')
    if tbody is None:
        return []

    # `string=` is the current spelling of the filter formerly named `text=`.
    node = tbody.find(string='Mode(s)')
    # Walk up two levels, bailing out as soon as a level is missing.
    for _ in range(2):
        if node is None:
            return []
        node = node.find_parent()
    if node is None:
        return []

    modes_cell = node.find('td')
    if modes_cell is None:
        return []

    # find_all always returns a list-like result, so no None check is needed.
    # recursive=False keeps only links that are direct children of the cell.
    return [link.text for link in modes_cell.find_all('a', recursive=False)]

def get_from_infobox(game, timeout=10):
    """Fetch a page's rendered HTML and extract its genre and modes.

    Calls the MediaWiki ``action=parse`` endpoint (``prop=text``, with the
    ``redirects`` parameter set) and hands the returned HTML to
    ``get_genre`` and ``get_modes``.

    Args:
        game: Page title, sent as the ``page`` parameter.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        A dict ``{'genre': str, 'modes': list}``. When the API response
        contains an ``error`` entry, ``{'genre': '', 'modes': []}``.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'parse',
        'page': game,
        'format': 'json',
        'prop': 'text',
        'redirects': ''
    }

    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    if 'error' in data:
        return {'genre': '', 'modes': []}
    # Tolerate a payload without the expected nesting: an empty HTML string
    # makes both extractors return their empty value.
    raw_html = data.get('parse', {}).get('text', {}).get('*', '')
    return {'genre': get_genre(raw_html), 'modes': get_modes(raw_html)}

In [104]:
# Enrich every game with its Wikipedia intro, genre and modes.
wikipedia_extracts = []
genres = []
modes = []

# Results keyed by title, so a title that appears on several rows (e.g. the
# same game on different platforms) triggers the network lookups only once.
lookups = {}

for game in games_sample['Title']:
    if game not in lookups:
        # Space before the qualifier so the query reads "Title (video game)".
        page_title = search_wikipedia(game + " (video game)")
        if page_title != '':
            extract = get_wikipedia_extract(page_title)
            info = get_from_infobox(page_title)
            lookups[game] = (extract, info['genre'], info['modes'])
        else:
            # Use an empty list (not '') for modes so the column holds lists
            # on every row, matching what get_from_infobox returns.
            lookups[game] = ('', '', [])

    extract, genre, game_modes = lookups[game]
    wikipedia_extracts.append(extract)
    genres.append(genre)
    # Copy so rows sharing a cached lookup do not share one list object.
    modes.append(list(game_modes))

games_sample['Wikipedia'] = wikipedia_extracts
games_sample['Genre'] = genres
games_sample['Modes'] = modes
games_sample.head()

Unnamed: 0,Title,platform,release_date,summary,meta_score,user_review,Wikipedia,Genre,Modes
0,Pokemon Mystery Dungeon: Explorers of Darkness,DS,2008-04-20,"In this pair of action-packed adventures, play...",59,8.5,Pokémon Mystery Dungeon: Explorers of Time and...,Roguelike,[Single player]
1,Z.H.P. Unlosing Ranger vs Darkdeath Evilman,PSP,2010-10-25,"Known as ZettaiHero Keikakuin Japan, Z.H.P. is...",81,9.7,Z.H.P. Unlosing Ranger VS Darkdeath Evilman is...,Tactical role-playing game,[Single-player]
2,Elemental Gearbolt,PlayStation,1998-06-30,In a fantastic world divided by class and race...,76,tbd,"Elemental Gearbolt, full title Genseikyokō Sei...",Light gun shooter,"[Single-player, multiplayer]"
3,King Arthur,Xbox,2004-11-16,Live the true story behind the epic legend of ...,61,tbd,King Arthur is an action-adventure game based ...,Action-adventure,"[Single-player, multiplayer]"
4,King Arthur,GameCube,2004-11-18,Live the true story behind the epic legend of ...,60,tbd,King Arthur is an action-adventure game based ...,Action-adventure,"[Single-player, multiplayer]"


In [105]:
# Persist the enriched table; to_csv is called with its defaults, so the
# DataFrame index is written as the first column of the file.
output_path = 'wikipedia_games.csv'
games_sample.to_csv(output_path)