In [11]:
import pandas as pd
# Load the sampled games and discard the 'Unnamed: 0' column in one chained
# expression (presumably a row index saved by an earlier export -- confirm
# against whatever produced games_sample.csv).
games_sample = pd.read_csv('games_sample.csv').drop(columns=['Unnamed: 0'])

# Preview the first rows as the cell output.
games_sample.head()

Unnamed: 0,Title,platform,release_date,summary,meta_score,user_review
0,Pokemon Mystery Dungeon: Explorers of Darkness,DS,2008-04-20,"In this pair of action-packed adventures, play...",59,8.5
1,Z.H.P. Unlosing Ranger vs Darkdeath Evilman,PSP,2010-10-25,"Known as ZettaiHero Keikakuin Japan, Z.H.P. is...",81,9.7
2,Elemental Gearbolt,PlayStation,1998-06-30,In a fantastic world divided by class and race...,76,tbd
3,King Arthur,Xbox,2004-11-16,Live the true story behind the epic legend of ...,61,tbd
4,King Arthur,GameCube,2004-11-18,Live the true story behind the epic legend of ...,60,tbd


In [12]:
import requests

def get_wikipedia_extract(game, timeout=10):
    """Return the plain-text intro of the English Wikipedia article for ``game``.

    Sends an ``action=query`` / ``prop=extracts`` request to the MediaWiki API
    and returns the ``extract`` field of the first page in the response.

    Args:
        game: Article title to look up (sent as the ``titles`` parameter).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The intro text, or ``''`` when the page does not exist (the API lists
        such pages under a negative page id), when the response contains no
        pages (e.g. an API error payload), or when the page has no extract.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'query',
        'format': 'json',
        'titles': game,
        'prop': 'extracts',
        'exintro': True,
        'explaintext': True,
        # Resolve redirect titles to their target article, matching the
        # 'redirects' flag that get_from_infobox already sends.
        'redirects': 1,
    }

    # Without a timeout a single stalled connection blocks the caller forever.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # Error payloads have no 'query' key; treat them like a missing page.
    pages = data.get('query', {}).get('pages', {})
    if not pages:
        return ''

    page_id, page = next(iter(pages.items()))
    # Missing/invalid titles are reported under a negative page id.
    if int(page_id) < 0:
        return ''
    return page.get('extract', '')

In [13]:
from bs4 import BeautifulSoup

def get_genre(raw_html):
    """Extract the first linked genre from a Wikipedia article's rendered HTML.

    Looks for the link titled 'Video game genre' (the label of the genre row),
    then returns the text of the first link inside that row's data cell.

    Args:
        raw_html: HTML of the article as returned by the parse API; may be
            empty.

    Returns:
        The genre link text, or ``''`` when the HTML is empty or any expected
        element (table, genre label, row, data cell, genre link) is absent.
    """
    if not raw_html:
        return ''
    soup = BeautifulSoup(raw_html, 'html.parser')

    # Prefer the infobox table: the first <tbody> in the document can belong
    # to another table (e.g. a maintenance banner) placed before the infobox.
    # Fall back to the first <tbody> when no infobox-classed table exists.
    container = soup.find('table', class_='infobox')
    if container is None:
        container = soup.find('tbody')
    if container is None:
        return ''

    genre_label = container.find('a', {'title': 'Video game genre'})
    if genre_label is None:
        return ''

    # The label link sits in the row's header cell; the value is in the <td>.
    row = genre_label.find_parent('tr')
    if row is None:
        return ''
    value_cell = row.find('td')
    if value_cell is None:
        return ''
    genre = value_cell.find('a')
    if genre is None:
        return ''
    return genre.text

def get_modes(raw_html):
    """Extract the game modes listed in a Wikipedia article's rendered HTML.

    Finds the text node 'Mode(s)', walks up to its table row, and collects the
    text of every link that is a direct child of that row's data cell.

    Args:
        raw_html: HTML of the article as returned by the parse API; may be
            empty.

    Returns:
        A list of mode names (possibly empty). ``[]`` is returned when the
        HTML is empty or any expected element (table, 'Mode(s)' label, row,
        data cell) is absent.
    """
    if not raw_html:
        return []
    soup = BeautifulSoup(raw_html, 'html.parser')

    # Prefer the infobox table: the first <tbody> in the document can belong
    # to another table (e.g. a maintenance banner) placed before the infobox.
    # Fall back to the first <tbody> when no infobox-classed table exists.
    container = soup.find('table', class_='infobox')
    if container is None:
        container = soup.find('tbody')
    if container is None:
        return []

    modes_label = container.find(string='Mode(s)')
    if modes_label is None:
        return []

    row = modes_label.find_parent('tr')
    if row is None:
        return []
    value_cell = row.find('td')
    if value_cell is None:
        return []

    # Only direct-child links are collected, so links nested deeper in the
    # cell (inside lists, footnote markers, ...) are not included.
    mode_links = value_cell.find_all('a', recursive=False)
    return [link.text for link in mode_links]

def get_from_infobox(game, timeout=10):
    """Fetch a Wikipedia article's HTML and pull the genre and modes from it.

    Sends an ``action=parse`` request (following redirects) to the MediaWiki
    API and passes the rendered HTML to ``get_genre`` and ``get_modes``.

    Args:
        game: Article title to look up (sent as the ``page`` parameter).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict ``{'genre': str, 'modes': list}``. Both values are empty when
        the API reports an error (e.g. the page does not exist) or when the
        response carries no HTML.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    url = 'https://en.wikipedia.org/w/api.php'
    params = {
        'action': 'parse',
        'page': game,
        'format': 'json',
        'prop': 'text',
        'redirects': ''
    }

    # Without a timeout a single stalled connection blocks the caller forever.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    if 'error' in data:
        return {'genre': '', 'modes': []}

    # Tolerate a response without the expected parse/text/* nesting; the
    # helpers return empty results for an empty HTML string.
    raw_html = data.get('parse', {}).get('text', {}).get('*', '')
    return {'genre': get_genre(raw_html), 'modes': get_modes(raw_html)}

In [14]:
# Enrich every row with Wikipedia data. The same title can appear on several
# rows (one per platform), so results are cached per title to avoid sending
# identical HTTP requests more than once.
wikipedia_extracts = []
genres = []
modes = []
lookups_by_title = {}

for game in games_sample['Title']:
    if game not in lookups_by_title:
        lookups_by_title[game] = (get_wikipedia_extract(game), get_from_infobox(game))
    extract, info = lookups_by_title[game]
    wikipedia_extracts.append(extract)
    genres.append(info['genre'])
    # Copy the list so rows sharing a title do not share one mutable object.
    modes.append(list(info['modes']))

games_sample['Wikipedia'] = wikipedia_extracts
games_sample['Genre'] = genres
games_sample['Modes'] = modes
games_sample.head()

Unnamed: 0,Title,platform,release_date,summary,meta_score,user_review,Wikipedia,Genre,Modes
0,Pokemon Mystery Dungeon: Explorers of Darkness,DS,2008-04-20,"In this pair of action-packed adventures, play...",59,8.5,,,[]
1,Z.H.P. Unlosing Ranger vs Darkdeath Evilman,PSP,2010-10-25,"Known as ZettaiHero Keikakuin Japan, Z.H.P. is...",81,9.7,,,[]
2,Elemental Gearbolt,PlayStation,1998-06-30,In a fantastic world divided by class and race...,76,tbd,"Elemental Gearbolt, full title Genseikyokō Sei...",Light gun shooter,"[Single-player, multiplayer]"
3,King Arthur,Xbox,2004-11-16,Live the true story behind the epic legend of ...,61,tbd,"King Arthur (Welsh: Brenin Arthur, Cornish: Ar...",,[]
4,King Arthur,GameCube,2004-11-18,Live the true story behind the epic legend of ...,60,tbd,"King Arthur (Welsh: Brenin Arthur, Cornish: Ar...",,[]


In [15]:
# Persist the enriched table. The row index is written too (pandas' default),
# so the file gains an unnamed leading column; the list-valued 'Modes' column
# is stored as text.
games_sample.to_csv(path_or_buf='games_clean.csv')