# Scraping roguelike articles on the web

This Python notebook describes the data collection process for roguelike universe. If you have not yet installed the requirements, you can do it by running:

`pip install -r requirements.txt`

## Sourcing game titles

We prepared a [list of roguelike games](https://en.wikipedia.org/wiki/List_of_roguelikes) from Wikipedia as a starting point. For indicators of out-of-genre influences, we sourced 10,000+ video game titles from Pastebin as uploaded by the user ________. 

First, we setup read/write functions that are friendly with international unicode characters, because the web may contain all kinds of character code points.

In [75]:
import os
import io
import json
import pandas as pd

def read_json(path):
    data = ''
    with io.open(path, 'r', encoding='utf-8') as f:
        data = json.loads(f.read())
        print(__message('Loaded {}'.format(path)))
    return data
    
def save_json(path, data):
    with io.open(path, 'w', encoding='utf-8') as f:
        try:
            output = json.dumps(data, indent=2, ensure_ascii=False)
            f.write(output)
        except UnicodeEncodeError:
            f.write(output.encode('utf-8'))
    print(__message('Written to {}'.format(path)))
    
def __success(text):
    return '  (SUCC) {}'.format(text).encode('utf-8')
    
def __failure(text):
    return '!!FAIL!! {}'.format(text).encode('utf-8')
    
def __warning(text):
    return '??WARN?? {}'.format(text).encode('utf-8')
    
def __message(text):
    return '   |MSG| {}'.format(text).encode('utf-8')

Here is a sample of the list of roguelike games from Wikipedia:

In [66]:
roguelikes = pd.read_csv(os.path.join(os.getcwd(), 'roguelikes.csv'), skip_blank_lines=True)
roguelikes.head()

Unnamed: 0,Name,RogueTemple,Link,Released,Updated,Developer,Theme,Influences
0,100 Rogues,http://roguebasin.roguelikedevelopment.org/ind...,http://www.100rogues.com/,2010/05/06,2010/05/06,Dinofarm Games,Fantasy,Rogue
1,1Quest,http://roguebasin.roguelikedevelopment.org/ind...,http://www.ratzngodz.fr,2014/02/20,2015/02/07,Ratz 'N' Godz,Fantasy,"DCSS, Dominion 4"
2,3059,http://roguebasin.roguelikedevelopment.org/ind...,https://sites.google.com/site/free3069/3059---...,2005/00/00,2005/06/11,Phr00t,"Science Fiction, Alien Planets, Futuristic",nethack
3,3069,http://roguebasin.roguelikedevelopment.org/ind...,http://sites.google.com/site/free3069/,2009/07/06,2009/10/06,Phr00t,"Science Fiction, Alien Planets, Futuristic",3059
4,3079,http://roguebasin.roguelikedevelopment.org/ind...,http://sites.google.com/site/3079game/,2011/10/25,2015/02/13,Phr00t,"Science Fiction, Alien Planets, Futuristic","3059, 3069, Fallout, Minecraft"


In addition, roguelike-like games:

In [82]:
roguelikelikes = pd.read_csv(os.path.join(os.getcwd(), 'roguelike-likes.csv'), skip_blank_lines=True)
roguelikelikes.head()

Unnamed: 0,Name,Released,Updated,Developer,Theme,Influences
0,ToeJam & Earl,1991,,Johnson Voorsanger Productions,Fantasy,
1,Diablo,1996,,Blizzard North,Fantasy,
2,Diablo II,2000,,Blizzard Entertainment,Fantasy,
3,Lost Labyrinth,2001,2011.0,Lost Labyrinth,Fantasy,
4,Strange Adventures In Infinite Space,2002,2004.0,"Rich Carlson, Iikka Keränen",Space science fiction,


And a sample of the list of video games:

In [52]:
video_games = pd.read_json(os.path.join(os.getcwd(), 'games.json'))
video_games.head(10)

Unnamed: 0,title,year
0,$hop-n-$pree,2009
1,'43 - One Year After,1986
2,'89 Denno Kyusei Uranai,1988
3,'Nam 1965-1975,1991
4,'Splosion Man,2009
5,'Til Death Do Us Part,2013
6,(Almost) Total Mayhem,2011
7,(Not) Just another Space Shooter,2004
8,(T)Raumschiff Surprise - Periode 1,2004
9,*NSYNC Hotline Phone and Fantasy CD-Rom Game,2001


## Building a corpus 

Before we can do any text analysis, we need to build a corpus in which to operate on.

### 1. RogueTemple

RogueTemple Wiki collects a detailed description of roguelike games.

In [76]:
import requests

def scrape_mediawiki_url(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    response = requests.get(url, headers=headers, timeout=(9.1, 12.1))
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    
    content = [node.text.strip() for node in soup.select('#mw-content-text') if node.text]
    return ''.join(content)

In [None]:
# Sample scrape
content = scrape_mediawiki_url('http://roguebasin.roguelikedevelopment.org/index.php?title=100_Rogues')
print(content)

### 2. Wikipedia

Searching on Wikipedia can be done with two dozens line of code.

In [77]:
import wikipedia

def scrape_wiki_id(pageid):
    page = wikipedia.page(pageid=pageid)    
    print_wiki_page(page)
    
def scrape_wiki(title):
    try:
        searchstring = title
        page = wikipedia.page(searchstring, auto_suggest=False)
#         print_wiki_page(page)
        return page
    except wikipedia.DisambiguationError:
        try:
            searchstring = '{} (video game)'.format(title).replace(' ', '_')
            page = wikipedia.page(searchstring, auto_suggest=False)
#             print_wiki_page(page)
            return page
        except wikipedia.DisambiguationError:
            try:
                searchstring = '{} (Unix video game)'.format(title).replace(' ', '_')
                page = wikipedia.page(searchstring, auto_suggest=False)
    #             print_wiki_page(page)
                return page
            except:
                print(__warning(u'Wikipedia cannot find "{}"'.format(searchstring)))
        except:
            print(__warning(u'Wikipedia cannot find "{}"'.format(searchstring)))
    except wikipedia.PageError:
        try:
            page = wikipedia.page(title, auto_suggest=False)
#             print_wiki_page(page)
            return page
        except:
            print(__warning(u'Search term "{}" returned nothing'.format(searchstring)))
    
def print_wiki_page(page):
    print(page.title)
    print(page.content)
    print(page.references)  

In [78]:
# Test Wikipedia crawl
print(scrape_wiki('Rogue Legacy'))

<WikipediaPage 'Rogue Legacy'>


### 3. DuckDuckGo

We also source a list of potential interesting webpages via an internet search engine, DuckDuckGo.

In [79]:
import bs4
import time
import requests
import urllib.parse

def scrape_duckduckgo(keywords, developer=""):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    searchstring = u'"{}" AND {} AND game AND (interview OR mortem OR history OR develop)'.format(keywords, developer)
    q = u'http://duckduckgo.com/html/?q={}'.format(urllib.parse.quote(searchstring.encode('utf-8')))
    print(q)
                                                   
    response = requests.get(q, headers=headers, timeout=(9.1, 12.1))
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    
    links = []
    links = [node.get('href') for node in soup.select('a.result__a')]
    return links

In [63]:
scrape_duckduckgo('Ancient Domains of Mystery', 'Thomas Biskup')

http://duckduckgo.com/html/?q=%22Ancient%20Domains%20of%20Mystery%22%20AND%20Thomas%20Biskup%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29


['https://store.steampowered.com/video/333300',
 'https://lgdb.org/game/adom-ancient-domains-of-mystery',
 'https://en.wikipedia.org/wiki/Ancient_Domains_of_Mystery',
 'https://strategywiki.org/wiki/Ancient_Domains_of_Mystery',
 'https://www.ranker.com/review/ancient-domains-of-mystery/455750',
 'https://ancient-domains-of-mystery.ru.uptodown.com/windows/download',
 'https://www.rockpapershotgun.com/tag/adom-ancient-domains-of-mystery/',
 'https://www.cultureofgaming.com/ancient-domains-of-mystery-adom-review/',
 'https://www.youtube.com/watch?v=ChtBuBrFYc8',
 'http://RuTracker.org/forum/viewtopic.php?t=5451914',
 'https://classicreload.com/ancient-domains-of-mystery.html',
 'https://www.facebook.com/ADOMAncientDomainsOfMystery/',
 'https://steamcommunity.com/sharedfiles/filedetails/?l=russian&id=258925365',
 'https://alchetron.com/Ancient-Domains-of-Mystery',
 'https://www.turkaramamotoru.com/en/ancient-domains-of-mystery-181815.html',
 'https://www.giantbomb.com/ancient-domains-of-my

## Scrape the corpus

With the functions above we can collect a corpus.

In [80]:
def scrape(url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    try:
        print(__message('Scraping {} ...'.format(url)))
        response = requests.get(url, headers=headers, timeout=(9.1, 12.1))
    except Exception as e:
        print(__failure('Failed to load {}'.format(url)))
        print(e)
        return None
    
    html = response.text
    if html and any(word in html.lower() for word in ['tutorial']):
        return None
    if html and any(word in html.lower() for word in ['rogue', 'procedural', 'generation', 'interview', 'mortem', 'review', 'history', 'develop', 'idea', 'inspir']):
        print(__message('Found article'))
        soup = bs4.BeautifulSoup(response.text, 'lxml')
        selections = soup.select('body > p') + soup.select('div > p') + soup.select('table td')
        content = [node.text.strip() for node in selections]
        return ''.join(content)
    return None

In [71]:
# For Roguelike games, we build a corpus with RogueTemple and DuckDuckGo
corpus = []
    
for index, roguelike in roguelikes.iterrows():
    print(roguelike)
    title = roguelike['Name']
    if not isinstance(title, str):
        continue
    text = []
    
    rogue_temple = scrape_mediawiki_url(roguelike['RogueTemple'])
    text.append(rogue_temple)

    developers = str(roguelike['Developer']).replace(',', ' OR ')
    links = scrape_duckduckgo(title, developers)
    
    for link in links[:10]:
        if 'roguebasin.roguelikedevelopment.org' in link \
            or 'roguebasin.com' in link \
            or 'wikipedia' in link:
            continue
        content = scrape(link)
        if content:
            text.append(content)
    
    corpus.append({"title": title, "text": text})
  
save_json('corpus.json', corpus)

b'   |MSG| Loaded corpus.json'
Name                                                  100 Rogues
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                   http://www.100rogues.com/
Released                                              2010/05/06
Updated                                               2010/05/06
Developer                                         Dinofarm Games
Theme                                                    Fantasy
Influences                                                 Rogue
Name: 0, dtype: object
http://duckduckgo.com/html/?q=%22100%20Rogues%22%20AND%20Dinofarm%20Games%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping http://www.dinofarmgames.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://100rogues.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.facebook.com/100-Rogues-109776630046/ ...'
b'   |MSG| Scraping https://www.patreon.c

http://duckduckgo.com/html/?q=%223089%22%20AND%20Phr00t%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.youtube.com/watch?v=atp6rqHoAec ...'
b'   |MSG| Scraping https://store.steampowered.com/video/263360?l=czech ...'
b'   |MSG| Scraping http://small-games.info/?go=game&c=4&i=12408 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://steamcommunity.com/app/414510/discussions/0/458606877314643131/ ...'
b'   |MSG| Scraping https://translate.google.ru/ ...'
b'   |MSG| Scraping https://indiegamereviewer.com/review-3089-an-rpg-roguelike-shooter-by-indie-dev-phr00t/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://twitter.com/phr00t_/status/951280635439669253 ...'
b'   |MSG| Scraping https://www.reddit.com/r/3089/ ...'
b'   |MSG| Scraping http://3089game.wikia.com/ ...'
b'   |MSG| Scraping https://www.facebook.com/IndieGameHq/?ref=py_c ...'
Name                                                      7KBRLL
RogueTe

b'   |MSG| Scraping https://www.thisisbarry.com/single-post/2016/10/26/Donnie-Darko-2001-Full-Plot-and-Ending-Explained ...'
Name                                                    Alphaman
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                                         NaN
Released                                              1995/00/00
Updated                                               1995/00/00
Developer                                       Jeffrey R. Olson
Theme                                           Post-Apocalyptic
Influences                                                   NaN
Name: 11, dtype: object
http://duckduckgo.com/html/?q=%22Alphaman%22%20AND%20Jeffrey%20R.%20Olson%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://archive.org/details/Alphaman11 ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=6y_U3ra7TfA ...'
b'   |MSG| Scraping https://www.lds.org/gen

b'   |MSG| Scraping https://www.choiceofgames.com/2016/11/end-game-and-victory-design/ ...'
b'   |MSG| Scraping https://www.coursera.org/lecture/gamification/1-5-history-of-gamification-7Wp4p ...'
Name                         Anoxic Depths: Caves of the Yendori
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                    http://studiotectorum.com/anoxic-depths/
Released                                              2015/06/03
Updated                                               2015/08/16
Developer                             Studio Tectorum (Tectorum)
Theme                                          Underwater, SCUBA
Influences                                         Anoxic Depths
Name: 17, dtype: object
http://duckduckgo.com/html/?q=%22Anoxic%20Depths%3A%20Caves%20of%20the%20Yendori%22%20AND%20Studio%20Tectorum%20%28Tectorum%29%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.youtube.com/watch?v

b'   |MSG| Scraping https://www.reddit.com/r/roguelikes/comments/79im83/break_out_of_harekas_underground_version_04/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://github.com/anaseto/boohu/blob/master/README.md ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://archive.org/details/boohu-0.5 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.reddit.com/r/roguelikedev/comments/70mhce/i_wrote_a_short_roguelike_named_boohu_in_go_and_i/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://linuxfr.org/ ...'
b'   |MSG| Scraping https://linuxfr.org/news ...'
Name                                                        BOSS
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link           https://web.archive.org/web/20130118205257/htt...
Released                                              1990/11/00
Updated                                               2012/10/12
Developer                           Robert Gulledge, Jason Black
Theme          

http://duckduckgo.com/html/?q=%22Cardinal%20Quest%22%20AND%20Ido%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://gamedevelopment.tutsplus.com/articles/cardinal-quest-how-i-made-60000-from-my-indie-game--gamedev-12073 ...'
b'   |MSG| Scraping http://cardinal-quest-2.wikia.com/wiki/Cardinal_Quest ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://steamcommunity.com/app/378030/discussions/0/1489992080500826687/?l=russian ...'
b'   |MSG| Scraping http://y8.games/cardinal-quest-2/ ...'
b'   |MSG| Scraping http://blog.tametick.com/2011/12/cardinal-quest-12-and-whats-next.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://test.keygames.com/cardinal-quest-2-game/ ...'
b'   |MSG| Scraping http://www.kizi.land/cardinal-quest.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.pcgamer.com/cardinal-quest/ ...'
b'   |MSG| Scraping http://www.daliengames.com/game/278/cardinal_quest/ ...'
b'   |MSG| Found

b'   |MSG| Scraping https://www.classicgames.me/caverns-of-xaskazien.html ...'
b'   |MSG| Scraping http://www.old-games.com/download/3903/caverns-of-xaskazien ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://alchetron.com/Caverns-of-Xaskazien ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.thefullwiki.org/Caverns_of_Xaskazien ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://en.wikibedia.ru/wiki/Caverns_of_Xaskazien ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://vikimy.com/l-en/Caverns_of_Xaskazien ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://dictionary.sensagent.com/caverns%20of%20xaskazien/en-en/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://goldenageofgames.com/caverns-of-xaskazien/ ...'
b'   |MSG| Found article'
Name                                                 Chasm Lords
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                      https://chasmlords.com
Released  

b'   |MSG| Scraping https://translate.google.ru/ ...'
b'   |MSG| Scraping https://www.rogerebert.com/demanders/lodge-49-is-interesting-but-undercooked-showcase-for-wyatt-russell ...'
b'   |MSG| Scraping https://www.therussell.com.au/ ...'
b'   |MSG| Scraping https://crpgaddict.blogspot.com/2010/02/rogue-story-and-gameplay.html ...'
b'   |MSG| Found article'
Name                                                     Cogmind
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                       http://www.gridsagegames.com/cogmind/
Released                                              2015/05/19
Updated                                               2018/05/08
Developer                              Grid Sage Games (Kyzrati)
Theme                                                     Sci-Fi
Influences                                            Battletech
Name: 39, dtype: object
http://duckduckgo.com/html/?q=%22Cogmind%22%20AND%20Grid%20Sage%20Games%20%28Kyzrati%29%20AND%20

http://duckduckgo.com/html/?q=%22CryptoRl%22%20AND%20Gornova%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://gamejolt.com/games/cryptorl2/106921 ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=3iim3CCygU0 ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=fLJBzhcSWTk ...'
b'   |MSG| Scraping https://www.theguardian.com/technology/gamesblog/2012/dec/06/video-games-as-art ...'
b'   |MSG| Scraping http://www.java-gaming.org/index.php?;topic=36835.0 ...'
b'   |MSG| Scraping https://my.vanderbilt.edu/developmentalpsychologyblog/2014/04/effect-of-video-games-on-child-development/ ...'
b'   |MSG| Scraping https://www.proprofs.com/quiz-school/story.php?title=basic-world-history-quiz ...'
b'   |MSG| Scraping https://devhub.io/developer/Gornova ...'
b'   |MSG| Found article'
Name                                                   CryptoRl2
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link           htt

b'   |MSG| Scraping https://biginterview.com/blog/2013/10/teamwork-interview-questions.html ...'
b'   |MSG| Scraping https://www.facebook.com/shawndecker ...'
b'   |MSG| Scraping https://wikiext.com/dsg-decker-saved-game ...'
b'!!FAIL!! Failed to load https://wikiext.com/dsg-decker-saved-game'
HTTPSConnectionPool(host='wikiext.com', port=443): Max retries exceeded with url: /dsg-decker-saved-game (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))
Name                                                   Deliantra
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                   http://www.deliantra.net/
Released                                              2006/05/30
Updated                                               2017/06/01
Developer      The Deliantra Development Team, Marc Lehmann, ...
Theme                                                    Fantasy
Infl

Name                                         Doom, the Roguelike
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                 http://doom.chaosforge.org/
Released                                              2003/00/00
Updated                                               2013/03/13
Developer                                     Kornel Kisielewicz
Theme                                                       Gore
Influences                                                  Doom
Name: 56, dtype: object
http://duckduckgo.com/html/?q=%22Doom%2C%20the%20Roguelike%22%20AND%20Kornel%20Kisielewicz%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping http://www.roguelikeradio.com/2015/12/episode-112-interview-with-kornel.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://archive.org/details/Doom_the_Roguelike ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://chaosforge.org/presskit/ ...'


b'   |MSG| Found article'
b'   |MSG| Scraping https://steamcommunity.com/discussions/forum/12/1711816076697243697/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.boardgamegeek.com/thread/1945882/thoughts-and-questions ...'
b'   |MSG| Scraping https://blackmod.net/threads/game-dungeon-blade-v2-0-0-mod-free-vip-14-20000-diamonds-x-10-dmg-no-cdtime-new-update-04-08-2018.716/ ...'
b'   |MSG| Scraping https://wiki2.org/en/Dungeon_Hack ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://ru.modapkdown.com/dandy-dungeon-hack-and-cheats/launchstudio-wqafa-dandy_dungeon/ ...'
b'   |MSG| Scraping https://www.turkaramamotoru.com/en/dungeon-hack-2170.html ...'
b'   |MSG| Found article'
Name                                    Dungeon Monkey Unlimited
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                 http://www.gearheadrpg.com/
Released                                              2010/00/00
Updated                            

b'   |MSG| Scraping http://www.openculture.com/2014/06/samuel-beckett-drives-andre-the-giant-to-school.html ...'
b'!!FAIL!! Failed to load http://www.openculture.com/2014/06/samuel-beckett-drives-andre-the-giant-to-school.html'
HTTPConnectionPool(host='www.openculture.com', port=80): Read timed out. (read timeout=12.1)
Name                              Elona: Eternal League of Nefia
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                         http://ylvania.org/
Released                                              2008/01/31
Updated                                               2009/05/17
Developer                                                    Noa
Theme                                                    Fantasy
Influences                  ADOM, AngBand, NetHack, Falcon's Eye
Name: 67, dtype: object
http://duckduckgo.com/html/?q=%22Elona%3A%20Eternal%20League%20of%20Nefia%22%20AND%20Noa%20AND%20game%20AND%20%28interview%20OR%20morte

b'   |MSG| Scraping http://www.roguetemple.com/7drl/2016/ ...'
b'   |MSG| Scraping https://mashable.com/2016/06/14/game-of-thrones-lannister-theory ...'
b'   |MSG| Scraping https://winteriscoming.net/2016/12/27/the-rich-history-of-sigils-in-game-of-thrones/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.jesperjuul.net/ludologist/2004/02/22/the-definitive-history-of-games-and-stories-ludology-and-narratology/ ...'
Name                                               Fabula Divina
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                 http://www.fabuladivina.com
Released                                              2012/01/23
Updated                                               2012/01/30
Developer                                     Cannon Technolgies
Theme                                                    Fantasy
Influences                   Ultima, Torchlight, ADOM, Minecraft
Name: 73, dtype: object
http://duckduckgo.com/ht

b'   |MSG| Scraping https://www.theguardian.com/science/head-quarters/2013/sep/19/neuroscience-psychology ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=P4Um97AUqp4 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.9news.com.au/national/2018/08/06/10/14/sydney-man-admits-foreign-fighting-charge ...'
b'   |MSG| Scraping http://rec.games.roguelike.development.narkive.com/NpsRuvvU/7drl-success-well-you-judge-flatlinerl ...'
b'   |MSG| Found article'
Name                              FOR - A Fallout-like Roguelike
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                               https://github.com/lcarst/for
Released                                              1000/00/00
Updated                                               1000/00/00
Developer                                           User:w00tles
Theme                                     Post-apocalyptic scifi
Influences                                        Fallout, Crawl
Name: 

Name                                                Gatecrashers
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link            http://forums.te4.org/viewtopic.php?f=40&t=39055
Released                                              2013/11/08
Updated                                               2013/11/26
Developer                                            GuyNamedJoe
Theme                                                    Fantasy
Influences                           TOME 4, Shiren the Wanderer
Name: 84, dtype: object
http://duckduckgo.com/html/?q=%22Gatecrashers%22%20AND%20GuyNamedJoe%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.thebalancecareers.com/manager-interview-questions-and-best-answers-2061211 ...'
b'   |MSG| Scraping https://te4.org/games/gatecrashers ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://icebreakerideas.com/who-am-i-game/ ...'
b'   |MSG| Scraping https://www.wrike.co

http://duckduckgo.com/html/?q=%22Gore%20Grounds%22%20AND%20Badscribbler%20Badscribbler%40gmail.com%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
Name                                                        Grid
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                             http://www.caffeineoverdose.me/
Released                                              2015/00/00
Updated                                               2015/01/16
Developer                             Andrew Wright (aka roocey)
Theme                                            Science Fiction
Influences         Brogue, The Binding of Isaac, Slimy Lichmummy
Name: 91, dtype: object
http://duckduckgo.com/html/?q=%22Grid%22%20AND%20Andrew%20Wright%20%28aka%20roocey%29%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://andrewarticlesandstories.wordpress.com/ ...'
b'   |MSG| Scraping https://www.yout

http://duckduckgo.com/html/?q=%22Hack%22%20AND%20Jay%20Fenlason%20OR%20%20//%20Kenny%20Woodland%20OR%20%3Cbr/%3E%20//%20Mike%20Thome%3Cbr/%3E%20//%20and%20Jon%20Payne%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://archive.org/details/HACK103 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.qwant.com/game/hack?l=de ...'
b'   |MSG| Scraping https://nethackwiki.com/wiki/Jay_Fenlason%27s_Hack ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://groups.google.com/d/topic/rec.games.hack/3qVaxNFkcVc ...'
b'   |MSG| Scraping http://www.delorie.com/gnu/docs/nethack/nethack.6.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://nethack.wikia.com/wiki/Game_history ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.mankier.com/6/hack ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://nethackwiki.com/wiki/Game_history ...'
b'   |MSG| Found article'
Name                                    

b'   |MSG| Found article'
b'   |MSG| Scraping https://translate.google.ru/ ...'
b'   |MSG| Scraping https://www.steamkiwi.com/app/434880/history ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://m-torrent.org/load/pc_games/easy_game/hieroglyphika_2016_pc_124_repack_ot_other_39_s/99-1-0-24420 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://gamess.org/hieroglyphika-review/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gamesbae.com/hieroglyphika-v130/ ...'
b'   |MSG| Scraping http://gamestorrent.co/hieroglyphika.html ...'
b'   |MSG| Scraping https://rawg.io/games/hieroglyphika ...'
b'   |MSG| Found article'
Name                                                   HumFallRL
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                  http://vedrl.wordpress.com
Released                                              2013/03/07
Updated                                               2014/09/11
Developer                       

http://duckduckgo.com/html/?q=%22iNetHack%22%20AND%20Dirk%20Zimmermann%20%28port%29%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://rawg.io/games/inethack2 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://futureshocksoftware.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://itunes.apple.com/pt/app/inethack2/id962114968 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=S-BAZJE6CF4 ...'
b'   |MSG| Scraping http://game.feng.com/game/read/index-id-1616056.shtml ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.facebook.com/ZIMMERMANN.Sydney ...'
b'   |MSG| Scraping http://blog.dirkz.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://appsftw.com/app/inethack2 ...'
b'   |MSG| Found article'
Name                                                Infra Arcana
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                  https://sit

b'   |MSG| Found article'
b'   |MSG| Scraping https://attnam.com/ ...'
b'   |MSG| Scraping http://www.pcgaming.ws/viewgame.php?game=iter_vehemens_ad_necem ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gamecupid.com/games/ivan-iter-vehemens-ad-necem ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://archive.org/details/IvanWin050 ...'
b'   |MSG| Found article'
Name                                         JauntTrooper series
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                                         NaN
Released                                              1994/00/00
Updated                                               1000/00/00
Developer                                        David Scheifler
Theme                                            Science Fiction
Influences                                         Doomsday 2000
Name: 113, dtype: object
http://duckduckgo.com/html/?q=%22JauntTrooper%20series%22%20AND%20Da

b'   |MSG| Found article'
b'   |MSG| Scraping http://www.downloadcollection.com/freeware/039cause.htm ...'
b'!!FAIL!! Failed to load http://www.downloadcollection.com/freeware/039cause.htm'
HTTPSConnectionPool(host='www.downloadcollection.com', port=443): Max retries exceeded with url: /freeware/039cause.htm (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x10f127400>: Failed to establish a new connection: [Errno 61] Connection refused',))
b'   |MSG| Scraping https://osdn.net/users/bowkenken/ ...'
b'   |MSG| Scraping http://www.downloadcollection.com/freeware/labyrinths.htm ...'
b'!!FAIL!! Failed to load http://www.downloadcollection.com/freeware/labyrinths.htm'
HTTPSConnectionPool(host='www.downloadcollection.com', port=443): Max retries exceeded with url: /freeware/labyrinths.htm (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x110a13080>: Failed to establish a new connection: [Errno 61] Connection refused',))

b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gofundme.com/legerdemain-source-code-ransom ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.tumblr.com/search/Legerdemain ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://blog.roguetemple.com/2008/01/06/legerdemain/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.smashwords.com/profile/view/NathanJerpe ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.wowhead.com/?quest=13101 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.thefullwiki.org/Legerdemain ...'
Name                                      Linley's Dungeon Crawl
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                http://www.dungeoncrawl.org/
Released                                              1995/00/00
Updated                                               2005/00/00
Developer                                 Linley Henzell, others
Theme                    

b'   |MSG| Scraping https://www.youtube.com/watch?v=FL1AjvdIJZA ...'
b'   |MSG| Scraping http://www.ttlg.com/FORUMs/showthread.php?t=51580 ...'
b'   |MSG| Scraping https://www.supercheats.com/xbox360/questions/theelderscrollsivoblivion/90097/i-m-on-a-quest-of-the-mage-gui.htm ...'
b'   |MSG| Scraping http://www.ironworksforum.com/forum/showthread.php?t=30783 ...'
b'!!FAIL!! Failed to load http://www.ironworksforum.com/forum/showthread.php?t=30783'
HTTPConnectionPool(host='www.ironworksforum.com', port=80): Max retries exceeded with url: /forum/showthread.php?t=30783 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x1107e6d68>, 'Connection to www.ironworksforum.com timed out. (connect timeout=9.1)'))
b'   |MSG| Scraping https://forums.elderscrollsonline.com/en/discussion/374307/undaunted-fighters-and-mage-guild-leveling ...'
b'   |MSG| Scraping http://anewagecampaign.wikia.com/wiki/The_Council_of_Nine_and_the_Mage_Guild ...'
b'   |MSG| Scraping https://www.he

b'   |MSG| Found article'
b'   |MSG| Scraping https://www.mobygames.com/game/dos/mag ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.dosgames.com/g_rpg.php ...'
b'   |MSG| Found article'
Name                                                       Miner
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                               http://chomikuj.pl/revelati00
Released                                              2016/00/00
Updated                                               2016/00/00
Developer                                               Revelati
Theme                                          fantasy, survival
Influences                                             Gem miner
Name: 135, dtype: object
http://duckduckgo.com/html/?q=%22Miner%22%20AND%20Revelati%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.youtube.com/watch?v=P9JYgBmfL-Y ...'
b'   |MSG| Scraping https://steamcommunity.c

http://duckduckgo.com/html/?q=%22NetHack%22%20AND%20The%20NetHack%20DevTeam%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://nethackwiki.com/wiki/DevTeam ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=SjuTyJlgLJ8 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.nethack.org/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://nethack.ru.uptodown.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://zenway.ru/page/nethack ...'
b'   |MSG| Scraping https://github.com/NetHack ...'
b'   |MSG| Scraping https://everipedia.org/wiki/NetHack/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://procedural-generation.isaackarth.com/2016/04/15/an-interview-with-the-nethack-devteam-legendarily.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.moregameslike.com/nethack/ ...'
b'   |MSG| Found article'
Name                                                   NitroHack
RogueTemple    

b'   |MSG| Found article'
b'   |MSG| Scraping http://www.davidkinder.co.uk/omega.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://archive.org/details/Omega_1020 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://interviewpenguin.com/interview-questions-and-answers/ ...'
Name                                                    Oangband
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                    http://www.oangband.com/
Released                                              1998/08/13
Updated                                               2006/02/10
Developer             Bahman Rabii (''bahman''@''oangband.com'')
Theme                                                    Fantasy
Influences                                               Angband
Name: 147, dtype: object
http://duckduckgo.com/html/?q=%22Oangband%22%20AND%20Bahman%20Rabii%20%28%27%27bahman%27%27%40%27%27oangband.com%27%27%29%20AND%20game%20AND%20%28interview%20OR%

b'   |MSG| Found article'
b'   |MSG| Scraping https://plus.google.com/+WhoCaresGamingReviews/posts/EEkNTTPh6Uq ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=K8dxc807R-4 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=hb-PaRwieh0 ...'
b'   |MSG| Scraping http://www.zincland.com/powder/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://appadvice.com/game/app/powder/347454871 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://AppAgg.com/developer/jeff-lait/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gamebrew.org/wiki/Powder ...'
Name                                                       PRIME
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                     http://prime-rogue.net/
Released                                              2011/03/03
Updated                                               2017/03/04
Developer           Psiweapon<br/>

http://duckduckgo.com/html/?q=%22PWMAngband%22%20AND%20PowerWyrm%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping http://angband.oook.cz/forum/showthread.php?t=8459 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=_4YfU2s0jik ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://angband.oook.cz/forum/showthread.php?t=4722 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.wowhead.com/?quest=11238 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.careerprofiles.info/conflict-resolutions-interview-questions.html ...'
b'   |MSG| Scraping https://www.wrike.com/blog/team-building-games/ ...'
b'   |MSG| Scraping https://biginterview.com/blog/2013/09/behavioral-interview-questions-conflict.html ...'
b'   |MSG| Scraping http://www.coolmath-games.com/0-evil-wyrm ...'
b'   |MSG| Scraping https://www.gamezop.com/evil-wyrm/ ...'
Name                                       Quest for the U

b'   |MSG| Scraping https://playdos.games/game/play-reaping-the-dungeon-online/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.xtdos.com/reaping-the-dungeon ...'
Name                                                   Red Rogue
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                     http://www.redrogue.net
Released                                              2012/12/04
Updated                                               2016/03/10
Developer                                            Aaron Steed
Theme                                   Traditional / Platformer
Influences                                 Rogue, Hack, Spelunky
Name: 164, dtype: object
http://duckduckgo.com/html/?q=%22Red%20Rogue%22%20AND%20Aaron%20Steed%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.youtube.com/watch?v=DASCE8BbvOI ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.f

b'   |MSG| Found article'
b'   |MSG| Scraping https://archive.org/details/rfk_original_submission_1600000.3-robot ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=ntVjLNl2_ro ...'
b'   |MSG| Found article'
Name                                                      Rodney
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                http://slashware.net/rodney/
Released                                              2013/05/14
Updated                                               2013/06/19
Developer                                                  Slash
Theme                                                    Fantasy
Influences                                         DoomRL, Rogue
Name: 169, dtype: object
http://duckduckgo.com/html/?q=%22Rodney%22%20AND%20Slash%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.youtube.com/watch?v=DERUKk4tCOc ...'
b'   |MSG| Scraping https://www.yo

b'   |MSG| Scraping http://southerncrossyachting.com.au/prices-and-dates-for-theory-courses/ ...'
b'   |MSG| Scraping https://www.eastsail.com.au/ ...'
b'   |MSG| Scraping http://www.yachttrainingvictoria.com.au/ ...'
b'   |MSG| Scraping https://www.theguardian.com/science/head-quarters/2013/sep/19/neuroscience-psychology ...'
Name                                                 Rogue Touch
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                   http://www.chronosoft.com/roguetouch.html
Released                                              2009/02/19
Updated                                               2010/07/09
Developer                                             ChronoSoft
Theme                                                        NaN
Influences                                                 Rogue
Name: 174, dtype: object
http://duckduckgo.com/html/?q=%22Rogue%20Touch%22%20AND%20ChronoSoft%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20hi

b'   |MSG| Found article'
b'   |MSG| Scraping https://www.history.com/topics/the-khmer-rouge ...'
b'   |MSG| Scraping https://www.themuse.com/advice/how-to-answer-the-31-most-common-interview-questions ...'
b'   |MSG| Scraping https://www.theguardian.com/news/blog/2009/feb/16/cambodia-khmer-rouge ...'
b'   |MSG| Scraping https://www.thoughtco.com/history-of-photography-and-the-camera-1992331 ...'
b'   |MSG| Scraping https://www.enotes.com/homework-help/why-important-study-history-explain-your-answer-389341 ...'
b'   |MSG| Scraping https://www.job-interview-site.com/it-interview-questions-and-answers.html ...'
Name                               Ruoeg, a minimalist roguelike
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                     https://github.com/SanchezSobrino/Ruoeg
Released                                              2013/05/03
Updated                                               2013/05/03
Developer                               Santiago Sanchez S

b'   |MSG| Scraping http://www.html5gamedevs.com/topic/9706-wip-shiny-gauntlet/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://spritewrench.itch.io/shinygauntlet ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://gauntletgame.tumblr.com/post/131100261546/glen-again-developer-behind-shiny-gauntlet-and ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=-QAmcAqL1Mw ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://rawg.io/games/shiny-gauntlet ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gamesmojo.com/games/action/shiny-gauntlet ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://steamcommunity.com/sharedfiles/filedetails/?id=428597835 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gameseira.com/games/action/shiny-gauntlet ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://egtheory.wordpress.com/2015/03/02/ipd/ ...'
Name                                                 Shoot First
RogueTempl

b'   |MSG| Scraping https://www.quora.com/What-is-the-difference-between-half-and-one-half?share=1 ...'
b'   |MSG| Scraping https://znanija.com/task/19804406 ...'
b'   |MSG| Scraping https://www.prizant-law.com/asylum-interview-and-questions/ ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=L8pSoKfjsgw ...'
b'   |MSG| Scraping https://gaming.stackexchange.com/q/13674 ...'
b'   |MSG| Scraping https://www.enotes.com/homework-help/why-important-study-history-explain-your-answer-389341 ...'
b'   |MSG| Scraping https://planningwithkids.com/2009/11/17/characteristics-of-three-and-a-half-year-old-behavior/ ...'
b'   |MSG| Scraping http://psc.dss.ucdavis.edu/sommerb/sommerdemo/interview/strengths.htm ...'
b'   |MSG| Scraping https://steamcommunity.com/sharedfiles/filedetails/?id=903981105 ...'
b'   |MSG| Scraping http://career.guru99.com/top-25-interview-questions-for-game-developer/ ...'
Name                                                 SilverQuest
RogueTemple    http://roguebasin.

b'   |MSG| Scraping https://prezi.com/kinkwv5f0fv-/compare-maleficent-and-sleeping-beauty/ ...'
b'   |MSG| Scraping http://www.dltk-teach.com/rhymes/sleeping-beauty/story.htm ...'
b'   |MSG| Scraping http://www.gameslist.com/Sleeping-Beauty-And-Briar-Beauty ...'
b'   |MSG| Scraping https://www.ranker.com/list/details-from-the-original-sleeping-beauty/genevieve-carlton ...'
Name                                                Space Grunts
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                      http://spacegrunts.com
Released                                              2015/09/15
Updated                                               2015/11/08
Developer                                            Orangepixel
Theme                                                     Sci-fi
Influences                              Brogue, Half-way, DoomRL
Name: 196, dtype: object
http://duckduckgo.com/html/?q=%22Space%20Grunts%22%20AND%20Orangepixel%20AND%20

b'   |MSG| Scraping https://www.historytoday.com/paul-lay/no-more-heroes-thomas-cromwell-and-thomas-more ...'
Name                                         Star Wars Miniature
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                         https://github.com/ondras/star-wars
Released                                              2012/11/20
Updated                                               2012/11/20
Developer                                                 Ondras
Theme                                                  Star Wars
Influences                                                   NaN
Name: 201, dtype: object
http://duckduckgo.com/html/?q=%22Star%20Wars%20Miniature%22%20AND%20Ondras%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://github.com/ondras/star-wars ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://rover.ebay.com/rover/1/711-53200-19255-0/1?icep_ff3=9&pub=5574933636&tooli

b'   |MSG| Found article'
Name                                       Tale written in bytes
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link           http://aniolycyberprzestrzeni.pl/Projects/Deta...
Released                                              2018/01/12
Updated                                               2018/01/12
Developer                                        Dawid Farbaniec
Theme                                              Cyber-fantasy
Influences                        Roguelike and Platformer games
Name: 206, dtype: object
http://duckduckgo.com/html/?q=%22Tale%20written%20in%20bytes%22%20AND%20Dawid%20Farbaniec%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.kongregate.com/games/iamdavid_f/tale-written-in-bytes ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.ted.com/talks/david_christian_big_history ...'
b'   |MSG| Scraping https://aniolycyberprzestrzeni.pl/Projects

http://duckduckgo.com/html/?q=%22Teemu%22%20AND%20Paul%20Pekkarinen%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://sites.google.com/view/teemupekkarinen/ ...'
b'   |MSG| Scraping https://www.cs.helsinki.fi/teemu.roos ...'
b'   |MSG| Scraping https://twitter.com/teemupekkarinen ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=3iim3CCygU0 ...'
b'   |MSG| Scraping https://www.barstoolsports.com/philadelphia/correct-opinion-paul-kariya-and-teemu-selanne-are-the-most-nostalgic-duo-in-hockey-history/ ...'
b'   |MSG| Scraping https://your-teachers.ru/anglijskij/testy-ege/grammatika-3/01/test-02-po-ege-anglijskij-yazyk-otvet ...'
b'   |MSG| Scraping http://lang-8.com/1118120/journals/52524489613078084969460757647161494634 ...'
b'   |MSG| Scraping https://www.tumblr.com/search/teemu%20pekkarinen ...'
b'   |MSG| Scraping https://sports.yahoo.com/teemu-selanne-paul-kariya-hall-fame-friendship-214333920.html ...'
Name           

b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=AoWl9oI4B7g ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.reddit.com/r/tolagal/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.theglobeandmail.com/sports/article-trump-and-the-black-athlete-witnessing-one-of-the-oldest-and-ugliest/ ...'
b'   |MSG| Scraping https://envul.com/the-depths-of-tolagal-change-notes/ ...'
b'   |MSG| Scraping https://www.pcgamesn.com/the-depths-of-tolagal ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.metacritic.com/game/pc/the-depths-of-tolagal ...'
b'   |MSG| Found article'
Name                                                 The Dungeon
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                        http://www.jtiv.net/
Released                                              2010/10/31
Updated                                               2010/12/18
Developer                                  

b'   |MSG| Scraping https://www.bnd.com/living/liv-columns-blogs/answer-man/article187170293.html ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=o-qc-s3OjKM ...'
b'   |MSG| Scraping http://www.tlc.com/tv-shows/90-day-fiance/90-day-fiance-features/mike-aziza-where-are-they-now-interview/ ...'
b'   |MSG| Scraping https://theinterviewguys.com/about-the-interview-guys/ ...'
b'   |MSG| Scraping http://theideas.wikia.com/wiki/Mike_and_Sulley:_The_Movie ...'
b'   |MSG| Scraping http://www.dailymail.co.uk/news/article-6025547/US-warns-Russia-enforcing-North-Korea-sanctions.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.michaelpage.ae/advice/career-advice/job-interview-tips/top-10-interview-questions-and-how-answer-them ...'
b'   |MSG| Scraping http://www.usanetwork.com/suits/blog/14-times-we-were-jealous-of-harvey-and-mikes-friendship ...'
b'   |MSG| Scraping http://weareisrael.org/2015/04/13/the-land-and-the-seed/ ...'
Name                                       

b'   |MSG| Scraping http://www.thetempleoftorment.net/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://fearandhunger.wikia.com/wiki/The_Temple_of_Torment ...'
b'   |MSG| Scraping https://forums.roguetemple.com/index.php?topic=3797.0 ...'
b'!!FAIL!! Failed to load https://forums.roguetemple.com/index.php?topic=3797.0'
HTTPSConnectionPool(host='forums.roguetemple.com', port=443): Max retries exceeded with url: /index.php?topic=3797.0 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))
b'   |MSG| Scraping http://bay12forums.com/smf/index.php?topic=158611.0 ...'
b'   |MSG| Scraping https://www.moddb.com/games/the-temple-of-torment ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.theglobeandmail.com/sports/article-trump-and-the-black-athlete-witnessing-one-of-the-oldest-and-ugliest/ ...'
b'   |MSG| Scraping https://www.eater.com/2018/8/6/17631452/ruby-tandoh-sugar-history-kara-wa

http://duckduckgo.com/html/?q=%22TileRogue%22%20AND%20Donnie%20Russell%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://sites.google.com/site/donnierussellii/tilerogue ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://donrussellhomes.com.au/homes/ ...'
b'   |MSG| Scraping https://tilerogue.in.uptodown.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://tilerogue.id.uptodown.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.facebook.com/donrussellhomes/ ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=cwiJlQUrwdA ...'
b'   |MSG| Scraping https://www.rogerebert.com/demanders/lodge-49-is-interesting-but-undercooked-showcase-for-wyatt-russell ...'
b'   |MSG| Scraping https://www.therussell.com.au/ ...'
Name                                                     TomeNET
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                      http://www.to

http://duckduckgo.com/html/?q=%22Ularn%22%20AND%20Phil%20Cordier%20OR%20%20Josh%20Brandt%20OR%20%20Josh%20Bressers%20OR%20%20David%20Richerby%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://translate.google.ru/ ...'
b'   |MSG| Scraping http://larn.org/history/history.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://ru-ru.facebook.com/public/Josh-Brandt ...'
b'   |MSG| Scraping https://twitter.com/joshbrandt ...'
b'   |MSG| Scraping https://www.job-interview-site.com/chef-interview-questions-and-answers.html ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=TzCay7qcPyM ...'
b'   |MSG| Scraping https://www.ranker.com/review/josh-hutcherson/1326575 ...'
b'   |MSG| Scraping https://github.com/joshbressers/ularn ...'
b'   |MSG| Scraping https://teachershelp.ru/verbickaya-m-v-forward-angliyskiy-yazyk-dlya-6-kl-2/ ...'
Name                                                  UltraRogue
RogueTemple    http://rogueb

b'   |MSG| Scraping http://quiz-questions.net/ ...'
b'   |MSG| Scraping https://conversationstartersworld.com/21-questions-game/ ...'
Name                                                UnReal World
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                   http://www.unrealworld.fi
Released                                              1992/00/00
Updated                                               2018/04/01
Developer                                          Sami Maaranen
Theme                                                   Survival
Influences                                                   NaN
Name: 243, dtype: object
http://duckduckgo.com/html/?q=%22UnReal%20World%22%20AND%20Sami%20Maaranen%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://www.youtube.com/watch?v=Fibxbx31LUs ...'
b'   |MSG| Scraping http://www.unrealworld.fi/urw_reviews.html ...'
b'   |MSG| Found article'
b'

b'   |MSG| Scraping https://apkpure.com/web-raid-mobile/de.agsteiner.android.webraid ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=Mfo6qx_tNbA ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://apkga.com/de.agsteiner.android.webraid ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.brothergames.com/android/web_raid_mobile-150198.html ...'
b'   |MSG| Scraping https://sameapk.com/web-raid-mobile/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://androidappsapk.co/detail-web-raid-mobile/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.similarplay.com/karlheinz_agsteiner/web_raid_mobile/apps/de.agsteiner.android.webraid ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://1apk.co/products-info/web-raid-mobile-apk-file ...'
Name                                                   WindTales
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link           http://www.windowsphone.com/en-us/store/

b'   |MSG| Scraping https://www.twinfinite.net/2015/05/the-witcher-3-wild-hunt-guide-how-to-make-world-state-decisions/ ...'
b'   |MSG| Scraping http://mossmouth.com/forums/index.php?topic=1555.0;wap2 ...'
b'   |MSG| Scraping https://www.9news.com.au/national/2018/08/06/10/14/sydney-man-admits-foreign-fighting-charge ...'
b'   |MSG| Scraping http://collider.com/game-of-thrones-animated-history-seven-kingdoms/ ...'
b'   |MSG| Scraping http://www.triviaquestionsnow.com/for/sports-trivia ...'
b'   |MSG| Scraping https://gamesdeveloping.com/ ...'
Name                                                       XLarn
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                   http://swinfjord-games.com/LarnII-us.html
Released                                              2015/05/25
Updated                                               2016/07/30
Developer                                        Swinfjord-Games
Theme                                                    Fant

b'   |MSG| Scraping http://www.statemaster.com/encyclopedia/Topi-Ylinen ...'
b'   |MSG| Found article'
Name                                                    ZigClimb
RogueTemple    http://roguebasin.roguelikedevelopment.org/ind...
Link                                            http://flend.net
Released                                              2010/01/10
Updated                                               1000/00/00
Developer                                                  flend
Theme                                                      Egypt
Influences                                     A Journey to Hell
Name: 259, dtype: object
http://duckduckgo.com/html/?q=%22ZigClimb%22%20AND%20flend%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping http://flend.net/zigclimb/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://translate.google.ru/ ...'
b'   |MSG| Scraping https://www.youtube.com/watch?v=9jiYciZ7Npk ...'
b'   |MSG| 

In [83]:
# For Roguelike-like games, we build a corpus with Wikipedia and DuckDuckGo
# corpus = read_json('corpus-roguelike-like.json')
# if not corpus:
corpus = []
    
for index, roguelike in roguelikelikes.iterrows():
    print(roguelike)
    title = roguelike['Name']
    text = []
    
    page = scrape_wiki(title)
    if page:
        text.append(page.content)

    developers = str(roguelike['Developer']).replace(',', ' OR ')
    links = scrape_duckduckgo(title, developers)
    
    for link in links[:20]:
        if 'roguebasin.roguelikedevelopment.org' in link \
            or 'roguebasin.com' in link \
            or 'wikipedia' in link:
            continue
        content = scrape(link)
        if content:
            text.append(content)
    
    corpus.append({"title": title, "text": text})
  
save_json('corpus-roguelike-like.json', corpus)

Name                           ToeJam & Earl
Released                                1991
Updated                                  NaN
Developer     Johnson Voorsanger Productions
Theme                                Fantasy
Influences                               NaN
Name: 0, dtype: object
http://duckduckgo.com/html/?q=%22ToeJam%20%26%20Earl%22%20AND%20Johnson%20Voorsanger%20Productions%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping http://gaming.wikia.com/wiki/ToeJam_%26_Earl_Productions ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://segaretro.org/ToeJam_%26_Earl ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.facebook.com/pages/ToeJam-Earl-Productions/109640125728425 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://toejamandearl.wikia.com/wiki/ToeJam_%26_Earl_in_Panic_on_Funkotron ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.sega-16.com/2005/02/history-of-toejam-earl/ ...'
b'



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))


http://duckduckgo.com/html/?q=%22Diablo%22%20AND%20Blizzard%20North%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://us.battle.net/forums/en/d3/topic/7415795753 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.diabloii.net/blog/comments/blizzard-north-didnt-know-diablo-is-story-ending ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.diablowiki.net/Blizzard_North ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://topasnew.club/6645-diablo-1996-istoriya-sozdaniya-igry-sozdanie-blizzard-north.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=PJDMJaJbRtQ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gamespot.com/articles/new-diablo-game-project-in-development-blizzard-co/1100-6459406/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://diablo.gamepedia.com/Blizzard_Entertainment ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https:

b'   |MSG| Scraping https://strange-adventures-in-infinite-space.en.uptodown.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.ag.ru/games/strange-adventures-in-infinite-space/review ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.digital-eel.com/sais/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=B5IcX0gY3Js ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.myabandonware.com/game/strange-adventures-in-infinite-space-3qy ...'
b'   |MSG| Scraping https://strange-adventures-in-infinite-space.soft112.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.tor.com/2018/08/07/book-reviews-an-informal-history-of-the-hugos-by-jo-walton/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.g4g.it/2009/10/01/strange-adventures-in-infinite-space/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gamespot.com/reviews/strange-adventures-in-infinite-space-review/1900-2856375/ ...'
b'  

b'   |MSG| Found article'
b'   |MSG| Scraping http://therutor.org/torrent/432207/the-binding-of-isaac-v1.0r10-2011-pc/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://streamers.gamepedia.com/The_Binding_of_Isaac ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://Torrent-Games.net/news/the_binding_of_isaac_edmund_mcmillen_eng/2011-09-29-20262 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.polygon.com/features/2018/5/15/17345246/the-past-present-and-future-of-the-binding-of-isaac ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.pcgame.com/the-binding-of-isaac ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://bestgamer.net/load/3125-the-binding-of-isaac-edmund-mcmillen-eng-p.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://RuTracker.org/forum/viewtopic.php?t=3773360 ...'
b'   |MSG| Scraping https://steamcommunity.com/workshop/filedetails/?id=895960454 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://edmundmcmil

b'   |MSG| Found article'
b'   |MSG| Scraping http://jacksepticeye.wikia.com/wiki/Don%27t_Starve ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://m.youtube.com/user/kleient ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gamesindustry.biz/articles/2013-01-23-dont-starve-kleis-grand-experiment ...'
b'   |MSG| Found article'
Name               Rogue Legacy
Released                   2013
Updated                     NaN
Developer     Cellar Door Games
Theme                   Fantasy
Influences                  NaN
Name: 11, dtype: object
http://duckduckgo.com/html/?q=%22Rogue%20Legacy%22%20AND%20Cellar%20Door%20Games%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping http://cellardoorgames.com/rogue-legacy-design-notes/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://store.steampowered.com/developer/cellardoorgames/ ...'
b'   |MSG| Scraping https://roguelegacy.gamepedia.com/Cellar_Door_Games ...'
b'   |M

b'   |MSG| Found article'
b'   |MSG| Scraping https://gamesmojo.com/games/action/towerclimb ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gamemonitoring.net/en/games/towerclimb ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://rawg.io/games/towerclimb ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gamin.me/developers/davioware-and-quazi ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://gamesystemrequirements.com/game/towerclimb/news-videos ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://nicoblog.org/pc-games/towerclimb-usa/ ...'
b'   |MSG| Scraping http://gamemod4.com/game/20277-towerclimb/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gamefaqs.gamespot.com/pc/181438-towerclimb ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://steamspy.com/dev/Davioware+and+Quazi ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://gamestorrent.co/towerclimb.html ...'
b'   |MSG| Scraping https://gamejolt.com/games/towerclim

b'   |MSG| Found article'
b'   |MSG| Scraping http://www.ign.com/games/tower-of-guns ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://greenlitgaming.com/tower-of-guns-interview/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://www.gamersdecide.com/pc-games/tower-guns ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gamecrate.com/reviews/review-tower-guns-reloads-nostalgia/10549 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.pcinvasion.com/tower-guns-review/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://soedesco.com/games/tower-of-guns/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://tvtropes.org/pmwiki/pmwiki.php/VideoGame/TowerOfGuns ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gamefaqs.gamespot.com/ps4/103022-tower-of-guns ...'
b'   |MSG| Found article'
Name          Crypt of the Necrodancer
Released                          2014
Updated                            NaN
Developer         Brace Yoursel

b'   |MSG| Scraping https://dungeonoftheendless.gamepedia.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://store.steampowered.com/app/744930/Dungeon_of_the_Endless__Digital_Artbook/ ...'
b'   |MSG| Scraping https://www.pcgamer.com/dungeon-of-the-endless/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.keengamer.com/Articles/View/13176 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=7X4sVw_42L0 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.facebook.com/dungeonoftheendless/?ref=page_internal ...'
b'   |MSG| Scraping https://twitter.com/amplitude/status/890564938485317633 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.usgamer.net/articles/dungeon-of-the-endless-pc-review ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://steamcommunity.com/games/249050/announcements/detail/72423209268270187 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://mytorrent.xyz/dungeon-of-the-endless/

b'   |MSG| Found article'
b'   |MSG| Scraping https://steamcommunity.com/app/242680/discussions/0/1696049513769195793/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.japantimes.co.jp/news/2018/08/06/national/ican-champions-grass-roots-efforts-persuade-japan-others-support-nuclear-free-world/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.gameskinny.com/google-amp/ybob1/nuclear-throne-review-fish-has-gills-and-i-have-no-skills ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.youtube.com/watch?v=VFTL5T-6Cdc ...'
b'   |MSG| Scraping http://devgamm-talks.com/rami-ismail-nuclear-throne/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.facebook.com/NuclearThroneGame/ ...'
b'   |MSG| Scraping https://waypoint.vice.com/en_us/article/kwzgje/read-an-excerpt-from-120-years-of-vlambeer-and-friends ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://yal.cc/nuclear-throne-u99/ ...'
b'   |MSG| Scraping http://ocean0fgames.com/nuclear-

b'   |MSG| Scraping https://www.gamepressure.com/games/runestone-keeper/z94ac1 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://freegogpcgames.com/815/runestone-keeper/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.runestonekeeper.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://pcgamingwiki.com/wiki/Runestone_Keeper ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://gamefaqs.gamespot.com/pc/137653-runestone-keeper ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://store.steampowered.com/app/339400/Runestone_Keeper/ ...'
b'   |MSG| Scraping https://www.microsoft.com/en-us/p/runestonekeeper/9nn1r85tq39s ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://twitter.com/runestonekeeper ...'
b'   |MSG| Found article'
Name          Skyshine's Bedlam
Released                   2015
Updated                     NaN
Developer        Skyshine Games
Theme          Post-apocalyptic
Influences                  NaN
Name: 28, dtype: object

http://duckduckgo.com/html/?q=%22Enter%20the%20Gungeon%22%20AND%20Dodge%20Roll%20AND%20game%20AND%20%28interview%20OR%20mortem%20OR%20history%20OR%20develop%29
b'   |MSG| Scraping https://twitter.com/dodgerollgames ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://enterthegungeon.gamepedia.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://dodgeroll.com/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://gameranx.com/features/id/48447/article/enter-the-gungeon-interview-with-dodge-rolls-dave-crooks-the-past-present-and-future/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.reddit.com/r/EnterTheGungeon/ ...'
b'   |MSG| Scraping https://steamcommunity.com/app/311690/discussions/0/135507548127470722/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.trueachievements.com/game/Enter-the-Gungeon ...'
b'   |MSG| Scraping https://www.pcgamer.com/enter-the-gungeon-review/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://oceanofga

b'   |MSG| Found article'
b'   |MSG| Scraping https://pediaview.com/openpedia/List_of_roguelikes ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://www.revolvy.com/topic/Chronology%20of%20roguelike%20video%20games&item_type=topic ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://m.localwiki.org/ann-arbor/_explore/list ...'
b'!!FAIL!! Failed to load https://m.localwiki.org/ann-arbor/_explore/list'
HTTPSConnectionPool(host='m.localwiki.org', port=443): Read timed out. (read timeout=12.1)
b'   |MSG| Scraping https://vdocuments.mx/documents/ad-75-2-the-1970s-is-here-and-now.html ...'
b'   |MSG| Scraping https://www.manualtolyf.com/2014/04/joel-lambert-tries-to-evade-philippine.html ...'
b'   |MSG| Scraping https://keithtopping.blogspot.nl/2014/10/i-wasnt-there-i-watched-it-on-telly.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://allevents.in/vienna/2016-08-21 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://vdocuments.site/documents/move-commercia

b'   |MSG| Found article'
b'   |MSG| Scraping https://www.baseball-reference.com/players/r/roberbr01.shtml ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://your-teachers.ru/anglijskij/testy-ege/grammatika-3/01/test-20-po-ege-anglijskij-yazyk-otvet ...'
b'   |MSG| Scraping https://www.job-interview-site.com/what-is-your-greatest-achievement-interview-question-and-answers.html ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://bgr.com/2015/12/14/comcast-ceo-brian-roberts-interview/ ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://znanija.com/task/20070252 ...'
b'   |MSG| Found article'
b'   |MSG| Scraping https://targetjobs.co.uk/careers-advice/interview-questions/398212-what-is-your-most-significant-achievement-tricky-graduate-interview-question ...'
b'   |MSG| Found article'
b'   |MSG| Scraping http://tonail.com/%D1%87%D1%82%D0%B5%D0%BD%D0%B8%D0%B5-%D0%B2-%D1%84%D0%BE%D1%80%D0%BC%D0%B0%D1%82%D0%B5-%D0%B5%D0%B3%D1%8D-%D0%B2%D0%B0%D1%80%D0%B8%D0%B0%D0%BD%D1%82

In [24]:
import io
import os
import re
import bs4
import sys
import json
import time
import nltk
import urllib
import pprint
import random
import string
import requests
import wikipedia
import itertools
import collections

In [None]:
# Extract themes
# Items of interest, of genre, of identification
# Emotions of joy, sadness, frustrations
# Memory recall? Specific sentices or sentiments
corpus = read_json(os.path.join(os.getcwd(), 'data', 'corpus.json'))
distributions = {}

for game, sites in corpus.items():
    print(game)
    tagged_sentences = []
    for url, content in sites.items():
        for sentence in content:
            tagged_sentences += (encode_english(sentence))
    freqdist = nltk.FreqDist((word, tag) for word, tag in tagged_sentences if tag == u'ADJ')
    distributions[game] = freqdist

In [None]:
for game, dist in distributions.items():
    words = [x[0] for x, y in distributions[game].most_common(10)]
    print(u'{}: {}'.format(game, u', '.join(words)))

In [None]:
# Testing
scrape_wiki(u"Dungeon_(video_game)")

In [None]:
# Scrape for links
game_meta = read_json(os.path.join(os.getcwd(), 'data', 'game-sources.json'))

shuffled_game_meta = game_meta.items()
random.shuffle(shuffled_game_meta)
for game, meta in shuffled_game_meta:
    game_meta[game]['Links'] += scrape_duckduckgo(game, game_meta[game]['Developer'])
    game_meta[game]['Links'] = list(set(game_meta[game]['Links']))
    save_json(os.path.join(os.getcwd(), 'data', 'game-sources.json'), game_meta)
    time.sleep(2)

In [None]:
# Load content in search results
game_meta = read_json(os.path.join(os.getcwd(), 'data', 'game-sources.json'))
cached = read_json(os.path.join(os.getcwd(), 'data', 'corpus.json'))

output = cached
for game, meta in game_meta.items():
    if game not in output:
        output[game] = {}
    print(__message(game))
    for url in meta['Links']:
        if url in output[game] or url.endswith('pdf'):
            continue
        data = []
        html = scrape(url)
        if html and any(word in html.lower() for word in ['interview', 'mortem', 'review', 'history', 'develop', 'idea', 'inspir']):
            soup = bs4.BeautifulSoup(html)
            content = soup.select('div > p') + soup.select('body > p')
            data = [c.string.strip() for c in content if c.string and c.string.strip()]
            output[game][url] = data
            print(__message(u'Scrapped {}'.format(url)))
            save_json(os.path.join(os.getcwd(), 'data', 'corpus.json'), output)

In [None]:
# Locate mentions of games
game_LUT = set(read_json(os.path.join(os.getcwd(), 'data', 'games.json')))
game_meta = read_json(os.path.join(os.getcwd(), 'data', 'game-sources.json'))
game_articles = read_json(os.path.join(os.getcwd(), 'data', 'corpus.json'))
not_games = set(read_json(os.path.join(os.getcwd(), 'data', 'not-games.json')))

# Create a look up table for games
roguelike_LUT = {}
for game, meta in game_meta.items():
    roguelike_LUT[game] = game
    if 'AKA' in meta:
        for aka in meta['AKA']:
            roguelike_LUT[aka] = game

In [None]:
# Look through the interview articles
roguelike_relations = {}
other_relations = {}
for game, articles in game_articles.items():
    roguelike_relations[game] = []
    other_relations[game] = []
    counter = collections.Counter()
    for url, article in articles.items():
        # Intersection for fast search
        things = []
        current = u''
        for paragraph in article:
            for token in paragraph.split():
                if re.compile("^[A-Z0-9][\w:']*[\w:']|[A-Z\.]+$").match(token) or \
                        (current and token in ('the', 'of', 'no', 'to')):
                    current += u'{} '.format(token)
                elif current:
                    things.append(current.strip())
                    current = u''
        roguelike_things = [roguelike_LUT[s] for s in things if s in roguelike_LUT]
        if roguelike_things:
            roguelike_relations[game].extend(roguelike_things)
        other_things = [s for s in things if
                            s in game_LUT and
                            s not in not_games and
                            s not in roguelike_LUT and
                            len(s) > 1 and
                            not s.isdigit()]
        if other_things:
            other_relations[game].extend(other_things)

# print("\n### ROGUELIKES ###\n")
# pprint.pprint(roguelike_relations, indent=2)
# print("\n### OTHER GAMES ###\n")
# pprint.pprint(other_relations, indent=2)

save_json(os.path.join(os.getcwd(), 'generated', 'roguelike-relations.json'), roguelike_relations)
save_json(os.path.join(os.getcwd(), 'generated', 'other-relations.json'), other_relations)

In [None]:
# Construct influence network

roguelike_relations = read_json(os.path.join(os.getcwd(), 'generated', 'roguelike-relations.json'))
other_relations = read_json(os.path.join(os.getcwd(), 'generated', 'other-relations.json'))
games_years = read_json(os.path.join(os.getcwd(), 'generated', 'games-years.json'))

roguelike_influence = {}
for roguelike, other_roguelikes in roguelike_relations.items():
    roguelike_influence[roguelike] = []
    
    roguelike_relation_counter = collections.Counter()
    for other_roguelike in other_roguelikes:
        if other_roguelike != roguelike:
            roguelike_relation_counter[other_roguelike] += 1
            
    other_relation_counter = collections.Counter()
    for other_relation in other_relations[roguelike]:
        if other_relation != roguelike:
            other_relation_counter[other_relation] += 1
            
    for roguelike_relation in roguelike_relation_counter.most_common(5):
        roguelike_influence[roguelike].append(roguelike_relation[0])
        
    for other_relation in other_relation_counter.most_common(5):
        if other_relation[1] > 1:
            roguelike_influence[roguelike].append(other_relation[0])
            
#     print(u'{}\n{}\n{}\n'.format(roguelike, 
#                                    roguelike_relation_counter.most_common(3), 
#                                    other_relation_counter.most_common(3)))

games_set_small = set(itertools.chain(*(roguelike_relations.values()+other_relations.values())))
    
games_years_small = {game: int(year) for game, year in games_years.items() if game in games_set_small}
    
print(games_years_small)
                                                        
save_json(os.path.join(os.getcwd(), 'generated', 'relations.json'), roguelike_influence)
save_json(os.path.join(os.getcwd(), 'generated', 'games-years-small.json'), games_years_small)