# Code to Play

In [6]:
import os
import numpy as np
import pandas as pd
import getpass
import difflib
import requests
import random
import string
import wikipediaapi
import logging

from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain.memory import ConversationBufferMemory

In [9]:
log = logging.getLogger(__name__)
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
log.info('What is up?!')

INFO:__main__:What is up?!


## Funcs

In [14]:
def get_page_summary(wiki_page):
    """
    Retrieves a brief summary of a given Wikipedia page.

    This function takes a Wikipedia page object and returns the summary of the page. However, rather than 
    returning the entire summary, it returns only the first few lines. This is particularly useful for 
    getting a quick overview or introduction to the page's content without needing to process the entire 
    summary text.

    Parameters
    ----------
    wiki_page : WikipediaPage object
        A Wikipedia page object from which the summary is to be extracted. The object should have a 'summary' 
        attribute containing the text of the page's summary.

    Returns
    -------
    str
        A string containing the first few lines of the Wikipedia page's summary. The exact number of lines 
        returned is set to 5 in this implementation.
    """
    # return just the first few lines if there are multiple
    return wiki_page.summary[:100] + "..."
    # return ". ".join(wiki_page.summary.split("\n")[:1]) + "..."

def search_wiki(search_term):
    """Search common name for search term and returns most relevant Wiki Page"""
    search_url = f"https://en.wikipedia.org/w/index.php?search={'+'.join(search_term.split())}&title=Special:Search&profile=advanced&fulltext=1&ns0=1"
    soup = BeautifulSoup(requests.get(search_url, timeout=30).content, "html.parser")
    bad_prefixes = ["list of", "history of", "Template:", "Wikipedia:", "Category:", "Portal:", "Talk:", "Template talk:"]
    for result in soup.find_all("div", class_ = "mw-search-result-heading"):
        if result.a:
            if result.a['href']:
                text = result.a['href'].replace("/wiki/", "").strip()
                starts_with_bad_prefix = any(text.lower().replace("_", " ").startswith(prefix.lower()) for prefix in bad_prefixes)
                if not starts_with_bad_prefix:
                    return text

def getWikiTemplate():
    template = """You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is {current} and
you must select a new wikipedia page closer to the target topic {target1}. You can select a topic from
the following list:

{{links}}

If the target topic of {target2} is available, you should select the target topic {target3}. Otherwise, select
a next topic that is likely to be closer or fewer connections to the topic {target4}. Format your output as:
Next topic=<topic here>
"""
    return template
    

In [5]:
search_wiki('python programming langugae')

'Python_(programming_language)'

## LLM Bot

In [15]:
class WikiGameLLMBot():
    def __init__(self, wiki_wiki,
                 start_topic = None,
                 target_topic = None,
                 model_name='meta/llama3-70b-instruct',
                 temperature=0.1):

        assert start_topic != target_topic, "Please enter different start and target topics."
        
        ################################################################
        #
        #    Save some things
        #
        ################################################################
        
        self.wiki_wiki     = wiki_wiki
        self.start_topic   = start_topic
        self.target_topic  = target_topic
        self.target_page   = self.wiki_wiki.page(self.target_topic)
        self.start_page    = self.wiki_wiki.page(self.start_topic)
        self.current_topic = self.start_page
        self.current_page  = self.start_page
        self.visited       = [self.start_page]

        ################################################################
        #
        #    Load NVIDIA model and chatbot history
        #
        ################################################################

        if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
            nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ")
            assert nvidia_api_key.startswith("nvapi-"), f"{nvidia_api_key[:5]}... is not a valid key"
            os.environ["NVIDIA_API_KEY"] = nvidia_api_key
        else:
            nvidia_api_key = os.environ["NVIDIA_API_KEY"]
            
        self.llm = ChatNVIDIA(model       = model_name,
                              api_key     = nvidia_api_key,
                              temperature = temperature,
                             )
        self.memory = ConversationBufferMemory(ai_prefix="System")

    def get_filled_templates(self, alltitles, allsummaries):
        i = 0
        k = 100 # batch size
        templates, titles, summaries  = [], [], []
        while i < len(alltitles):
            titlesTemplate    = alltitles[i:i+k]
            summariesTemplate = allsummaries[i:i+k]
            titles.append(titlesTemplate)
            summaries.append(summariesTemplate)
            templates.append(self.get_filled_template(titlesTemplate, summariesTemplate))
            i += k
        return templates, titles, summaries
    
    def get_filled_template(self, titles, summaries):
        template = getWikiTemplate()
        template = template.format(target1 = self.target_topic,
                                   target2 = self.target_topic,
                                   target3 = self.target_topic,
                                   target4 = self.target_topic,
                                   current = self.current_topic,
                                  )
        link_table = """Topic\tDescription
--------\t\t\t-----------
"""
        for i in range(len(titles)):
            link_table += ("title:" + titles[i] + '\t' + summaries[i] + '\n')

        template = template.format(links = link_table)
        return template
    
    def take_turn(self):
        # Get all linked pages
        print(type(self.current_page))
        page_titles = list(self.current_page.links)

        badpages = ['Category', 'Help', 'Wikipedia', 'Portal', 'List', 'Talk', 'Template']
        badpages_lower = [bad.lower() for bad in badpages]
        page_titles = [title for title in page_titles if not any(title.lower().startswith(bad) for bad in badpages_lower)]
        
        pages = []

        print('Got all titles')
        for i, title in enumerate(page_titles):
            print(title + '\t\t' + str(i) + '/' + str(len(page_titles)))
            pages.append(self.wiki_wiki.page(title))

        # get the summaries of these pages
        titles, summaries = [], []
        page_title_lookup = {}
        for page in pages:
            titles.append(page.title)
            summaries.append(get_page_summary(page))
            page_title_lookup[page.title] = page

        templates, titleSets, summarySets = self.get_filled_templates(titles, summaries)
        bestTitles, bestSummaries = [], []
        for i, template in enumerate(templates):
            print("Template")
            log.info(template)
    
            response = self.llm.invoke(template)
            print("Response")
            print(response)
    
            print("Parsed Response")
            proposedPage = response.content.split('=')[1]
            print(proposedPage)
            
            print("Most similar page")
            most_similar = difflib.get_close_matches(proposedPage, titleSets[i], n=1)[0]
            log.info(most_similar)
            bestTitles.append(most_similar)
            bestSummaries.append(get_page_summary(page_title_lookup[most_similar]))

        # reduce it further
        if len(bestTitles) > 1:
            template = self.get_filled_template(bestTitles, bestSummaries)
            log.info(template)
            response = self.llm.invoke(template)
            
            print("Response")
            print(response)
    
            print("Parsed Response")
            proposedPage = response.content.split('=')[1]
            print(proposedPage)
            
            print("Most similar page")
            most_similar = difflib.get_close_matches(proposedPage, bestTitles, n=1)[0]
        else:
            most_similar = bestTitles[0]
            
        # reset the current page
        log.info(most_similar)
        self.current_page = page_title_lookup[most_similar]
        
        if self.target_page.fullurl == self.current_page.fullurl:
            return True

        return False


# Play Game

In [16]:
global wiki_wiki
random_string = 'XXX' # generate_random_string(10)
wiki_wiki = wikipediaapi.Wikipedia(
    f'WikiBot-{random_string} (https://www.linkedin.com/in/kmaurinjones/)',
    'en',
    timeout = 30
    )

INFO:wikipediaapi:Wikipedia: language=en, user_agent: WikiBot-XXX (https://www.linkedin.com/in/kmaurinjones/) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=1


In [12]:
bot = WikiGameLLMBot(wiki_wiki,
                 start_topic = 'CRISPE',
                 target_topic = 'Israel',
                 model_name='meta/llama3-70b-instruct',
                 temperature=0.1)

Enter your NVIDIA API key:  ········


In [13]:
found = False
i = 0
while not found:
    found = bot.take_turn()
    print(bot.current_page.title)
    i += 1
    if i > 5:
        break

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Kayvan Najarian&pllimit=500


<class 'wikipediaapi.WikipediaPage'>


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Amirkabir University of Technology&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=CRC Press&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=ISBN (identifier)&explaintext=1&exsectionformat=wiki


Got all titles
Amirkabir University of Technology		0/6
CRC Press		1/6
ISBN (identifier)		2/6
Sharif University of Technology		3/6
University of British Columbia		4/6
University of Michigan		5/6


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Sharif University of Technology&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=University of British Columbia&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=University of Michigan&explaintext=1&exsectionformat=wiki
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Amirkabir University of Technology	Amirkabir University of Technology (AUT) (Persian:...
title:CRC Press	The CRC Press, LLC is an

Template


INFO:__main__:Sharif University of Technology
INFO:__main__:Sharif University of Technology
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Israel&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle


Response
content='Next topic=Sharif University of Technology' response_metadata={'role': 'assistant', 'content': 'Next topic=Sharif University of Technology', 'token_usage': {'prompt_tokens': 260, 'total_tokens': 269, 'completion_tokens': 9}, 'model_name': 'meta/llama3-70b-instruct'} id='run-3ea3ea8f-bb68-43d7-ad7a-fc304c23878c-0' role='assistant'
Parsed Response
Sharif University of Technology
Most similar page


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Sharif University of Technology&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Sharif University of Technology&pllimit=500
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1979 revolution&explaintext=1&exsectionformat=wiki


Sharif University of Technology
<class 'wikipediaapi.WikipediaPage'>
Got all titles
1979 revolution		0/143
AJA University of Medical Sciences		1/143
Abbas Anvari		2/143
Academic Ranking of World Universities		3/143
Adel Ferdosipour		4/143
Ali Akbar Salehi		5/143
Ali Daei		6/143
Ali Larijani		7/143
Ali Mohammad Ranjbar		8/143
Alireza Mehran		9/143
Allameh Tabatabaei University		10/143
Alzahra University		11/143
Amin Police University		12/143
Amirkabir University of Technology		13/143
Anonymous martyr		14/143
Azadi Square		15/143
Azadi Tower		16/143
Baqiyatallah University of Medical Sciences		17/143
Basij		18/143
CWTS Leiden Ranking		19/143
Civil Aviation Technology College		20/143
College and university rankings		21/143
Doctorate		22/143
Doi (identifier)		23/143
Elshan Moradi		24/143
Eshaq Jahangiri		25/143
Farhangian University		26/143
FarsiTeX		27/143
Fazlollah Reza		28/143
Fields Medal		29/143
Financial endowment		30/143
Geographic coordinate system		31/143
Government of Hassan Rouh

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=AJA University of Medical Sciences&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Abbas Anvari&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Academic Ranking of World Universities&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Adel Ferdosipour&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Ali Akbar Salehi&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=Ali Daei&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/ap

Template


INFO:__main__:Hassan Rouhani
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Sayed Khatiboleslam Sadrnezhaad	Sayed Khatiboleslam Sadrnezhaad is an Iranian dist...
title:School colors	School colors, also known as university colors or ...
title:Science, technology, engineering, and mathematics	Science, technology, engineering, and mathematics ...
title:Scientia Iranica	Scientia Iranica is a peer-reviewed scientific jou...
title:Seyyed Hossein Nasr	Seyyed Hossein Nasr (; Persian: سید حسین نصر, born...
title:Shahbal	Shahbal is an unmanned aerial vehicle (UAV) design...
title:Shahed University	Shahed University (Persian: دانشگاه شاهد) is a pub...
title:Shahid Behes

Response
content='Next topic=Hassan Rouhani' response_metadata={'role': 'assistant', 'content': 'Next topic=Hassan Rouhani', 'token_usage': {'prompt_tokens': 2408, 'total_tokens': 2417, 'completion_tokens': 9}, 'model_name': 'meta/llama3-70b-instruct'} id='run-191dcb19-1c1e-4244-b4df-7c333143c589-0' role='assistant'
Parsed Response
Hassan Rouhani
Most similar page
Template


INFO:__main__:Tehran
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Hassan Rouhani	Hassan Rouhani (Persian: حسن روحانی, Standard Pers...
title:Tehran	Tehran (; Persian: تهران Tehrân [tehˈɾɒːn] ) is th...


If the target topic of Israel is available, you should select the target topic Israel. Otherwise, select
a next topic that is likely to be closer or fewer connections to the topic Israel. Format your output as:
Next topic=<topic here>



Response
content='Next topic=Tehran' response_metadata={'role': 'assistant', 'content': 'Next topic=Tehran', 'token_usage': {'prompt_tokens': 1022, 'total_tokens': 1028, 'completion_tokens': 6}, 'model_name': 'meta/llama3-70b-instruct'} id='run-fbb33032-862f-405b-8cbc-931ea5da6083-0' role='assistant'
Parsed Response
Tehran
Most similar page


INFO:__main__:Tehran
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Tehran&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle


Response
content='Next topic=Tehran' response_metadata={'role': 'assistant', 'content': 'Next topic=Tehran', 'token_usage': {'prompt_tokens': 195, 'total_tokens': 201, 'completion_tokens': 6}, 'model_name': 'meta/llama3-70b-instruct'} id='run-f872527e-d779-47c2-803e-5a6588752068-0' role='assistant'
Parsed Response
Tehran
Most similar page


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Tehran&pllimit=500
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Tehran&pllimit=500&plcontinue=57654|0|Mahmoodieh
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1908 bombardment of the Majlis&explaintext=1&exsectionformat=wiki


Tehran
<class 'wikipediaapi.WikipediaPage'>
Got all titles
1908 bombardment of the Majlis		0/879
1951 Asian Games		1/879
1954 Asian Games		2/879
1958 Asian Games		3/879
1962 Asian Games		4/879
1966 Asian Games		5/879
1968 AFC Asian Cup		6/879
1970 Asian Games		7/879
1974 Asian Games		8/879
1976 AFC Asian Cup		9/879
1978 Asian Games		10/879
1979 Revolution		11/879
1982 Asian Games		12/879
1986 Asian Games		13/879
1986 Asian Winter Games		14/879
1990 Asian Games		15/879
1990 Asian Winter Games		16/879
1994 Asian Games		17/879
1996 Asian Winter Games		18/879
1998 Asian Games		19/879
1999 Asian Winter Games		20/879
2,500-year celebration of the Persian Empire		21/879
2002 Asian Games		22/879
2003 Asian Winter Games		23/879
2006 Asian Games		24/879
2007 Asian Winter Games		25/879
2009 Iranian presidential election protests		26/879
2010 Asian Games		27/879
2011 Asian Winter Games		28/879
2014 Asian Games		29/879
2017 Asian Winter Games		30/879
2018 Asian Games		31/879
2022 Asian Games		32/87

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1951 Asian Games&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1954 Asian Games&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1958 Asian Games&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1962 Asian Games&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1966 Asian Games&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1968 AFC Asian Cup&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=ex

Template


INFO:__main__:Achaemenid Empire
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Azadi Cinema Complex	Azadi Cinema Complex is a cineplex building locate...
title:Azadi Sport Complex	The Āzādi Sport Complex (Persian: مجموعه ورزشی آزا...
title:Azadi Square	Shahyad Square (Persian: میدان آزادی Meydāne Āzādi...
title:Azadi Stadium	The Azadi Stadium (Persian: ورزشگاه آزادی Varzešgâ...
title:Azadi Tower	The Azadi Tower (Persian: برج آزادی, Borj-e Āzādi;...
title:Azerbaijan	Azerbaijan, officially the Republic of Azerbaijan,...
title:Azerbaijan (Iran)	Azerbaijan or Azarbaijan (Persian/Azerbaijani: آذر...
title:Baghdad	Baghdad (  BAG-dad or  bəg-DAD; Arabic: بَغْدَاد, .

Response
content='Next topic=Achaemenid Empire' response_metadata={'role': 'assistant', 'content': 'Next topic=Achaemenid Empire', 'token_usage': {'prompt_tokens': 2716, 'total_tokens': 2724, 'completion_tokens': 8}, 'model_name': 'meta/llama3-70b-instruct'} id='run-db3394b7-dd43-452f-870d-febe8215653d-0' role='assistant'
Parsed Response
Achaemenid Empire
Most similar page
Template


INFO:__main__:Baku
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Culture of Tehran	The culture of Tehran concerns the arts, music, mu...
title:Cyprus	Cyprus ( ), officially the Republic of Cyprus, is ...
title:Cyprus dispute	The Cyprus problem, also known as the Cyprus confl...
title:Czech Republic	The Czech Republic, also known as Czechia, is a la...
title:Damascus	Damascus (, UK also  də-MAH-skəs; Arabic: دِمَشق, ...
title:Damavand, Iran	Damavand (Persian: دماوند) is a city in the Centra...
title:Damavand County	Damavand County (Persian: شهرستان دماوند) is in Te...
title:Damavand Street	Damavand Street is a street in central and eastern...
title:Dar ul-Fun

Response
content='Next topic=Baku' response_metadata={'role': 'assistant', 'content': 'Next topic=Baku', 'token_usage': {'prompt_tokens': 2352, 'total_tokens': 2357, 'completion_tokens': 5}, 'model_name': 'meta/llama3-70b-instruct'} id='run-5d953b53-cd23-4c18-bb8e-a2db64244ca1-0' role='assistant'
Parsed Response
Baku
Most similar page
Template


INFO:__main__:Cyprus
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Gentrification	Gentrification is the process of changing the char...
title:Geographic coordinate system	A geographic coordinate system (GCS) is a spherica...
title:Georgia (country)	Georgia (Georgian: საქართველო, romanized: sakartve...
title:Georgian-Ossetian conflict	The Georgian–Ossetian conflict is an ethno-politic...
title:Gernot Windfuhr	...
title:Gheytarieh	Qeytarieh (also spelled Gheytarieh) is a neighbour...
title:Golestan, Tehran Province	Golestan (Persian: گلستان) is a city in, and the c...
title:Golestan Palace	The Golestan Palace (Persian: کاخ گلستان, Kākh-e G...
title:Google Book

Response
content='Next topic=Cyprus' response_metadata={'role': 'assistant', 'content': 'Next topic=Cyprus', 'token_usage': {'prompt_tokens': 2343, 'total_tokens': 2349, 'completion_tokens': 6}, 'model_name': 'meta/llama3-70b-instruct'} id='run-040da943-fbb1-4ccb-82fd-a8ded597e69b-0' role='assistant'
Parsed Response
Cyprus
Most similar page
Template


INFO:__main__:Georgia (country)
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Islamic Republic of Iran Railways	The Islamic Republic of Iran Railways (abbreviated...
title:Islamic republic	The term  Islamic republic has been used in differ...
title:Israel	Israel, officially the State of Israel, is a count...
title:Istanbul	Istanbul is the largest city in Turkey, straddling...
title:Jafarabad-e Baqeraf	Jafarabad-e Baqeraf (Persian: جعفرابادباقراف, also...
title:Jafarabad-e Jangal, Tehran	شرکت خزر
Jafarabad-e Jangal (Persian: جعفرابادجنگل...
title:Jahanabad, Tehran	Jahanabad (Persian: جهان اباد, also Romanized as J...
title:Jajrood River	The Jajrud (Jājrūd) (P

Response
content='Next topic=Georgia (country)' response_metadata={'role': 'assistant', 'content': 'Next topic=Georgia (country)', 'token_usage': {'prompt_tokens': 2307, 'total_tokens': 2315, 'completion_tokens': 8}, 'model_name': 'meta/llama3-70b-instruct'} id='run-2dab4a94-48fa-409d-89cd-003b1b6f3a46-0' role='assistant'
Parsed Response
Georgia (country)
Most similar page
Template


INFO:__main__:Jerusalem
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Mehran (district)	Mehran district is a neighborhood of northern Tehr...
title:Mellat Park	Mellat Park (Persian: پارک ملت Pārk e Mellat), lit...
title:Mercer (consulting firm)	Mercer is an American consulting firm founded in 1...
title:Mesopotamian Arabic	Mesopotamian Arabic (Arabic: لهجة بلاد ما بين النه...
title:Metro Manila	Metropolitan Manila (Filipino: Kalakhang Maynila),...
title:Metropolitan Area of Bogotá	Metropolitan Area of Bogotá is the metropolitan ar...
title:Metropolitan area	A metropolitan area or metro is a region consistin...
title:Mexico City	Mexico City (Spanish: Ciudad d

Response
content='Next topic=Jerusalem' response_metadata={'role': 'assistant', 'content': 'Next topic=Jerusalem', 'token_usage': {'prompt_tokens': 2643, 'total_tokens': 2649, 'completion_tokens': 6}, 'model_name': 'meta/llama3-70b-instruct'} id='run-e73d6bf4-729a-4eb4-b001-2178e0e6d200-0' role='assistant'
Parsed Response
Jerusalem
Most similar page
Template


INFO:__main__:Middle East
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Pishva	Pishva (Persian: پيشوا) is a city in the Central D...
title:Pishva County	Pishva County (Persian: شهرستان پیشوا) is in Tehra...
title:Precipitation	In meteorology, precipitation is any product of th...
title:Precipitation (meteorology)	In meteorology, precipitation is any product of th...
title:Pretoria	Pretoria ( prih-TOR-ee-ə, pree-; Afrikaans: [prəˈt...
title:Primate city	A primate city is a city that is the largest in it...
title:Protoplast (religion)	A protoplast, from ancient Greek πρωτόπλαστος (prō...
title:Provinces of Iran	Iran is subdivided into thirty-one provinces (Per

Response
content='Next topic=Middle East' response_metadata={'role': 'assistant', 'content': 'Next topic=Middle East', 'token_usage': {'prompt_tokens': 2430, 'total_tokens': 2436, 'completion_tokens': 6}, 'model_name': 'meta/llama3-70b-instruct'} id='run-3a9c4358-d56b-4a54-b18d-030a5126b6e1-0' role='assistant'
Parsed Response
Middle East
Most similar page
Template


INFO:__main__:Ramallah
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Shermine Shahrivar	Shermine Shahrivar (Persian: شرمینه شهریور; born 1...
title:Shiraz	Shiraz (Persian: شیراز;  ; [ʃiːˈɾɒːz] ) is the fif...
title:Shokrabad, Tehran	Shokrabad (Persian: شكراباد, also Romanized as Sho...
title:Sikh	Sikhs (singular Sikh:  sik or  seek; Punjabi: ਸਿੱਖ...
title:Sinai and Gulf Corner Centre	Sinai and Gulf Corner Centre (Persian: کوی سینا و ...
title:Singapore	Singapore, officially the Republic of Singapore, i...
title:Sir Thomas Herbert, 1st Baronet	Sir Thomas Herbert, 1st Baronet (1606–1682), was a...
title:Sister city	A sister city or a twin town relationship is 

Response
content='Next topic=Ramallah' response_metadata={'role': 'assistant', 'content': 'Next topic=Ramallah', 'token_usage': {'prompt_tokens': 2571, 'total_tokens': 2577, 'completion_tokens': 6}, 'model_name': 'meta/llama3-70b-instruct'} id='run-d1dfa352-d163-45c4-8e34-5eb1c22761d7-0' role='assistant'
Parsed Response
Ramallah
Most similar page
Template


INFO:__main__:State of Palestine
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Tourism in Tehran	Tehran is one of Iran's leading tourism destinatio...
title:Treaty of Gulistan	The Treaty of Gulistan (also spelled Golestan: Rus...
title:Treaty of Turkmenchay	The Treaty of Turkmenchay (Persian: عهدنامه ترکمان...
title:Triumph of Tehran	The Triumph of Tehran (Persian: فتح تهران, romaniz...
title:Trolleybus	A trolleybus (also known as trolley bus, trolley c...
title:Trolleybuses in Tehran	The Tehran trolleybus system (Persian:سامانهٔ اتوب...
title:Tskhinvali	Tskhinvali (Georgian: ცხინვალი [ˈt͡sʰχinʷali] ) or...
title:Tughrul Tower	Toghrol Tower (Persian: برج طغر

Response
content='Next topic=State of Palestine' response_metadata={'role': 'assistant', 'content': 'Next topic=State of Palestine', 'token_usage': {'prompt_tokens': 2409, 'total_tokens': 2416, 'completion_tokens': 7}, 'model_name': 'meta/llama3-70b-instruct'} id='run-1cc8f099-9e4a-4163-94f3-bd41c607fc83-0' role='assistant'
Parsed Response
State of Palestine
Most similar page
Template


INFO:__main__:Turkey
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Achaemenid Empire	The Achaemenid Empire or Achaemenian Empire, also ...
title:Baku	Baku (US: , UK: ; Azerbaijani: Bakı [bɑˈcɯ] ) is t...
title:Cyprus	Cyprus ( ), officially the Republic of Cyprus, is ...
title:Georgia (country)	Georgia (Georgian: საქართველო, romanized: sakartve...
title:Jerusalem	Jerusalem ( jə-ROO-sə-ləm, -⁠zə-; Hebrew: יְרוּשָׁ...
title:Middle East	The Middle East (term originally coined in English...
title:Ramallah	Ramallah (UK:  rə-MAL-ə, US:  rə-MAH-lə; Arabic: ر...
title:State of Palestine	Palestine, officially the State of Palestine, is a...
title:Turkey	Turkey, offici

Response
content='Next topic=Turkey' response_metadata={'role': 'assistant', 'content': 'Next topic=Turkey', 'token_usage': {'prompt_tokens': 2026, 'total_tokens': 2031, 'completion_tokens': 5}, 'model_name': 'meta/llama3-70b-instruct'} id='run-97681016-bf55-47bc-b195-54aab816d433-0' role='assistant'
Parsed Response
Turkey
Most similar page


INFO:__main__:Jerusalem
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Jerusalem&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle


Response
content='Next topic=Jerusalem' response_metadata={'role': 'assistant', 'content': 'Next topic=Jerusalem', 'token_usage': {'prompt_tokens': 379, 'total_tokens': 385, 'completion_tokens': 6}, 'model_name': 'meta/llama3-70b-instruct'} id='run-b7f1ebd7-d51e-45d1-b65a-f149bbb9389a-0' role='assistant'
Parsed Response
Jerusalem
Most similar page


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Jerusalem&pllimit=500
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Jerusalem&pllimit=500&plcontinue=16043|0|El'ad


Jerusalem
<class 'wikipediaapi.WikipediaPage'>


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Jerusalem&pllimit=500&plcontinue=16043|0|Judeo-Tat&format=json&redirects=1
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Jerusalem&pllimit=500&plcontinue=16043|0|Prime_Minister_of_Israel&format=json&redirects=1
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Jerusalem&pllimit=500&plcontinue=16043|0|Zuhr_prayer&format=json&redirects=1
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles='Arab al-Jahalin&explaintext=1&exsectionformat=wiki


Got all titles
'Arab al-Jahalin		0/1991
1840–41 Royal Engineers maps of Palestine, Lebanon and Syria		1/1991
1860 Mount Lebanon civil war		2/1991
1920 Palestine riots		3/1991
1929 Palestine riots		4/1991
1948 Arab–Israeli War		5/1991
1948 Arab–Israeli war		6/1991
1949 Armistice Agreements		7/1991
2009 Capital of Arab Culture		8/1991
ASIN (identifier)		9/1991
AT&T		10/1991
A Peace to End All Peace: The Fall of the Ottoman Empire and the Creation of the Modern Middle East		11/1991
Aaron W. Hughes		12/1991
Aaron in Islam		13/1991
Abbasid Caliphate		14/1991
Abd al-Malik ibn Marwan		15/1991
Abdi-Heba		16/1991
Abdullah Yusuf Ali		17/1991
Abdullah el-Tell		18/1991
Abkhaz-Georgian conflict		19/1991
Abkhazia		20/1991
Abraha		21/1991
Abraham		22/1991
Abraham in Islam		23/1991
Abrahamic religions		24/1991
Abu Bakr		25/1991
Abu Dhabi		26/1991
Abu Dis		27/1991
Abu Ghosh		28/1991
Abu Jihad		29/1991
Abu Tor		30/1991
Abū Lahab		31/1991
Academy of the Hebrew Language		32/1991
Achaemenid Empire		33/1991

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1840–41 Royal Engineers maps of Palestine, Lebanon and Syria&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1860 Mount Lebanon civil war&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1920 Palestine riots&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1929 Palestine riots&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1948 Arab–Israeli War&explaintext=1&exsectionformat=wiki
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=extracts&titles=1948 Arab–Israeli war&explaintext=1&exsectionformat=wiki
INFO:wikipedia

Template


INFO:__main__:Acre, Israel
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Ancient Rome	In modern historiography, ancient Rome encompasses...
title:Ancient towns in Saudi Arabia	Thirteen ancient towns have been discovered in Sau...
title:Angel	In Abrahamic religious traditions (such as Judaism...
title:Angels in Islam	In Islam, angels (Arabic: ملاك٬ ملك‎, romanized: m...
title:Anglican Communion	The Anglican Communion is the third largest Christ...
title:Ankara	Ankara ( ANG-kər-ə, US also  AHNG-kər-ə; Turkish: ...
title:Anna Ticho	Anna Ticho (Hebrew: אנה טיכו; 27 October 1894 – 1 ...
title:Annabel Jane Wharton	Annabel Jane Wharton (also known in print as Ann W

Response
content='Next topic=Acre, Israel' response_metadata={'role': 'assistant', 'content': 'Next topic=Acre, Israel', 'token_usage': {'prompt_tokens': 2900, 'total_tokens': 2907, 'completion_tokens': 7}, 'model_name': 'meta/llama3-70b-instruct'} id='run-708acc17-6f41-4e58-9186-c33ecc2b93ae-0' role='assistant'
Parsed Response
Acre, Israel
Most similar page
Template


INFO:__main__:Arad, Israel
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Bakkah	Bakkah (Arabic: بَكَّةُ [ˈbɛk.kɛh]), is a place me...
title:Baku	Baku (US: , UK: ; Azerbaijani: Bakı [bɑˈcɯ] ) is t...
title:Balaam	Balaam (; Hebrew: בִּלְעָם, Standard Bīlʿam Tiberi...
title:Bandar Seri Begawan	Bandar Seri Begawan (BSB; Jawi: بندر سري بڬاوان‎; ...
title:Bangkok	Bangkok, officially known in Thai as Krung Thep Ma...
title:Bangladesh	Bangladesh, officially the People's Republic of Ba...
title:Bank of Israel	The Bank of Israel (Hebrew: בנק ישראל, Arabic: بنك...
title:Banu Aws	The Banū Aws (Arabic: بنو أوس  pronounced [ˈbanuː ...
title:Banu Hashim	The Banū Hāshim (Ar

Response
content='Next topic=Arad, Israel' response_metadata={'role': 'assistant', 'content': 'Next topic=Arad, Israel', 'token_usage': {'prompt_tokens': 2779, 'total_tokens': 2787, 'completion_tokens': 8}, 'model_name': 'meta/llama3-70b-instruct'} id='run-38fac393-3d52-4882-9e07-d60e5bd7bdc7-0' role='assistant'
Parsed Response
Arad, Israel
Most similar page
Template


INFO:__main__:Bank of Israel
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:British Indian Ocean Territory	The British Indian Ocean Territory (BIOT) is an Ov...
title:British Mandate-era Jerusalem	During its long history, Jerusalem has been attack...
title:British Mandate of Mesopotamia	The Mandate for Mesopotamia (Arabic: الانتداب على ...
title:British School of Archaeology in Jerusalem	The Kenyon Institute, previously known as the Brit...
title:Broad Wall (Jerusalem)	The Broad Wall (Hebrew: החומה הרחבה HaChoma HaRech...
title:Bronze Age	The Bronze Age was a historical period lasting fro...
title:Brunei	Brunei, officially Brunei Darussalam and the State...
t

Response
content='Next topic=Bank of Israel' response_metadata={'role': 'assistant', 'content': 'Next topic=Bank of Israel', 'token_usage': {'prompt_tokens': 2999, 'total_tokens': 3006, 'completion_tokens': 7}, 'model_name': 'meta/llama3-70b-instruct'} id='run-ec466dc2-d2b7-4a51-9d75-b2267c52c62c-0' role='assistant'
Parsed Response
Bank of Israel
Most similar page
Template


INFO:__main__:Cabinet of Israel
INFO:__main__:You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic	Description
--------			-----------
title:Coptic Orthodox Church	The Coptic Orthodox Church (Coptic: Ϯⲉⲕ̀ⲕⲗⲏⲥⲓⲁ ⲛ̀ⲣ...
title:Copts	Copts (Coptic: ⲛⲓⲣⲉⲙⲛ̀ⲭⲏⲙⲓ, romanized: niremənkhēm...
title:Corpus separatum (Jerusalem)	Corpus separatum (Latin for "separated body") was ...
title:Costa Rica	Costa Rica (UK: , US:  ; Spanish: [ˈkosta ˈrika]; ...
title:Courtyard neighborhood	Courtyard neighborhoods are Jewish neighborhoods b...
title:Crimean Karaites	The Crimean Karaites or Krymkaraylar (Crimean Kara...
title:Crucifixion	Crucifixion is a method of capital punishment in w...
title:Crucifixion of Jesus	The crucifixion of Jesus o

Response
content='Next topic=Cabinet of Israel' response_metadata={'role': 'assistant', 'content': 'Next topic=Cabinet of Israel', 'token_usage': {'prompt_tokens': 2589, 'total_tokens': 2596, 'completion_tokens': 7}, 'model_name': 'meta/llama3-70b-instruct'} id='run-3691d746-4caa-4df3-82f5-936132841a61-0' role='assistant'
Parsed Response
Cabinet of Israel
Most similar page
Template


INFO:__main__:Declaration of Independence (Israel)


Response
content='Next topic=Declaration of Independence (Israel)' response_metadata={'role': 'assistant', 'content': 'Next topic=Declaration of Independence (Israel)', 'token_usage': {'prompt_tokens': 2792, 'total_tokens': 2802, 'completion_tokens': 10}, 'model_name': 'meta/llama3-70b-instruct'} id='run-9391541f-66db-46e7-96fb-f8bd575a0746-0' role='assistant'
Parsed Response
Declaration of Independence (Israel)
Most similar page


KeyError: 'Declaration of Independence (Israel)'

In [None]:
def play_game(self, verbose = True):
    # turn number
    turn_num = 0

    # first 'current' topic is starting topic
    current_topic = self.start_topic

    # to prevent duplicates
    visited = set()

    # keep playing until target is reached
    while True:

        # for turn time tracking
        turn_start = time.time()

        # find most similar topic on current page to target topic
        visited.add(current_topic)
        next_topic = self.take_turn(current_topic, list(visited))

        # for turn time tracking
        turn_time = time.time() - turn_start

        self.log_turn(
            {
                'starting_topic': self.start_topic,
                'target_topic': self.start_topic,
                'turn': turn_num,             
                'current_topic': current_topic,
                'current_summary': self.current_summary
            }
        )

        if verbose:
            printouts = [
                "-" * 50,
                f"Turn: {turn_num}",
                f"Start topic: {self.start_topic.replace('_', ' ')}",
                f"Current topic: {current_topic.replace('_', ' ')}",
                f"Next topic: {next_topic.replace('_', ' ')}",
                f"Target topic: {self.target_topic.replace('_', ' ')}",
            ]

            self.printouts.append(printouts)

            # print progress
            for i in self.printouts[-1]:
                print(i)

        # else, set new next_topic to current topic and loop
        current_topic = next_topic

        # increment turn
        turn_num += 1

In [8]:
page = wiki_wiki.page('Python_(programming_language)')
print(page.title)
# Python (programming language)


Python_(programming_language)


In [11]:
help(wiki_wiki.page)

Help on method page in module wikipediaapi:

page(title: str, ns: Union[wikipediaapi.Namespace, int] = <Namespace.MAIN: 0>, unquote: bool = False) -> 'WikipediaPage' method of wikipediaapi.Wikipedia instance
    Constructs Wikipedia page with title `title`.
    
    Creating `WikipediaPage` object is always the first step for extracting
    any information.
    
    Example::
    
        wiki_wiki = wikipediaapi.Wikipedia('en')
        page_py = wiki_wiki.page('Python_(programming_language)')
        print(page_py.title)
        # Python (programming language)
    
        wiki_hi = wikipediaapi.Wikipedia('hi')
    
        page_hi_py = wiki_hi.article(
            title='%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8',
            unquote=True,
        )
        print(page_hi_py.title)
        # पाइथन
    
    :param title: page title as used in Wikipedia URL
    :param ns: :class:`WikiNamespace`
    :param unquote: if true it will unquote title
    :return: object representing :class

In [10]:
?wiki_wiki.page

[0;31mSignature:[0m
[0mwiki_wiki[0m[0;34m.[0m[0mpage[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtitle[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mns[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mwikipediaapi[0m[0;34m.[0m[0mNamespace[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m<[0m[0mNamespace[0m[0;34m.[0m[0mMAIN[0m[0;34m:[0m [0;36m0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0munquote[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'WikipediaPage'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Constructs Wikipedia page with title `title`.

Creating `WikipediaPage` object is always the first step for extracting
any information.

Example::

    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_py = wiki_wiki.page('Python_(programming_language)')
    print(page_py.title)
    # Python (programming language)

    wiki_hi = wikipedi

In [9]:
page.links

{'"Hello, World!" program': "Hello, World!" program (id: ??, ns: 0),
 '3ds Max': 3ds Max (id: ??, ns: 0),
 '?:': ?: (id: ??, ns: 0),
 'ABC (programming language)': ABC (programming language) (id: ??, ns: 0),
 'ADMB': ADMB (id: ??, ns: 0),
 'ALGOL': ALGOL (id: ??, ns: 0),
 'ALGOL 68': ALGOL 68 (id: ??, ns: 0),
 'APL (programming language)': APL (programming language) (id: ??, ns: 0),
 'ATmega': ATmega (id: ??, ns: 0),
 'AVR microcontrollers': AVR microcontrollers (id: ??, ns: 0),
 'Abaqus': Abaqus (id: ??, ns: 0),
 'Academic Free License': Academic Free License (id: ??, ns: 0),
 'Academic conference': Academic conference (id: ??, ns: 0),
 'Action selection': Action selection (id: ??, ns: 0),
 'Activation function': Activation function (id: ??, ns: 0),
 'Ada (programming language)': Ada (programming language) (id: ??, ns: 0),
 'Advanced Simulation Library': Advanced Simulation Library (id: ??, ns: 0),
 'Adversarial machine learning': Adversarial machine learning (id: ??, ns: 0),
 'AlexNe

In [14]:
links = list(page.links.keys())
links

['"Hello, World!" program',
 '3ds Max',
 '?:',
 'ABC (programming language)',
 'ADMB',
 'ALGOL',
 'ALGOL 68',
 'APL (programming language)',
 'ATmega',
 'AVR microcontrollers',
 'Abaqus',
 'Academic Free License',
 'Academic conference',
 'Action selection',
 'Activation function',
 'Ada (programming language)',
 'Advanced Simulation Library',
 'Adversarial machine learning',
 'AlexNet',
 'Alex Graves (computer scientist)',
 'Alex Martelli',
 'Algebra',
 'AlphaFold',
 'AlphaGo',
 'AlphaZero',
 'Alternative terms for free software',
 'Amazon (company)',
 'AmigaOS 4',
 'Amoeba (operating system)',
 'Anaconda (installer)',
 'Analyse-it',
 'Andrew Ng',
 'Android (operating system)',
 'Anonymous function',
 'Anthropic',
 'Apache Groovy',
 'Apache License',
 'Apache webserver',
 'Aphorism',
 'Apple M1',
 'Apple Public Source License',
 'ArXiv (identifier)',
 'Arbitrary-precision arithmetic',
 'ArcGIS',
 'Arithmetic operations',
 'ArkTS',
 'Array index',
 'Array slicing',
 'Artificial intelli

In [33]:
?page

[0;31mType:[0m        WikipediaPage
[0;31mString form:[0m Run-D.M.C. (album) (id: 2431325, ns: 0)
[0;31mFile:[0m        ~/.local/lib/python3.11/site-packages/wikipediaapi/__init__.py
[0;31mDocstring:[0m  
Represents Wikipedia page.

Except properties mentioned as part of documentation, there are also
these properties available:

* `fullurl` - full URL of the page
* `canonicalurl` - canonical URL of the page
* `pageid` - id of the current page
* `displaytitle` - title of the page to display
* `talkid` - id of the page with discussion

In [42]:
page

Run-D.M.C. (album) (id: 2431325, ns: 0)

In [34]:
page.summary

'Run-D.M.C. is the debut studio album by American hip hop group Run-D.M.C., released on March 27, 1984, by Profile Records, and re-issued by Arista Records. The album was primarily produced by Russell Simmons and Larry Smith.\nThe album was considered groundbreaking for its time, presenting a tougher, more hardcore form of rap. The album\'s sparse beats and aggressive rhymes were in sharp contrast with the light, party-oriented sound that was popular in contemporary hip hop. With the album, Run-D.M.C. came to be regarded by music critics as pioneering the movement of new-school hip hop of the mid-1980s. Five singles were released in support of it: "It\'s Like That", "Hard Times", "Rock Box", "30 Days" and "Hollis Crew". The first single from the album, "It\'s Like That", released on August 10, 1983, expanded lyrical boundaries in rap with its tone of social protest (unemployment, inflation). "It\'s Like That" is considered by many to be the first hardcore rap song, and the first new-sc

In [29]:
?wiki_wiki.page

[0;31mSignature:[0m
[0mwiki_wiki[0m[0;34m.[0m[0mpage[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtitle[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mns[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mwikipediaapi[0m[0;34m.[0m[0mNamespace[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m<[0m[0mNamespace[0m[0;34m.[0m[0mMAIN[0m[0;34m:[0m [0;36m0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0munquote[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'WikipediaPage'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Constructs Wikipedia page with title `title`.

Creating `WikipediaPage` object is always the first step for extracting
any information.

Example::

    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_py = wiki_wiki.page('Python_(programming_language)')
    print(page_py.title)
    # Python (programming language)

    wiki_hi = wikipedi

In [13]:
source = get_random_wiki_page(wiki_wiki)
target = get_random_wiki_page(wiki_wiki)
print(f'source={source}')
print(f'target={target}')


source=Run-D.M.C._(album)
target=Marculescu


In [15]:
page = wiki_wiki.page(source)

In [20]:
page.fullurl

'https://en.wikipedia.org/wiki/Run-D.M.C._(album)'

In [23]:
page.canonicalurl

'https://en.wikipedia.org/wiki/Run-D.M.C._(album)'

In [28]:
page.links

{'AMC (TV channel)': AMC (TV channel) (id: ??, ns: 0),
 'Adobe Flash': Adobe Flash (id: ??, ns: 0),
 'African American culture': African American culture (id: ??, ns: 0),
 'Album': Album (id: ??, ns: 0),
 'AllMusic': AllMusic (id: ??, ns: 0),
 'Arista Records': Arista Records (id: ??, ns: 0),
 'Back from Hell': Back from Hell (id: ??, ns: 0),
 'Beats Per Minute (website)': Beats Per Minute (website) (id: ??, ns: 0),
 'Billboard (magazine)': Billboard (magazine) (id: ??, ns: 0),
 'Billboard 200': Billboard 200 (id: ??, ns: 0),
 'Can You Rock It Like This': Can You Rock It Like This (id: ??, ns: 0),
 'Checks Thugs and Rock n Roll': Checks Thugs and Rock n Roll (id: ??, ns: 0),
 'Chicago Tribune': Chicago Tribune (id: ??, ns: 0),
 'Christian Hoard': Christian Hoard (id: ??, ns: 0),
 'Christmas in Hollis': Christmas in Hollis (id: ??, ns: 0),
 'Complex (magazine)': Complex (magazine) (id: ??, ns: 0),
 'Crown Royal (album)': Crown Royal (album) (id: ??, ns: 0),
 'Danceteria': Danceteria (id

In [25]:
validate_pages(page)

['AMC (TV channel)',
 'Adobe Flash',
 'African American culture',
 'Album',
 'AllMusic',
 'Arista Records',
 'Back from Hell',
 'Beats Per Minute (website)',
 'Billboard (magazine)',
 'Billboard 200',
 'Can You Rock It Like This',
 'Checks Thugs and Rock n Roll',
 'Chicago Tribune',
 'Christian Hoard',
 'Christmas in Hollis',
 'Complex (magazine)',
 'Crown Royal (album)',
 'Danceteria',
 'Darryl McDaniels',
 'David Toop',
 'Def Jam Recordings',
 'Diggy Simmons',
 'Disco',
 'Disco Fever',
 'Discogs',
 'Diss (music)',
 'Distortion (Joseph Simmons album)',
 'Down with the King (album)',
 'Down with the King (song)',
 'Drum machine',
 'East Coast hip hop',
 'Eddie Martinez (musician)',
 'Eric Weisbard',
 'Faces (Run-D.M.C. song)',
 'Gold certification',
 'Graffiti Rock',
 'Grandmaster Flash and the Furious Five',
 'Greatest Hits (Run-D.M.C. album)',
 'Greene St. Recording',
 'Greg Kot',
 'Greg Tate',
 'Hard Times (Run-D.M.C. song)',
 'Hardcore hip hop',
 'Hardcore rap',
 'High Profile: The

In [24]:
page.links

{'AMC (TV channel)': AMC (TV channel) (id: ??, ns: 0),
 'Adobe Flash': Adobe Flash (id: ??, ns: 0),
 'African American culture': African American culture (id: ??, ns: 0),
 'Album': Album (id: ??, ns: 0),
 'AllMusic': AllMusic (id: ??, ns: 0),
 'Arista Records': Arista Records (id: ??, ns: 0),
 'Back from Hell': Back from Hell (id: ??, ns: 0),
 'Beats Per Minute (website)': Beats Per Minute (website) (id: ??, ns: 0),
 'Billboard (magazine)': Billboard (magazine) (id: ??, ns: 0),
 'Billboard 200': Billboard 200 (id: ??, ns: 0),
 'Can You Rock It Like This': Can You Rock It Like This (id: ??, ns: 0),
 'Checks Thugs and Rock n Roll': Checks Thugs and Rock n Roll (id: ??, ns: 0),
 'Chicago Tribune': Chicago Tribune (id: ??, ns: 0),
 'Christian Hoard': Christian Hoard (id: ??, ns: 0),
 'Christmas in Hollis': Christmas in Hollis (id: ??, ns: 0),
 'Complex (magazine)': Complex (magazine) (id: ??, ns: 0),
 'Crown Royal (album)': Crown Royal (album) (id: ??, ns: 0),
 'Danceteria': Danceteria (id

In [17]:
?page

[0;31mType:[0m        WikipediaPage
[0;31mString form:[0m Run-D.M.C._(album) (id: ??, ns: 0)
[0;31mFile:[0m        ~/.local/lib/python3.11/site-packages/wikipediaapi/__init__.py
[0;31mDocstring:[0m  
Represents Wikipedia page.

Except properties mentioned as part of documentation, there are also
these properties available:

* `fullurl` - full URL of the page
* `canonicalurl` - canonical URL of the page
* `pageid` - id of the current page
* `displaytitle` - title of the page to display
* `talkid` - id of the page with discussion

In [14]:
?wiki_wiki.page

[0;31mSignature:[0m
[0mwiki_wiki[0m[0;34m.[0m[0mpage[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtitle[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mns[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mwikipediaapi[0m[0;34m.[0m[0mNamespace[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m<[0m[0mNamespace[0m[0;34m.[0m[0mMAIN[0m[0;34m:[0m [0;36m0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0munquote[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'WikipediaPage'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Constructs Wikipedia page with title `title`.

Creating `WikipediaPage` object is always the first step for extracting
any information.

Example::

    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_py = wiki_wiki.page('Python_(programming_language)')
    print(page_py.title)
    # Python (programming language)

    wiki_hi = wikipedi

# Scratch

In [None]:
def get_linked_page_summary(topic):
    """
    Get the summary of a linked page
    """
    page = wiki_wiki.page(topic)
    summary = get_page_summary(current_page)
    return summary

def validate_pages(wiki_page):
    """
    Filters and validates the linked pages from a given Wikipedia page.

    This function takes a Wikipedia page object and extracts all the links (or references to other Wikipedia 
    pages) from it. It then filters out unwanted links based on predefined criteria, such as links with 
    certain prefixes (like "Template:", "Wikipedia:", etc.) and links that do not contain any alphabetical 
    characters. The purpose is to retain only relevant and potentially useful page links for further processing.

    Parameters
    ----------
    wiki_page : WikipediaPage object
        A Wikipedia page object from which the links are to be extracted and validated. The object is 
        expected to have a 'links' attribute containing a dictionary of linked page titles.

    Returns
    -------
    list
        A list of validated linked page titles. The titles in this list do not include any of the unwanted 
        prefixes and contain at least one alphabetical character.
    """
    # get all links
    links = list(wiki_page.links.keys())

    # various unwanted prefixes
    bad_prefixes = ["list of", "history of", "Template:", "Wikipedia:", "Category:", "Portal:", "Talk:", "Template talk:"]
    links = [link for link in links 
        if not any(link.lower().startswith(prefix.lower()) for prefix in bad_prefixes) 
        and any(char.isalpha() for char in link)
    ]
    return links

In [None]:
def generate_random_string(length):
    characters = string.ascii_letters + string.digits
    random_string = ''.join(random.choice(characters) for _ in range(length))
    return random_string

In [None]:
def get_random_wiki_page(wiki_wiki):
    """
    Selects a random Wikipedia page that meets certain validity criteria.

    This function repeatedly requests random Wikipedia pages until it finds one that satisfies specific 
    criteria: the title should not start with certain prefixes (like "Template:", "List of", etc.), should 
    not contain certain unwanted characters, and must contain at least one alphabetical character. The 
    function also checks if the page has a reasonable summary (at least 20 words) before accepting it.

    Returns
    -------
    str
        The title of a valid random Wikipedia page.
    """
    wiki_title = None
    while True:
        url = "https://en.wikipedia.org/wiki/Special:Random"
        response = requests.get(url, timeout = 30, allow_redirects = True)
        final_url = response.url
        wiki_title = final_url.split("wiki/")[-1]
        is_valid_title = True

        # various unwanted prefixes
        bad_prefixes = ["list of", "history of", "Template:", "Wikipedia:", "Category:", "Portal:", "Talk:", "Template talk:"]

        # check for unwanted chars
        for char in "[]{}:%":
            if char in wiki_title:
                is_valid_title = False
        
        # validation criteria
        starts_with_bad_prefix = any(wiki_title.lower().startswith(prefix.lower()) for prefix in bad_prefixes)
        contains_alpha = any(char.isalpha() for char in wiki_title)
        is_valid_title = not starts_with_bad_prefix and contains_alpha

        if is_valid_title:

            # check if a reasonable page summary is present (at least 20 words)
            summary = get_page_summary(wiki_wiki.page(wiki_title))
            if len(summary.split()) > 20:
                break

    return wiki_title