# Code to Play

In [6]:
import os
import numpy as np
import pandas as pd
import getpass
import difflib
import requests
import random
import string
import wikipediaapi
import logging

from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain.memory import ConversationBufferMemory

In [9]:
log = logging.getLogger(__name__)
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
log.info('What is up?!')

INFO:__main__:What is up?!


## Funcs

In [82]:
def get_page_summary(wiki_page):
    """
    Retrieves a brief summary of a given Wikipedia page.

    This function takes a Wikipedia page object and returns the summary of the page. However, rather than 
    returning the entire summary, it returns only the first few lines. This is particularly useful for 
    getting a quick overview or introduction to the page's content without needing to process the entire 
    summary text.

    Parameters
    ----------
    wiki_page : WikipediaPage object
        A Wikipedia page object from which the summary is to be extracted. The object should have a 'summary' 
        attribute containing the text of the page's summary.

    Returns
    -------
    str
        A string containing the first few lines of the Wikipedia page's summary. The exact number of lines 
        returned is set to 5 in this implementation.
    """
    # return just the first few lines if there are multiple
    return wiki_page.summary[:100] + "..."
    # return ". ".join(wiki_page.summary.split("\n")[:1]) + "..."

def search_wiki(search_term):
    """Search common name for search term and returns most relevant Wiki Page"""
    search_url = f"https://en.wikipedia.org/w/index.php?search={'+'.join(search_term.split())}&title=Special:Search&profile=advanced&fulltext=1&ns0=1"
    soup = BeautifulSoup(requests.get(search_url, timeout=30).content, "html.parser")
    bad_prefixes = ["list of", "history of", "Template:", "Wikipedia:", "Category:", "Portal:", "Talk:", "Template talk:"]
    for result in soup.find_all("div", class_ = "mw-search-result-heading"):
        if result.a:
            if result.a['href']:
                text = result.a['href'].replace("/wiki/", "").strip()
                starts_with_bad_prefix = any(text.lower().replace("_", " ").startswith(prefix.lower()) for prefix in bad_prefixes)
                if not starts_with_bad_prefix:
                    return text

def getWikiTemplate():
    template = """You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is {current} and
you must select a new wikipedia page closer to the target topic {target1}. You can select a topic from
the following list:

{{links}}

If the target topic of {target2} is available, you should select the target topic {target3}. Otherwise, select
a next topic that is likely to be closer or fewer connections to the topic {target4}. If no topics appear relevant
you must still choose a topic to do next. You cannot say None. You have already visited the following pages,
which should be avoided in the future:

{visited}

Format your output as:
Next topic=<topic here>
"""
    return template
    

In [5]:
search_wiki('python programming langugae')

'Python_(programming_language)'

## LLM Bot

In [97]:
class WikiGameLLMBot():
    def __init__(self, wiki_wiki,
                 start_topic = None,
                 target_topic = None,
                 model_name='meta/llama3-70b-instruct',
                 temperature=0.1,
                 rag=True
                ):

        assert start_topic != target_topic, "Please enter different start and target topics."
        
        ################################################################
        #
        #    Save some things
        #
        ################################################################
        
        self.wiki_wiki     = wiki_wiki
        self.start_topic   = start_topic
        self.target_topic  = target_topic
        self.target_page   = self.wiki_wiki.page(self.target_topic)
        self.start_page    = self.wiki_wiki.page(self.start_topic)
        self.current_topic = self.start_page
        self.current_page  = self.start_page
        self.visited       = [self.start_page]
        self.rag           = rag

        ################################################################
        #
        #    Load NVIDIA model and chatbot history
        #
        ################################################################

        if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
            nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ")
            assert nvidia_api_key.startswith("nvapi-"), f"{nvidia_api_key[:5]}... is not a valid key"
            os.environ["NVIDIA_API_KEY"] = nvidia_api_key
        else:
            nvidia_api_key = os.environ["NVIDIA_API_KEY"]
            
        self.llm = ChatNVIDIA(model       = model_name,
                              api_key     = nvidia_api_key,
                              temperature = temperature,
                             )
        self.memory = ConversationBufferMemory(ai_prefix="System")

    def get_filled_templates(self, alltitles, allsummaries):
        i = 0
        k = 100 # batch size
        templates, titles, summaries  = [], [], []
        while i < len(alltitles):
            titlesTemplate    = alltitles[i:i+k]
            titles.append(titlesTemplate)
            if self.rag:
                summariesTemplate = allsummaries[i:i+k]
                summaries.append(summariesTemplate)
                templates.append(self.get_filled_template(titlesTemplate, summaries=summariesTemplate))
            else:
                templates.append(self.get_filled_template(titlesTemplate))
            i += k
        return templates, titles, summaries
    
    def get_filled_template(self, titles, summaries=None):
        template = getWikiTemplate()
        visitedPages = ""
        for pg in self.visited:
            visitedPages += '- ' + str(pg.title) + '\n'
        template = template.format(target1 = self.target_topic,
                                   target2 = self.target_topic,
                                   target3 = self.target_topic,
                                   target4 = self.target_topic,
                                   current = self.current_topic,
                                   visited = visitedPages
                                  )
        if self.rag:
            link_table = """Topic\tDescription
--------\t-----------
"""
            for i in range(len(titles)):
                link_table += ("title:" + titles[i] + '\t' + summaries[i] + '\n')
        else:
            link_table = """Topic
----------
"""
            for i in range(len(titles)):
                link_table += ("title:" + titles[i] + '\n')

        template = template.format(links = link_table)
        return template
    
    def take_turn(self):
        # Get all linked pages
        print(type(self.current_page))
        page_titles = list(self.current_page.links)

        badpages = ['Category', 'Help', 'Wikipedia', 'Portal', 'List', 'Talk', 'Template']
        badpages_lower = [bad.lower() for bad in badpages]
        page_titles = [title for title in page_titles if not any(title.lower().startswith(bad) for bad in badpages_lower)]
        
        pages = []
        if self.rag:
            print('Got all titles')
            for i, title in enumerate(page_titles):
                print(title + '\t\t' + str(i) + '/' + str(len(page_titles)))
                pages.append(self.wiki_wiki.page(title))
    
            # get the summaries of these pages
            titles, summaries = [], []
            page_title_lookup = {}
            for page in pages:
                titles.append(page.title)
                summaries.append(get_page_summary(page))
                page_title_lookup[titles[-1]] = page
        else:
            titles = page_titles
            summaries = []
            page_title_lookup = []

        print(f'len(titles)={len(titles)}')
        print(f'len(summaries)={len(summaries)}')
        if self.rag:
            print(f'len(page_title_lookup.keys())={len(page_title_lookup.keys())}')

        if self.rag:
            print('page_title_lookup.keys()')
            print(list(page_title_lookup.keys()))

        templates, titleSets, summarySets = self.get_filled_templates(titles, summaries)
        bestTitles, bestSummaries = [], []
        for i in range(len(templates)):
            template = templates[i]

#            print('\n\n\n')
            print("Template")
            print(template)
#            print('\n\n\n')            
            print('Title Set')
            print(titleSets)
#            print('\n\n\n')

            response = self.llm.invoke(template)
            print("Response")
            print(response)
#            print('\n\n\n')
            print("Parsed Response")
            proposedPage = response.content.split('=')[1]
            print(proposedPage)
#            print('\n\n\n')

            print("Most similar page")
            most_similar = difflib.get_close_matches(proposedPage, titleSets[i], n=1)[0]
            print(most_similar)
#            print('\n\n\n')
            print('\n\n\n')

            bestTitles.append(most_similar)
            if self.rag:
                bestSummaries.append(get_page_summary(page_title_lookup[most_similar]))

        # reduce it further
        if len(bestTitles) > 1:
            template = self.get_filled_template(bestTitles, bestSummaries)
            print('Map Reduce Template')
            print(template)
            response = self.llm.invoke(template)

            print("Response")
            print(response)

            print("Parsed Response")
            proposedPage = response.content.split('=')[1]
            print(proposedPage)
            
            print("Most similar page")
            most_similar = difflib.get_close_matches(proposedPage, bestTitles, n=1)[0]
        else:
            most_similar = bestTitles[0]
            
        # reset the current page
        print('Selected Page')
        print(most_similar)
        if self.rag:
            self.current_page = page_title_lookup[most_similar]
        else:
            self.current_page = self.wiki_wiki.page(most_similar)

        self.visited.append(self.current_page)
        
        if self.target_page.fullurl == self.current_page.fullurl:
            return True

        return False


# Play Game

In [98]:
global wiki_wiki
random_string = 'XXX' # generate_random_string(10)
wiki_wiki = wikipediaapi.Wikipedia(
    f'WikiBot-{random_string} (https://www.linkedin.com/in/kmaurinjones/)',
    'en',
    timeout = 30
    )

INFO:wikipediaapi:Wikipedia: language=en, user_agent: WikiBot-XXX (https://www.linkedin.com/in/kmaurinjones/) (Wikipedia-API/0.6.0; https://github.com/martin-majlis/Wikipedia-API/), extract_format=1


In [99]:
bot = WikiGameLLMBot(wiki_wiki,
                 start_topic = 'Henry Ford',
                 target_topic = 'Joe Biden',
                 model_name='meta/llama3-70b-instruct',
                 temperature=0.1,
                 rag=False)

In [100]:
found = False
i = 0
while not found:
    found = bot.take_turn()
    print(bot.current_page.title)
    i += 1
    if i > 5:
        break

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Henry Ford&pllimit=500
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Henry Ford&pllimit=500&plcontinue=13371|0|Mother_Teresa


<class 'wikipediaapi.WikipediaPage'>
len(titles)=781
len(summaries)=0
Template
You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Henry Ford (id: 13371, ns: 0) and
you must select a new wikipedia page closer to the target topic Joe Biden. You can select a topic from
the following list:

Topic
----------
title:/pol/
title:17-4 stainless steel
title:1918 United States Senate election in Michigan
title:1988 Hamas charter
title:20th Century Press Archives
title:440C
title:8chan
title:A. C. Cuza
title:ARP4754
title:ARP4761
title:AS9000
title:AS9100
title:Aaron Sapiro
title:Adolf Eichmann
title:Adolf Hitler
title:Akio Morita
title:Alan Turing
title:Albert Einstein
title:Alclad
title:Aldous Huxley
title:Alexander Fleming
title:Alexander Winton
title:Alexander Y. Malcomson
title:Alfred Ford
title:Alfred Sloan
title:Allan Nevins
title:Alloy steel
title:Alt-right
title:Amadeo Giannini
title:A

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Joe Biden&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle


Response
content='Next topic=Democratic Party (United States)' response_metadata={'role': 'assistant', 'content': 'Next topic=Democratic Party (United States)', 'token_usage': {'prompt_tokens': 230, 'total_tokens': 240, 'completion_tokens': 10}, 'model_name': 'meta/llama3-70b-instruct'} id='run-ffa0040a-2dfc-4b3d-9f0b-abcc1e796489-0' role='assistant'
Parsed Response
Democratic Party (United States)
Most similar page
Selected Page
Democratic Party (United States)


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Democratic Party (United States)&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Democratic Party (United States)&pllimit=500
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Democratic Party (United States)&pllimit=500&plcontinue=5043544|0|Equal_opportunity_employment


Democratic Party (United States)
<class 'wikipediaapi.WikipediaPage'>


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Democratic Party (United States)&pllimit=500&plcontinue=5043544|0|Nancy_Pelosi&format=json&redirects=1
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Democratic Party (United States)&pllimit=500&plcontinue=5043544|0|United_States_Senate_Democratic_Steering_and_Outreach_Committee&format=json&redirects=1


len(titles)=1582
len(summaries)=0
Template
You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Henry Ford (id: 13371, ns: 0) and
you must select a new wikipedia page closer to the target topic Joe Biden. You can select a topic from
the following list:

Topic
----------
title:104th United States Congress
title:110th United States Congress
title:113th Congress
title:117th Congress
title:1821 United States Senate election in New York
title:1828 New York gubernatorial election
title:1828 United States presidential election
title:1832 Democratic National Convention
title:1832 United States presidential election
title:1835 Democratic National Convention
title:1836 United States presidential election
title:1840 Democratic National Convention
title:1840 United States presidential election
title:1844 Democratic National Convention
title:1844 United States presidential election
title:1848 Demo

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Barack Obama&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle


Response
content='Next topic=Barack Obama' response_metadata={'role': 'assistant', 'content': 'Next topic=Barack Obama', 'token_usage': {'prompt_tokens': 307, 'total_tokens': 314, 'completion_tokens': 7}, 'model_name': 'meta/llama3-70b-instruct'} id='run-b7b83e4f-b3a8-438e-97db-90f5342982f6-0' role='assistant'
Parsed Response
Barack Obama
Most similar page
Selected Page
Barack Obama


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama&pllimit=500
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama&pllimit=500&plcontinue=534366|0|Black_players_in_professional_American_football


Barack Obama
<class 'wikipediaapi.WikipediaPage'>


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama&pllimit=500&plcontinue=534366|0|George_Floyd_protests&format=json&redirects=1
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama&pllimit=500&plcontinue=534366|0|List_of_Democratic_Party_presidential_primaries&format=json&redirects=1
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama&pllimit=500&plcontinue=534366|0|Regulation_of_greenhouse_gases_under_the_Clean_Air_Act&format=json&redirects=1
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama&pllimit=500&plcontinue=534366|0|West_Wing_Week&format=json&redirects=1


len(titles)=2503
len(summaries)=0
Template
You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Henry Ford (id: 13371, ns: 0) and
you must select a new wikipedia page closer to the target topic Joe Biden. You can select a topic from
the following list:

Topic
----------
title:109th United States Congress
title:110th United States Congress
title:14th Dalai Lama
title:1828 United States presidential election
title:1832 Democratic National Convention
title:1835 Democratic National Convention
title:1840 Democratic National Convention
title:1844 Democratic National Convention
title:1848 Democratic National Convention
title:1852 Democratic National Convention
title:1856 Democratic National Convention
title:1860 Democratic National Conventions
title:1864 Democratic National Convention
title:1868 Democratic National Convention
title:1872 Democratic National Convention
title:1876 Democratic Na

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Barack Obama 2008 presidential campaign&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle
INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama 2008 presidential campaign&pllimit=500


Response
content='Next topic=Barack Obama 2008 presidential campaign' response_metadata={'role': 'assistant', 'content': 'Next topic=Barack Obama 2008 presidential campaign', 'token_usage': {'prompt_tokens': 406, 'total_tokens': 418, 'completion_tokens': 12}, 'model_name': 'meta/llama3-70b-instruct'} id='run-2805e2e3-95b3-467f-9c7d-0e7b75369207-0' role='assistant'
Parsed Response
Barack Obama 2008 presidential campaign
Most similar page
Selected Page
Barack Obama 2008 presidential campaign
Barack Obama 2008 presidential campaign
<class 'wikipediaapi.WikipediaPage'>


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Barack Obama 2008 presidential campaign&pllimit=500&plcontinue=17881122|0|List_of_Department_of_Education_appointments_by_Joe_Biden


len(titles)=872
len(summaries)=0
Template
You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Henry Ford (id: 13371, ns: 0) and
you must select a new wikipedia page closer to the target topic Joe Biden. You can select a topic from
the following list:

Topic
----------
title:1972 United States Senate election in Delaware
title:1978 United States Senate election in Delaware
title:1984 United States Senate election in Delaware
title:1988 Democratic Party presidential primaries
title:1990 United States Senate election in Delaware
title:1996 United States Senate election in Delaware
title:2000 Illinois's 1st congressional district election
title:2002 United States Senate election in Delaware
title:2004 Democratic National Convention
title:2004 Democratic National Convention keynote address
title:2004 United States Senate election in Illinois
title:2004 United States presidential election


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Early life and career of Joe Biden&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle


Response
content='Next topic=Early life and career of Joe Biden' response_metadata={'role': 'assistant', 'content': 'Next topic=Early life and career of Joe Biden', 'token_usage': {'prompt_tokens': 275, 'total_tokens': 286, 'completion_tokens': 11}, 'model_name': 'meta/llama3-70b-instruct'} id='run-2a8e87bc-fd42-4225-992a-8ce845bd15f8-0' role='assistant'
Parsed Response
Early life and career of Joe Biden
Most similar page
Selected Page
Early life and career of Joe Biden


INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=links&titles=Early life and career of Joe Biden&pllimit=500


Early life and career of Joe Biden
<class 'wikipediaapi.WikipediaPage'>
len(titles)=375
len(summaries)=0
Template
You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Henry Ford (id: 13371, ns: 0) and
you must select a new wikipedia page closer to the target topic Joe Biden. You can select a topic from
the following list:

Topic
----------
title:1972 U.S. Senate election in Delaware
title:1972 United States Senate election in Delaware
title:1978 United States Senate election in Delaware
title:1984 United States Senate election in Delaware
title:1988 Democratic Party presidential primaries
title:1990 United States Senate election in Delaware
title:1996 United States Senate election in Delaware
title:2002 United States Senate election in Delaware
title:2008 Democratic National Convention
title:2008 Democratic Party presidential debates and forums
title:2008 Democratic Party presidential

INFO:wikipediaapi:Request URL: https://en.wikipedia.org/w/api.php?action=query&prop=info&titles=Joseph Robinette Biden Jr.&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle


Response
content='Next topic=Joseph Robinette Biden Jr.' response_metadata={'role': 'assistant', 'content': 'Next topic=Joseph Robinette Biden Jr.', 'token_usage': {'prompt_tokens': 244, 'total_tokens': 254, 'completion_tokens': 10}, 'model_name': 'meta/llama3-70b-instruct'} id='run-564ae484-164f-4747-9da3-3737837ff47c-0' role='assistant'
Parsed Response
Joseph Robinette Biden Jr.
Most similar page
Selected Page
Joseph Robinette Biden Jr.
Joe Biden


In [101]:
bot.visited

[Henry Ford (id: 13371, ns: 0),
 Democratic Party (United States) (id: 5043544, ns: 0),
 Barack Obama (id: 534366, ns: 0),
 Barack Obama 2008 presidential campaign (id: 17881122, ns: 0),
 Early life and career of Joe Biden (id: 68027061, ns: 0),
 Joe Biden (id: 145422, ns: 0)]