# Code to Play

In [1]:
import os
import numpy as np
import pandas as pd
import getpass
import difflib
import requests
import random
import string
import wikipediaapi

from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain.memory import ConversationBufferMemory

  from tqdm.autonotebook import tqdm, trange


## Funcs

In [7]:
def get_page_summary(wiki_page):
    """
    Retrieves a brief summary of a given Wikipedia page.

    This function takes a Wikipedia page object and returns the summary of the page. However, rather than 
    returning the entire summary, it returns only the first few lines. This is particularly useful for 
    getting a quick overview or introduction to the page's content without needing to process the entire 
    summary text.

    Parameters
    ----------
    wiki_page : WikipediaPage object
        A Wikipedia page object from which the summary is to be extracted. The object should have a 'summary' 
        attribute containing the text of the page's summary.

    Returns
    -------
    str
        A string containing the first few lines of the Wikipedia page's summary. The exact number of lines 
        returned is set to 5 in this implementation.
    """
    # return just the first few lines if there are multiple
    return wiki_page.summary[:50] + "..."
    # return ". ".join(wiki_page.summary.split("\n")[:1]) + "..."

def search_wiki(search_term):
    """Search common name for search term and returns most relevant Wiki Page"""
    search_url = f"https://en.wikipedia.org/w/index.php?search={'+'.join(search_term.split())}&title=Special:Search&profile=advanced&fulltext=1&ns0=1"
    soup = BeautifulSoup(requests.get(search_url, timeout=30).content, "html.parser")
    bad_prefixes = ["list of", "history of", "Template:", "Wikipedia:", "Category:", "Portal:", "Talk:", "Template talk:"]
    for result in soup.find_all("div", class_ = "mw-search-result-heading"):
        if result.a:
            if result.a['href']:
                text = result.a['href'].replace("/wiki/", "").strip()
                starts_with_bad_prefix = any(text.lower().replace("_", " ").startswith(prefix.lower()) for prefix in bad_prefixes)
                if not starts_with_bad_prefix:
                    return text

def getWikiTemplate():
    template = """You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is {current} and
you must select a new wikipedia page closer to the target topic {target1}. You can select a topic from
the following list:

{{links}}

If the target topic of {target2} is available, you should select the target topic {target3}. Otherwise, select
a next topic that is likely to be closer or fewer connections to the topic {target4}. Format your output as:
Next topic=<topic here>
"""
    return template
    

In [5]:
search_wiki('python programming langugae')

'Python_(programming_language)'

## LLM Bot

In [15]:
class WikiGameLLMBot():
    def __init__(self, wiki_wiki,
                 start_topic = None,
                 target_topic = None,
                 model_name='meta/llama3-70b-instruct',
                 temperature=0.1):

        assert start_topic != target_topic, "Please enter different start and target topics."
        
        ################################################################
        #
        #    Save some things
        #
        ################################################################
        
        self.wiki_wiki     = wiki_wiki
        self.start_topic   = start_topic
        self.target_topic  = target_topic
        self.target_page   = self.wiki_wiki.page(self.target_topic)
        self.start_page    = self.wiki_wiki.page(self.start_topic)
        self.current_topic = self.start_page
        self.current_page  = self.start_page
        self.visited       = [self.start_page]

        ################################################################
        #
        #    Load NVIDIA model and chatbot history
        #
        ################################################################

        if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
            nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ")
            assert nvidia_api_key.startswith("nvapi-"), f"{nvidia_api_key[:5]}... is not a valid key"
            os.environ["NVIDIA_API_KEY"] = nvidia_api_key
        else:
            nvidia_api_key = os.environ["NVIDIA_API_KEY"]
            
        self.llm = ChatNVIDIA(model       = model_name,
                              api_key     = nvidia_api_key,
                              temperature = temperature,
                             )
        self.memory = ConversationBufferMemory(ai_prefix="System")

    def get_filled_templates(self, titles, summaries):
        i = 0
        templates = []
        while i < len(titles):
            titlesTemplate = titles[i:i+100]
            summariesTemplate = summaries[i:i+100]
            templates.append(self.get_filled_template(titlesTemplate, summariesTemplate))
            i += 100
        return templates
    
    def get_filled_template(self, titles, summaries):
        template = getWikiTemplate()
        template = template.format(target1 = self.target_topic,
                                   target2 = self.target_topic,
                                   target3 = self.target_topic,
                                   target4 = self.target_topic,
                                   current = self.current_topic,
                                  )
        link_table = """Topic\tDescription
--------\t\t\t-----------
"""
        for i in range(len(titles)):
            link_table += ("title:" + titles[i] + '\t' + summaries[i] + '\n')

        template = template.format(links = link_table)
        return template
    
    def take_turn(self):
        # Get all linked pages
        print(type(self.current_page))
        page_titles = list(self.current_page.links)

        badpages = ['Category', 'Help', 'Wikipedia', 'Portal', 'List', 'Talk', 'Template']
        badpages_lower = [bad.lower() for bad in badpages]
        page_titles = [title for title in page_titles if not any(title.lower().startswith(bad) for bad in badpages_lower)]
        
        pages = []

        print('Got all titles')
        for i, title in enumerate(page_titles):
            print(title + '\t\t' + str(i) + '/' + str(len(page_titles)))
            pages.append(self.wiki_wiki.page(title))

        # get the summaries of these pages
        titles, summaries = [], []
        page_title_lookup = {}
        for page in pages:
            titles.append(page.title)
            summaries.append(get_page_summary(page))
            page_title_lookup[page.title] = page

        templates = self.get_filled_templates(titles, summaries)
        for template in templates:
            print("Template")        
            print(template)
    
            response = self.llm.invoke(template)
            print("Response")
            print(response)
    
            print("Parsed Response")
            proposedPage = response.content.split('=')[1]
            print(proposedPage)
            
            print("Most similar page")
            most_similar = difflib.get_close_matches(proposedPage, titles, n=1)[0]
            print(most_similar)
    
            # reset the current page
            self.current_page = page_title_lookup[most_similar]
        
        if self.target_page.fullurl == self.current_page.fullurl:
            return True

        return False


# Play Game

In [16]:
global wiki_wiki
random_string = 'XXX' # generate_random_string(10)
wiki_wiki = wikipediaapi.Wikipedia(
    f'WikiBot-{random_string} (https://www.linkedin.com/in/kmaurinjones/)',
    'en',
    timeout = 30
    )

In [17]:
bot = WikiGameLLMBot(wiki_wiki,
                 start_topic = 'Kayvan Najarian',
                 target_topic = 'Israel',
                 model_name='meta/llama3-70b-instruct',
                 temperature=0.1)

In [18]:
found = False
i = 0
while not found:
    found = bot.take_turn()
    print(bot.current_page.title)
    i += 1
    if i > 5:
        break

<class 'wikipediaapi.WikipediaPage'>
Got all titles
Amirkabir University of Technology		0/6
CRC Press		1/6
ISBN (identifier)		2/6
Sharif University of Technology		3/6
University of British Columbia		4/6
University of Michigan		5/6
Template
You must are playing the Wikipedia Game where you must find a chain of
Wikipedia pages that connect a source topic to a target topic. Your current topic is Kayvan Najarian (id: 26555151, ns: 0) and
you must select a new wikipedia page closer to the target topic Israel. You can select a topic from
the following list:

Topic   			Description
--------			-----------
title:Amirkabir University of Technology			Amirkabir University of Technology (AUT) (Persian:...
title:CRC Press			The CRC Press, LLC is an American publishing group...
title:ISBN (identifier)			The International Standard Book Number (ISBN) is a...
title:Sharif University of Technology			Sharif University of Technology (SUT; Persian: دان...
title:University of British Columbia			The Universit

Exception: [400] Bad Request
This model's maximum context length is 8192 tokens. However, you requested 11857 tokens (10833 in the messages, 1024 in the completion). Please reduce the length of the messages or completion.
RequestID: c7a306af-277a-44b3-8aa0-8fa265886514

In [None]:
def play_game(self, verbose = True):
    # turn number
    turn_num = 0

    # first 'current' topic is starting topic
    current_topic = self.start_topic

    # to prevent duplicates
    visited = set()

    # keep playing until target is reached
    while True:

        # for turn time tracking
        turn_start = time.time()

        # find most similar topic on current page to target topic
        visited.add(current_topic)
        next_topic = self.take_turn(current_topic, list(visited))

        # for turn time tracking
        turn_time = time.time() - turn_start

        self.log_turn(
            {
                'starting_topic': self.start_topic,
                'target_topic': self.start_topic,
                'turn': turn_num,             
                'current_topic': current_topic,
                'current_summary': self.current_summary
            }
        )

        if verbose:
            printouts = [
                "-" * 50,
                f"Turn: {turn_num}",
                f"Start topic: {self.start_topic.replace('_', ' ')}",
                f"Current topic: {current_topic.replace('_', ' ')}",
                f"Next topic: {next_topic.replace('_', ' ')}",
                f"Target topic: {self.target_topic.replace('_', ' ')}",
            ]

            self.printouts.append(printouts)

            # print progress
            for i in self.printouts[-1]:
                print(i)

        # else, set new next_topic to current topic and loop
        current_topic = next_topic

        # increment turn
        turn_num += 1

In [8]:
page = wiki_wiki.page('Python_(programming_language)')
print(page.title)
# Python (programming language)


Python_(programming_language)


In [11]:
help(wiki_wiki.page)

Help on method page in module wikipediaapi:

page(title: str, ns: Union[wikipediaapi.Namespace, int] = <Namespace.MAIN: 0>, unquote: bool = False) -> 'WikipediaPage' method of wikipediaapi.Wikipedia instance
    Constructs Wikipedia page with title `title`.
    
    Creating `WikipediaPage` object is always the first step for extracting
    any information.
    
    Example::
    
        wiki_wiki = wikipediaapi.Wikipedia('en')
        page_py = wiki_wiki.page('Python_(programming_language)')
        print(page_py.title)
        # Python (programming language)
    
        wiki_hi = wikipediaapi.Wikipedia('hi')
    
        page_hi_py = wiki_hi.article(
            title='%E0%A4%AA%E0%A4%BE%E0%A4%87%E0%A4%A5%E0%A4%A8',
            unquote=True,
        )
        print(page_hi_py.title)
        # पाइथन
    
    :param title: page title as used in Wikipedia URL
    :param ns: :class:`WikiNamespace`
    :param unquote: if true it will unquote title
    :return: object representing :class

In [10]:
?wiki_wiki.page

[0;31mSignature:[0m
[0mwiki_wiki[0m[0;34m.[0m[0mpage[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtitle[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mns[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mwikipediaapi[0m[0;34m.[0m[0mNamespace[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m<[0m[0mNamespace[0m[0;34m.[0m[0mMAIN[0m[0;34m:[0m [0;36m0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0munquote[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'WikipediaPage'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Constructs Wikipedia page with title `title`.

Creating `WikipediaPage` object is always the first step for extracting
any information.

Example::

    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_py = wiki_wiki.page('Python_(programming_language)')
    print(page_py.title)
    # Python (programming language)

    wiki_hi = wikipedi

In [9]:
page.links

{'"Hello, World!" program': "Hello, World!" program (id: ??, ns: 0),
 '3ds Max': 3ds Max (id: ??, ns: 0),
 '?:': ?: (id: ??, ns: 0),
 'ABC (programming language)': ABC (programming language) (id: ??, ns: 0),
 'ADMB': ADMB (id: ??, ns: 0),
 'ALGOL': ALGOL (id: ??, ns: 0),
 'ALGOL 68': ALGOL 68 (id: ??, ns: 0),
 'APL (programming language)': APL (programming language) (id: ??, ns: 0),
 'ATmega': ATmega (id: ??, ns: 0),
 'AVR microcontrollers': AVR microcontrollers (id: ??, ns: 0),
 'Abaqus': Abaqus (id: ??, ns: 0),
 'Academic Free License': Academic Free License (id: ??, ns: 0),
 'Academic conference': Academic conference (id: ??, ns: 0),
 'Action selection': Action selection (id: ??, ns: 0),
 'Activation function': Activation function (id: ??, ns: 0),
 'Ada (programming language)': Ada (programming language) (id: ??, ns: 0),
 'Advanced Simulation Library': Advanced Simulation Library (id: ??, ns: 0),
 'Adversarial machine learning': Adversarial machine learning (id: ??, ns: 0),
 'AlexNe

In [14]:
links = list(page.links.keys())
links

['"Hello, World!" program',
 '3ds Max',
 '?:',
 'ABC (programming language)',
 'ADMB',
 'ALGOL',
 'ALGOL 68',
 'APL (programming language)',
 'ATmega',
 'AVR microcontrollers',
 'Abaqus',
 'Academic Free License',
 'Academic conference',
 'Action selection',
 'Activation function',
 'Ada (programming language)',
 'Advanced Simulation Library',
 'Adversarial machine learning',
 'AlexNet',
 'Alex Graves (computer scientist)',
 'Alex Martelli',
 'Algebra',
 'AlphaFold',
 'AlphaGo',
 'AlphaZero',
 'Alternative terms for free software',
 'Amazon (company)',
 'AmigaOS 4',
 'Amoeba (operating system)',
 'Anaconda (installer)',
 'Analyse-it',
 'Andrew Ng',
 'Android (operating system)',
 'Anonymous function',
 'Anthropic',
 'Apache Groovy',
 'Apache License',
 'Apache webserver',
 'Aphorism',
 'Apple M1',
 'Apple Public Source License',
 'ArXiv (identifier)',
 'Arbitrary-precision arithmetic',
 'ArcGIS',
 'Arithmetic operations',
 'ArkTS',
 'Array index',
 'Array slicing',
 'Artificial intelli

In [33]:
?page

[0;31mType:[0m        WikipediaPage
[0;31mString form:[0m Run-D.M.C. (album) (id: 2431325, ns: 0)
[0;31mFile:[0m        ~/.local/lib/python3.11/site-packages/wikipediaapi/__init__.py
[0;31mDocstring:[0m  
Represents Wikipedia page.

Except properties mentioned as part of documentation, there are also
these properties available:

* `fullurl` - full URL of the page
* `canonicalurl` - canonical URL of the page
* `pageid` - id of the current page
* `displaytitle` - title of the page to display
* `talkid` - id of the page with discussion

In [42]:
page

Run-D.M.C. (album) (id: 2431325, ns: 0)

In [34]:
page.summary

'Run-D.M.C. is the debut studio album by American hip hop group Run-D.M.C., released on March 27, 1984, by Profile Records, and re-issued by Arista Records. The album was primarily produced by Russell Simmons and Larry Smith.\nThe album was considered groundbreaking for its time, presenting a tougher, more hardcore form of rap. The album\'s sparse beats and aggressive rhymes were in sharp contrast with the light, party-oriented sound that was popular in contemporary hip hop. With the album, Run-D.M.C. came to be regarded by music critics as pioneering the movement of new-school hip hop of the mid-1980s. Five singles were released in support of it: "It\'s Like That", "Hard Times", "Rock Box", "30 Days" and "Hollis Crew". The first single from the album, "It\'s Like That", released on August 10, 1983, expanded lyrical boundaries in rap with its tone of social protest (unemployment, inflation). "It\'s Like That" is considered by many to be the first hardcore rap song, and the first new-sc

In [29]:
?wiki_wiki.page

[0;31mSignature:[0m
[0mwiki_wiki[0m[0;34m.[0m[0mpage[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtitle[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mns[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mwikipediaapi[0m[0;34m.[0m[0mNamespace[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m<[0m[0mNamespace[0m[0;34m.[0m[0mMAIN[0m[0;34m:[0m [0;36m0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0munquote[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'WikipediaPage'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Constructs Wikipedia page with title `title`.

Creating `WikipediaPage` object is always the first step for extracting
any information.

Example::

    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_py = wiki_wiki.page('Python_(programming_language)')
    print(page_py.title)
    # Python (programming language)

    wiki_hi = wikipedi

In [13]:
source = get_random_wiki_page(wiki_wiki)
target = get_random_wiki_page(wiki_wiki)
print(f'source={source}')
print(f'target={target}')


source=Run-D.M.C._(album)
target=Marculescu


In [15]:
page = wiki_wiki.page(source)

In [20]:
page.fullurl

'https://en.wikipedia.org/wiki/Run-D.M.C._(album)'

In [23]:
page.canonicalurl

'https://en.wikipedia.org/wiki/Run-D.M.C._(album)'

In [28]:
page.links

{'AMC (TV channel)': AMC (TV channel) (id: ??, ns: 0),
 'Adobe Flash': Adobe Flash (id: ??, ns: 0),
 'African American culture': African American culture (id: ??, ns: 0),
 'Album': Album (id: ??, ns: 0),
 'AllMusic': AllMusic (id: ??, ns: 0),
 'Arista Records': Arista Records (id: ??, ns: 0),
 'Back from Hell': Back from Hell (id: ??, ns: 0),
 'Beats Per Minute (website)': Beats Per Minute (website) (id: ??, ns: 0),
 'Billboard (magazine)': Billboard (magazine) (id: ??, ns: 0),
 'Billboard 200': Billboard 200 (id: ??, ns: 0),
 'Can You Rock It Like This': Can You Rock It Like This (id: ??, ns: 0),
 'Checks Thugs and Rock n Roll': Checks Thugs and Rock n Roll (id: ??, ns: 0),
 'Chicago Tribune': Chicago Tribune (id: ??, ns: 0),
 'Christian Hoard': Christian Hoard (id: ??, ns: 0),
 'Christmas in Hollis': Christmas in Hollis (id: ??, ns: 0),
 'Complex (magazine)': Complex (magazine) (id: ??, ns: 0),
 'Crown Royal (album)': Crown Royal (album) (id: ??, ns: 0),
 'Danceteria': Danceteria (id

In [25]:
validate_pages(page)

['AMC (TV channel)',
 'Adobe Flash',
 'African American culture',
 'Album',
 'AllMusic',
 'Arista Records',
 'Back from Hell',
 'Beats Per Minute (website)',
 'Billboard (magazine)',
 'Billboard 200',
 'Can You Rock It Like This',
 'Checks Thugs and Rock n Roll',
 'Chicago Tribune',
 'Christian Hoard',
 'Christmas in Hollis',
 'Complex (magazine)',
 'Crown Royal (album)',
 'Danceteria',
 'Darryl McDaniels',
 'David Toop',
 'Def Jam Recordings',
 'Diggy Simmons',
 'Disco',
 'Disco Fever',
 'Discogs',
 'Diss (music)',
 'Distortion (Joseph Simmons album)',
 'Down with the King (album)',
 'Down with the King (song)',
 'Drum machine',
 'East Coast hip hop',
 'Eddie Martinez (musician)',
 'Eric Weisbard',
 'Faces (Run-D.M.C. song)',
 'Gold certification',
 'Graffiti Rock',
 'Grandmaster Flash and the Furious Five',
 'Greatest Hits (Run-D.M.C. album)',
 'Greene St. Recording',
 'Greg Kot',
 'Greg Tate',
 'Hard Times (Run-D.M.C. song)',
 'Hardcore hip hop',
 'Hardcore rap',
 'High Profile: The

In [24]:
page.links

{'AMC (TV channel)': AMC (TV channel) (id: ??, ns: 0),
 'Adobe Flash': Adobe Flash (id: ??, ns: 0),
 'African American culture': African American culture (id: ??, ns: 0),
 'Album': Album (id: ??, ns: 0),
 'AllMusic': AllMusic (id: ??, ns: 0),
 'Arista Records': Arista Records (id: ??, ns: 0),
 'Back from Hell': Back from Hell (id: ??, ns: 0),
 'Beats Per Minute (website)': Beats Per Minute (website) (id: ??, ns: 0),
 'Billboard (magazine)': Billboard (magazine) (id: ??, ns: 0),
 'Billboard 200': Billboard 200 (id: ??, ns: 0),
 'Can You Rock It Like This': Can You Rock It Like This (id: ??, ns: 0),
 'Checks Thugs and Rock n Roll': Checks Thugs and Rock n Roll (id: ??, ns: 0),
 'Chicago Tribune': Chicago Tribune (id: ??, ns: 0),
 'Christian Hoard': Christian Hoard (id: ??, ns: 0),
 'Christmas in Hollis': Christmas in Hollis (id: ??, ns: 0),
 'Complex (magazine)': Complex (magazine) (id: ??, ns: 0),
 'Crown Royal (album)': Crown Royal (album) (id: ??, ns: 0),
 'Danceteria': Danceteria (id

In [17]:
?page

[0;31mType:[0m        WikipediaPage
[0;31mString form:[0m Run-D.M.C._(album) (id: ??, ns: 0)
[0;31mFile:[0m        ~/.local/lib/python3.11/site-packages/wikipediaapi/__init__.py
[0;31mDocstring:[0m  
Represents Wikipedia page.

Except properties mentioned as part of documentation, there are also
these properties available:

* `fullurl` - full URL of the page
* `canonicalurl` - canonical URL of the page
* `pageid` - id of the current page
* `displaytitle` - title of the page to display
* `talkid` - id of the page with discussion

In [14]:
?wiki_wiki.page

[0;31mSignature:[0m
[0mwiki_wiki[0m[0;34m.[0m[0mpage[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mtitle[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mns[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mwikipediaapi[0m[0;34m.[0m[0mNamespace[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m<[0m[0mNamespace[0m[0;34m.[0m[0mMAIN[0m[0;34m:[0m [0;36m0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0munquote[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'WikipediaPage'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Constructs Wikipedia page with title `title`.

Creating `WikipediaPage` object is always the first step for extracting
any information.

Example::

    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_py = wiki_wiki.page('Python_(programming_language)')
    print(page_py.title)
    # Python (programming language)

    wiki_hi = wikipedi

# Scratch

In [None]:
def get_linked_page_summary(topic):
    """
    Get the summary of a linked page
    """
    page = wiki_wiki.page(topic)
    summary = get_page_summary(current_page)
    return summary

def validate_pages(wiki_page):
    """
    Filters and validates the linked pages from a given Wikipedia page.

    This function takes a Wikipedia page object and extracts all the links (or references to other Wikipedia 
    pages) from it. It then filters out unwanted links based on predefined criteria, such as links with 
    certain prefixes (like "Template:", "Wikipedia:", etc.) and links that do not contain any alphabetical 
    characters. The purpose is to retain only relevant and potentially useful page links for further processing.

    Parameters
    ----------
    wiki_page : WikipediaPage object
        A Wikipedia page object from which the links are to be extracted and validated. The object is 
        expected to have a 'links' attribute containing a dictionary of linked page titles.

    Returns
    -------
    list
        A list of validated linked page titles. The titles in this list do not include any of the unwanted 
        prefixes and contain at least one alphabetical character.
    """
    # get all links
    links = list(wiki_page.links.keys())

    # various unwanted prefixes
    bad_prefixes = ["list of", "history of", "Template:", "Wikipedia:", "Category:", "Portal:", "Talk:", "Template talk:"]
    links = [link for link in links 
        if not any(link.lower().startswith(prefix.lower()) for prefix in bad_prefixes) 
        and any(char.isalpha() for char in link)
    ]
    return links

In [None]:
def generate_random_string(length):
    characters = string.ascii_letters + string.digits
    random_string = ''.join(random.choice(characters) for _ in range(length))
    return random_string

In [None]:
def get_random_wiki_page(wiki_wiki):
    """
    Selects a random Wikipedia page that meets certain validity criteria.

    This function repeatedly requests random Wikipedia pages until it finds one that satisfies specific 
    criteria: the title should not start with certain prefixes (like "Template:", "List of", etc.), should 
    not contain certain unwanted characters, and must contain at least one alphabetical character. The 
    function also checks if the page has a reasonable summary (at least 20 words) before accepting it.

    Returns
    -------
    str
        The title of a valid random Wikipedia page.
    """
    wiki_title = None
    while True:
        url = "https://en.wikipedia.org/wiki/Special:Random"
        response = requests.get(url, timeout = 30, allow_redirects = True)
        final_url = response.url
        wiki_title = final_url.split("wiki/")[-1]
        is_valid_title = True

        # various unwanted prefixes
        bad_prefixes = ["list of", "history of", "Template:", "Wikipedia:", "Category:", "Portal:", "Talk:", "Template talk:"]

        # check for unwanted chars
        for char in "[]{}:%":
            if char in wiki_title:
                is_valid_title = False
        
        # validation criteria
        starts_with_bad_prefix = any(wiki_title.lower().startswith(prefix.lower()) for prefix in bad_prefixes)
        contains_alpha = any(char.isalpha() for char in wiki_title)
        is_valid_title = not starts_with_bad_prefix and contains_alpha

        if is_valid_title:

            # check if a reasonable page summary is present (at least 20 words)
            summary = get_page_summary(wiki_wiki.page(wiki_title))
            if len(summary.split()) > 20:
                break

    return wiki_title