In [None]:
import pywikibot
from pywikibot import pagegenerators
from tqdm import tqdm

# Handle for the English-language site of the 'hitchwiki' family.
lang_wiki = pywikibot.Site(fam='hitchwiki', code='en')

# Log in only when the site reports no active user (falsy `user()` result),
# so re-running this cell does not trigger a second login.
if not lang_wiki.user():
    lang_wiki.login()

In [None]:
# Materialise the all-pages generator into a list so the result can be
# indexed and iterated more than once.
pages = [*pagegenerators.AllpagesPageGenerator(site=lang_wiki)]

In [None]:
# Peek at the first enumerated page as a quick sanity check of the listing.
pages[0]

In [None]:
# Cache of page content, filled by the fetch loop: {title: {"text": page text}}.
articles = dict()

In [None]:
# Download the text of every enumerated page into the `articles` cache.
# Failures are reported per page and skipped so that one bad page does not
# abort the whole scan.
for page in tqdm(pages, desc="Processing pages"):
    # Pre-set so the error message below can always be formatted, even if the
    # title lookup itself is what failed.
    title = None
    try:
        title = page.title()
        articles[title] = {"text": page.text}
    except Exception as e:
        # Name the failing page; without it the message cannot be acted on.
        print(f"Error processing page {title!r}: {e}")

In [None]:
# Persist the in-memory `articles` dict as an importable Python module.
# The encoding is pinned to UTF-8 because Python decodes source files as UTF-8
# by default, whereas open() would otherwise use the platform locale encoding
# and could fail on (or mis-encode) non-ASCII page text.
with open("articles.py", "w", encoding="utf-8") as f:
    f.write(f"articles = {articles!r}\n")

In [None]:
from articles import articles

In [None]:
# Number of entries currently held in the `articles` cache.
len(articles)

In [None]:
# Inspect a single cached entry; raises KeyError if no "Berlin" key is cached.
articles["Berlin"]

In [None]:
# Misspelled category markers to look for, and the single correct marker that
# replaces either of them.
WRONG_CITY_CATEGORIES = ("[[Category:City's]]", "{{Category|City's}}")
CITY_CATEGORY = "[[Category:Cities]]"

for article, items in tqdm(articles.items()):
    cached_text = items["text"]

    # The cache is only used to pick candidates; pages without either
    # misspelling are skipped without touching the wiki.
    if not any(marker in cached_text for marker in WRONG_CITY_CATEGORIES):
        continue

    try:
        # Apply the fix to the text read from the wiki now, not to the cached
        # copy: the cache may be stale, and saving text derived from it would
        # overwrite any edits made to the page since it was downloaded.
        page = pywikibot.Page(lang_wiki, article)
        live_text = page.text
        corrected_text = live_text
        for marker in WRONG_CITY_CATEGORIES:
            corrected_text = corrected_text.replace(marker, CITY_CATEGORY)

        # Save only when the live page still contains a misspelled marker.
        if corrected_text != live_text:
            page.text = corrected_text
            page.save(f"Correct City's category in {article}")
            print(page.title())
            print("******")
    except Exception as e:
        # Report and move on, mirroring the fetch loop: one failing page must
        # not abort the rest of the batch. The cache entry is left untouched.
        print(f"Error correcting page {article!r}: {e}")
        continue

    # Keep the cache in sync with what is now on the wiki. Only the nested
    # value is changed, so mutating during iteration is safe.
    items["text"] = corrected_text

In [None]:
# Write the `articles` dict back to disk so the file matches the in-memory
# state after the category corrections.
# The encoding is pinned to UTF-8 because Python decodes source files as UTF-8
# by default, whereas open() would otherwise use the platform locale encoding
# and could fail on (or mis-encode) non-ASCII page text.
with open("articles.py", "w", encoding="utf-8") as f:
    f.write(f"articles = {articles!r}\n")

In [None]:
import requests

def is_geographical_location(title: str) -> bool:
    """Report whether English Wikipedia's summary for ``title`` has coordinates.

    The title is looked up through the Wikipedia REST ``page/summary``
    endpoint. An article counts as a geographical location exactly when the
    returned JSON summary contains a ``coordinates`` key.

    Args:
        title: Article title to look up. Spaces are converted to underscores
            and the result is percent-encoded before it is put in the URL.

    Returns:
        ``True`` when the request returns HTTP 200 and the summary contains
        ``coordinates``. ``False`` for a blank title, any other status code,
        a summary without coordinates, or a failed lookup (request error or
        undecodable JSON), which is reported on stdout.
    """
    # Imported locally so the function does not depend on another cell's imports.
    from urllib.parse import quote

    # A blank title cannot name an article; skip the request entirely.
    if not title or not title.strip():
        return False

    # NOTE(review): this User-Agent is a placeholder -- replace it with the
    # real application name and contact address before running in bulk.
    headers = {
        'User-Agent': 'YourAppName/1.0 (your.email@example.com) Python/requests'
    }

    # safe="" makes quote() escape "/" as well, so a title such as "AC/DC"
    # stays a single path segment instead of changing the endpoint path.
    encoded_title = quote(title.replace(" ", "_"), safe="")
    wikipedia_url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + encoded_title

    try:
        response = requests.get(wikipedia_url, headers=headers, timeout=10)
        if response.status_code != 200:
            return False
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Best effort: a failed request or bad JSON counts as "not a location".
        print(f"Wikipedia lookup error for {title}: {e}")
        return False

    # If Wikipedia has coordinates, it's a geographical location.
    return isinstance(data, dict) and 'coordinates' in data

In [None]:
# Try the helper on one title; this performs a live HTTP request to Wikipedia.
is_geographical_location("Berlin")