# In [None]:
import requests
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk
import random

# Download the NLTK resources needed for tokenization and stopword removal.
# NLTK >= 3.8.2 loads the Punkt tokenizer from 'punkt_tab' instead of the
# pickled 'punkt' package, so both are fetched to work across versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# MediaWiki Action API endpoint for English Wikipedia.
wiki_api_url = "https://en.wikipedia.org/w/api.php"

def fetch_wikipedia_content(title, num_sentences=2, timeout=10):
    """Return the plain-text intro extract of a Wikipedia page.

    Args:
        title: Page title to look up (underscores or spaces).
        num_sentences: Number of leading sentences to request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The extract text reported by the API for the page.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        KeyError: If the response contains no extract for ``title``
            (for example, the page does not exist).
    """
    params = {
        'action': 'query',
        'format': 'json',
        'titles': title,
        'prop': 'extracts',
        'exintro': True,
        'explaintext': True,
        'exsentences': num_sentences,
        # Resolve redirect titles (e.g. plural forms) to their target page;
        # otherwise the redirect stub itself is queried.
        'redirects': 1,
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(wiki_api_url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()

    pages = response.json().get('query', {}).get('pages', {})
    # Only one title is requested, so the first page entry is the result.
    page = next(iter(pages.values()), None)
    if page is None or 'extract' not in page:
        raise KeyError(f"No extract returned for Wikipedia title {title!r}")
    return page['extract']

def preprocess_text(text):
    """Normalize raw text into a space-separated string of content words.

    The text is tokenized with NLTK; tokens that are not purely alphanumeric
    are dropped, the rest are lowercased, and English stopwords are removed.
    """
    english_stopwords = set(stopwords.words('english'))
    kept_tokens = []
    for token in word_tokenize(text):
        # Skip punctuation and any token containing non-alphanumeric chars.
        if not token.isalnum():
            continue
        lowered = token.lower()
        if lowered in english_stopwords:
            continue
        kept_tokens.append(lowered)
    return ' '.join(kept_tokens)


# Wikipedia page titles used as training examples for each class.
geographic_titles = ["Geography", "Landforms", "Countries", "Cities", "Coordinates"]
non_geographic_titles = ["Technology", "History_of_technology", "Engineering", "Computer_science", "Space_exploration", "Robotics"]

# Fetch the intro text of every page (one HTTP request per title).
geographic_texts = [fetch_wikipedia_content(title) for title in geographic_titles]
non_geographic_texts = [fetch_wikipedia_content(title) for title in non_geographic_titles]

all_texts = geographic_texts + non_geographic_texts
labels = ['geographic'] * len(geographic_texts) + ['non-geographic'] * len(non_geographic_texts)

# Shuffle texts and labels *together*. Shuffling only the label list would
# break the document/label pairing and train the model on random labels.
samples = list(zip(all_texts, labels))
random.shuffle(samples)
all_texts = [text for text, _ in samples]
labels = [label for _, label in samples]

preprocessed_texts = [preprocess_text(text) for text in all_texts]

# Bag-of-words counts: one row per document, aligned with `labels`.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(preprocessed_texts)
y = labels

clf = MultinomialNB()
clf.fit(X, y)

# NOTE: predictions are made on the same documents the model was fitted on,
# so the scores below measure training fit, not generalization.
y_pred = clf.predict(X)

accuracy = accuracy_score(y, y_pred)
print(f"\nOverall Accuracy: {accuracy:.2f}")

print("\nClassification Report:")
print(classification_report(y, y_pred))