In [None]:
!pip install wikipedia-api
!pip install gradio_client
!pip install "pymongo[srv]"

In [None]:
import functools
import time
from urllib.parse import quote_plus

def time_it(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function's return value is passed through unchanged. The
    wrapper keeps the original ``__name__`` and ``__doc__`` so decorated
    functions still identify themselves correctly in logs and in ``help()``.

    Args:
        func: The callable to time.

    Returns:
        A wrapper accepting the same arguments as ``func``.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock, so the measured
        # duration cannot be distorted by system clock adjustments.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f"{func.__name__} took {elapsed:.2f} s")
        return result
    return wrapper


In [None]:
# Connect to MongoDB
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Read the database password from the environment. Fail fast with a clear
# message instead of silently building a URI that contains the text "None".
password = os.getenv('MONGO')
if not password:
    raise RuntimeError("Environment variable 'MONGO' is not set; cannot connect to MongoDB.")

# Percent-escape the password so reserved characters such as '@', ':' or '/'
# cannot corrupt the connection string.
uri = f"mongodb+srv://baderalotaibi3:{quote_plus(password)}@cluster0.od393y9.mongodb.net/?retryWrites=true&w=majority"
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
db = client["WIKIQUIZ"]
collection = db["WikiQuizEnApi"]  # finished quiz documents, looked up by "page_title"
CategoriesCollection = db["Categories"]
PagesCollection = db["WikiPagesTitle"]  # queue of page titles, stored under "title"
# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")

except Exception as e:
    # Best-effort connectivity check: report the problem but let the notebook
    # continue so the remaining cells can still be defined.
    print(e)


In [None]:
import wikipediaapi
from gradio_client import Client


In [None]:
import requests
def get_wikipedia_image_url(page_title, thumb_size=500, timeout=10):
    """Look up the thumbnail URL of an English Wikipedia page.

    Args:
        page_title: Title of the page; spaces are converted to underscores
            before the request is sent.
        thumb_size: Thumbnail size sent to the API as ``pithumbsize``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The thumbnail URL as a string, or ``False`` when the response holds
        no page entry or the page has no thumbnail.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Replace spaces with underscores for the Wikipedia API request
    page_title = page_title.replace(' ', '_')

    # Wikipedia API endpoint for querying page images
    URL = "https://en.wikipedia.org/w/api.php"

    # Parameters for the API request
    params = {
        "action": "query",
        "format": "json",
        "titles": page_title,
        "prop": "pageimages",
        "pithumbsize": thumb_size,  # Specify the thumbnail size
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(URL, params=params, timeout=timeout)
    # Surface HTTP errors explicitly instead of failing later on the payload.
    response.raise_for_status()
    data = response.json()

    # API error responses carry no "query"/"pages" section; treat them the
    # same as "no image" rather than raising KeyError.
    pages = data.get('query', {}).get('pages', {})
    if not pages:
        return False

    # "pages" is keyed by page ID; a single title was requested, so the first
    # entry is the one we want.
    page = next(iter(pages.values()))

    # Check if the page exists and has an image
    thumbnail = page.get('thumbnail')
    if thumbnail and 'source' in thumbnail:
        return thumbnail['source']
    return False

In [None]:
# Use a dedicated name for the question-generation client so it does not
# rebind `client`, which the MongoDB cell already uses for its MongoClient.
# Sharing one name made `query` depend on the order the cells were last run.
question_client = Client("https://bader4k-question-gen-en.hf.space/")


@time_it
def query(context):
    """Generate a question for ``context`` through the hosted Gradio app.

    Args:
        context: Text submitted as the 'context' Textbox input of the
            ``/predict`` endpoint.

    Returns:
        The endpoint's result for this context (it is also printed).
    """
    job = question_client.submit(
        context,  # str in 'context' Textbox component
        api_name="/predict",
    )
    # Resolve the job once and reuse the value for both the log and the return.
    answer = job.result()
    print(answer)
    return answer

In [None]:
# Shared English-Wikipedia API handle, created once and reused by the page
# lookups defined in this cell.
wiki_wiki = wikipediaapi.Wikipedia(
    'QiziWiki/1.0 (baderalotaibi3@gmail.com)',  # user agent sent with requests
    "en",  # language edition
)
def get_wiki_summary(page_title):
    """Collect the summary, categories and section titles of a Wikipedia page.

    The page is looked up through the module-level ``wiki_wiki`` handle, and
    each piece of data is printed as it is gathered.

    Args:
        page_title: Title of the page to look up.

    Returns:
        ``None`` when the page does not exist. Otherwise the tuple
        ``(summary, categories, sections, page)``, where ``categories`` maps
        each category key to that category's ``fullurl`` and ``sections``
        lists the ``title`` of every entry in ``page.sections``.
    """
    wiki_page = wiki_wiki.page(page_title)
    if not wiki_page.exists():
        return None

    # Get summary
    text = wiki_page.summary
    print(text)

    print("Cat")
    # Get categories
    category_urls = {}
    for name, category in wiki_page.categories.items():
        category_urls[name] = category.fullurl
    print(category_urls)

    # Get sections
    titles = []
    for section in wiki_page.sections:
        titles.append(section.title)
    print("Sections")
    print(titles)

    return text, category_urls, titles, wiki_page

def main(min_summary_length=250):
    """Turn every queued Wikipedia title into a stored quiz document.

    Walks ``PagesCollection`` in descending ``_id`` order. For each title:

    * already present in ``collection``: removed from the queue;
    * ``get_wiki_summary`` returns ``None``: logged and left in the queue;
    * summary longer than ``min_summary_length``: a question is generated,
      the document is inserted into ``collection`` and the title is removed
      from the queue;
    * otherwise: the title is removed from the queue without being stored.

    Args:
        min_summary_length: A summary must be longer than this many
            characters to get a question generated.
    """
    pending_documents = PagesCollection.count_documents({})
    completed_documents = collection.count_documents({})
    print(f"Total documents: {pending_documents}")
    print(f"Completed documents: {completed_documents}")

    # Finished titles are deleted from the queue, so the queue size alone is
    # not the overall total. The progress index starts at the completed count,
    # so the denominator must include it too or the percentage passes 100%.
    overall_total = pending_documents + completed_documents

    for offset, document in enumerate(PagesCollection.find().sort('_id', -1)):
        index = completed_documents + offset
        page_title = document["title"]
        percentage_processed = (index / overall_total) * 100
        print(f"Processed {index} out of {overall_total} documents.")
        print(f"Percentage of documents processed: {percentage_processed:.2f}%")

        if collection.find_one({"page_title": page_title}) is not None:
            print(f"Debug:   {page_title}  Already exsited {index}")
            # Queue documents store the title under "title"; filtering on
            # "page_title" matched nothing and left the entry queued forever.
            PagesCollection.delete_one({"title": page_title})
            continue

        wiki_obj = get_wiki_summary(page_title)
        if wiki_obj is None:
            # NOTE(review): the title stays queued and is looked up again on
            # the next run; confirm whether it should be removed instead.
            print(f"Debug:   {page_title}  Not exsited")
            continue

        print("Title", page_title)
        summary, categories, sections, _page = wiki_obj

        print(categories)
        print(sections)
        if len(summary) > min_summary_length:
            question = query(summary)

            dict_ = {
                "page_title": page_title,
                "summary": summary,
                "categories": categories,
                "question": question,
                "image_url": get_wikipedia_image_url(page_title),
            }

            print(dict_)
            collection.insert_one(dict_)
        else:
            print(f"Debug:   {page_title}  Summary is too short")

        # Stored or too short: either way this title is finished.
        PagesCollection.delete_one({"title": page_title})
        
    
# Run the pipeline over every title currently queued in PagesCollection.
main()