## Inclusion

In [None]:
!pip install import-ipynb
import import_ipynb
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials


# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
# NOTE(review): `auth` is imported from google.colab, so this cell presumably
# only runs inside a Colab runtime — confirm before running elsewhere.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand the application-default credentials to PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client object used by later cells to fetch files from Drive.
drive = GoogleDrive(gauth)

In [None]:
from typing import List
from googlesearch import search
import wikipedia
import requests
from bs4 import BeautifulSoup

In [None]:
# Fetch the helper notebook from Google Drive by its file id and save it next
# to this notebook as 'question_answerer.ipynb'.
your_module = drive.CreateFile({'id':'1Ymo-e8KvTVA3K-zc3PgeBFJ6Kvx5nwFo'})
your_module.GetContentFile('question_answerer.ipynb')
# Importing a .ipynb file as a module presumably relies on the `import_ipynb`
# package imported in the first cell — that cell must have been run first.
import question_answerer

## Test three models for results and then choose the best one

In [4]:
from transformers import pipeline

In [5]:
# Question-answering pipeline for the "henryk" candidate. The same checkpoint
# id supplies both the model weights and the tokenizer.
_henryk_checkpoint = "henryk/bert-base-multilingual-cased-finetuned-polish-squad2"
qa_pipeline_henryk = pipeline(
    "question-answering",
    model=_henryk_checkpoint,
    tokenizer=_henryk_checkpoint,
)

In [None]:
# Question-answering pipeline for the "dkleczek" candidate. The same checkpoint
# id supplies both the model weights and the tokenizer.
# NOTE(review): the checkpoint name gives no indication of question-answering
# fine-tuning — confirm that it ships a trained QA head before trusting scores.
_dkleczek_checkpoint = "dkleczek/bert-base-polish-cased-v1"
qa_pipeline_dkleczek = pipeline(
    "question-answering",
    model=_dkleczek_checkpoint,
    tokenizer=_dkleczek_checkpoint,
)

In [None]:
# Question-answering pipeline for the "herbert" candidate. The same checkpoint
# id supplies both the model weights and the tokenizer.
# NOTE(review): the checkpoint name gives no indication of question-answering
# fine-tuning — confirm that it ships a trained QA head before trusting scores.
_herbert_checkpoint = "allegro/herbert-large-cased"
qa_pipeline_herbert = pipeline(
    "question-answering",
    model=_herbert_checkpoint,
    tokenizer=_herbert_checkpoint,
)

In [None]:
def get_urls_test(query: str) -> List[str]:
    """Get urls of given query from Google.

    Args:
        query: Search phrase passed to ``googlesearch.search``.

    Returns:
        A list of result URLs. The search is issued with ``tld="com"``,
        ``lang="pl"``, ``stop=5`` and ``pause=2``.
    """
    # `search` hands back a lazy iterator rather than a list; materialise it so
    # the return value matches the List[str] annotation and can be indexed,
    # measured with len() and iterated more than once.
    return list(search(query, tld="com", lang="pl", stop=5, pause=2))


def get_context_test2(url: str) -> str:
    """First version of get_context function.

    Downloads ``url`` and extracts the text to use as question-answering context.

    Args:
        url: Address of the page to download.

    Returns:
        For URLs containing ``"wikipedia"``: the content of the article that the
        ``wikipedia`` package (language set to Polish) returns for the page
        title. For every other URL: the text of the first ``<p>`` element, or
        an empty string when the page has no ``<p>`` element.
    """
    wikipedia.set_lang("pl")
    response = requests.get(url)
    # Parse once up front: both branches need the parsed document. The original
    # non-Wikipedia branch stored it under a different name and then read the
    # unassigned `soup`, raising NameError.
    soup = BeautifulSoup(response.text, "html.parser")
    if "wikipedia" in url:
        # Drop the last 32 characters of the <title> text; presumably this is
        # the " – Wikipedia, wolna encyklopedia" suffix — TODO confirm.
        title = soup.find("title").text[:-32]
        return wikipedia.page(title).content
    first_paragraph = soup.find("p")
    # A page without any <p> yields an empty context instead of an IndexError.
    return first_paragraph.text if first_paragraph is not None else ""


def find_potential_answers2(question, pipeline, max_context_chars=1000):
    """First version of find_potential_answers.

    Looks up URLs for ``question``, fetches a context from each one and runs
    the question-answering ``pipeline`` on every non-empty context.

    Args:
        question: Question text; also used as the search query.
        pipeline: Callable invoked with a ``{"context", "question"}`` dict.
        max_context_chars: Contexts are cut to this many leading characters
            before being passed to ``pipeline``. Defaults to 1000, the value
            that was previously hard-coded.

    Returns:
        A list with one ``pipeline`` result per URL that produced a non-empty
        context, in the order the URLs were returned.
    """
    answers = []
    for url in get_urls_test(question):
        context = get_context_test2(url)
        if not context:
            # Nothing to ask the model about for this page.
            continue
        truncated = context[:max_context_chars]
        answers.append(pipeline({"context": truncated, "question": question}))
    return answers


def determine_best_answer_test(answers):
    """Find best answer by checking the scores.

    Args:
        answers: Iterable of mappings, each with a ``"score"`` and an
            ``"answer"`` key.

    Returns:
        The ``"answer"`` value of the entry with the highest ``"score"``. When
        several entries share the highest score, the first one wins.

    Raises:
        ValueError: If ``answers`` is empty.
    """
    # Materialise so that an exhausted or empty iterator is detected as well.
    candidates = list(answers)
    if not candidates:
        # Same exception type max() would raise, but with a message that says
        # what actually went wrong.
        raise ValueError("Cannot determine the best answer: no answers were provided.")
    best = max(candidates, key=lambda candidate: candidate["score"])
    return best["answer"]


def find_answer_test_henryk(question, pipeline=qa_pipeline_henryk):
    """Answer ``question`` with the Henryk pipeline (trial run).

    Collects candidate answers for the question and returns the text of the
    highest-scoring one.
    """
    return determine_best_answer_test(find_potential_answers2(question, pipeline))


def find_answer_test_dekleczek(question, pipeline=qa_pipeline_dkleczek):
    """Answer ``question`` with the DKleczek pipeline (trial run).

    Collects candidate answers for the question and returns the text of the
    highest-scoring one.
    """
    return determine_best_answer_test(find_potential_answers2(question, pipeline))


def find_answer_test_herbert(question, pipeline=qa_pipeline_herbert):
    """Answer ``question`` with the Herbert pipeline (trial run).

    Collects candidate answers for the question and returns the text of the
    highest-scoring one.
    """
    return determine_best_answer_test(find_potential_answers2(question, pipeline))
