![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)


[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/public/QUESTION_ANSWERING_CLOSED_BOOK.ipynb)

# **QUESTION ANSWERING (Closed Book Questions)**

# **Colab Setup and Start Spark Session**

In [None]:
!pip install -q pyspark==3.3.0 spark-nlp==4.2.8

In [2]:
# Setup cell: import Spark NLP, start the Spark session, and report versions.
import sparknlp
import pandas as pd  # NOTE(review): `pd` is not referenced in the cells visible here

# The object returned by sparknlp.start() is bound to `spark` and used by later
# cells (spark.createDataFrame) and by the version print below.
spark = sparknlp.start()


# Wildcard imports: presumably these supply the names used unqualified in the
# pipeline cell (DocumentAssembler, SentenceDetectorDLModel, T5Transformer),
# since no other import for them is visible. They run after `spark` is
# assigned, so their order relative to that assignment is left untouched.
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from pyspark.sql.types import StringType, IntegerType  # NOTE(review): IntegerType is not referenced in the cells visible here



# Report the library versions in use.
print("Spark NLP version", sparknlp.version())
print("Apache Spark version:", spark.version)

# Bare last expression so the notebook renders the session object.
spark 


Spark NLP version 4.2.8
Apache Spark version: 3.3.0


# **`google_t5_small_ssm_nq`** model

In [3]:
# Closed-book QA pipeline: raw text -> documents -> questions -> answers.

# Reads the "text" column and writes the "documents" column.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("documents")
)

# Pretrained sentence detector; its output column is named "questions"
# because each detected sentence is fed to T5 as one question.
sentence_detector = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl", "en")
    .setInputCols(["documents"])
    .setOutputCol("questions")
)

# `pretrained` is called on the class, not on a freshly constructed instance:
# `T5Transformer().pretrained(...)` would build a throwaway annotator only to
# reach the loader. The language is passed explicitly to match the sentence
# detector above.
t5 = (
    T5Transformer.pretrained("google_t5_small_ssm_nq", "en")
    .setTask("trivia question:")  # task prefix prepended to every question
    .setInputCols(["questions"])
    .setOutputCol("answers")
)

# Stage order matters: each stage consumes the previous stage's output column.
qa_pp = Pipeline(stages=[document_assembler, sentence_detector, t5])


sentence_detector_dl download started this may take some time.
Approximate size to download 354.6 KB
[OK!]
google_t5_small_ssm_nq download started this may take some time.
Approximate size to download 170.8 MB
[OK!]


In [4]:
# Sample closed-book questions; no supporting context passage is supplied.
questions_list = [
    "Who is Clark Kent?",
    "Which is the capital of Bulgaria ?",
    "Which country tops the annual global democracy index compiled by the economist intelligence unit?",
    "In which city is the Eiffel Tower located?",
    "Who is the founder of Microsoft?",
]

In [5]:
# Build a one-column DataFrame of questions; the column is renamed to "text",
# the input column the DocumentAssembler is configured to read.
single_string_column = StringType()
df = spark.createDataFrame(questions_list, single_string_column).toDF("text")

# Show the full question text without truncation.
df.show(truncate=False)


+-------------------------------------------------------------------------------------------------+
|text                                                                                             |
+-------------------------------------------------------------------------------------------------+
|Who is Clark Kent?                                                                               |
|Which is the capital of Bulgaria ?                                                               |
|Which country tops the annual global democracy index compiled by the economist intelligence unit?|
|In which city is the Eiffel Tower located?                                                       |
|Who is the founder of Microsoft?                                                                 |
+-------------------------------------------------------------------------------------------------+



In [6]:
# Fit the pipeline on the questions DataFrame, then apply the fitted pipeline
# to that same DataFrame to produce the annotated results.
qa_model = qa_pp.fit(df)
results = qa_model.transform(df)

In [7]:
# Put each question's text next to the generated answer's text and print the
# rows without truncation.
question_answer_view = results.select("questions.result", "answers.result")
question_answer_view.show(truncate=False)

+---------------------------------------------------------------------------------------------------+------------------------------+
|result                                                                                             |result                        |
+---------------------------------------------------------------------------------------------------+------------------------------+
|[Who is Clark Kent?]                                                                               |[a superhero]                 |
|[Which is the capital of Bulgaria ?]                                                               |[Sofia]                       |
|[Which country tops the annual global democracy index compiled by the economist intelligence unit?]|[Norway]                      |
|[In which city is the Eiffel Tower located?]                                                       |[Paris]                       |
|[Who is the founder of Microsoft?]                                  