# Import dependencies

In [1]:
import asyncio
import json
import logging
import time

import nest_asyncio

from extractors import extract_text_from_pdf,extract_all_content_from_webpage
from pipeline import QuestionAnsweringSystem

# Step 1: Extract Text from Multimedia Sources

## Extract From PDF

In [2]:
# Disabled PDF-extraction example; the rest of the notebook works on the inline
# `example_text` instead. Uncomment to try the extractor on a local PDF.
# text_pdf = extract_text_from_pdf("tests/example.pdf")
# text_pdf

## Extract From Website (testing)

In [3]:
# Disabled web-extraction example; the rest of the notebook works on the inline
# `example_text` instead.
# NOTE(review): nest_asyncio.apply() is presumably needed so asyncio.run() can be
# called while the notebook's own event loop is already running — confirm.
# nest_asyncio.apply()
# text_web = asyncio.run(extract_all_content_from_webpage("https://www.livingsecurity.com/solutions/unify-insights"))
# text_web_new = " ".join(text_web)

In [4]:
# Sample document used as the knowledge source for every retrieval / QA step below.
# It is passed to qa_system.create_paragraphs() in later cells.
example_text = '''
The world of Human Resources (HR) is a domain of perpetual dynamics, characterized by continuous interactions, decision-making, and evaluation. Within this multifaceted milieu, a prominent embodiment of Artificial Intelligence (AI) has emerged as a game-changer: meet "Emplify", a state-of-the-art AI-powered employee engagement and performance management platform.

Emplify has been engineered to masterfully merge AI capabilities with HR processes, focusing on enhancing employee engagement, facilitating performance management, and informing data-driven HR decisions. By leveraging natural language processing (NLP), machine learning, and predictive analytics, Emplify enables a nuanced understanding of employee sentiments, aspirations, and behavior patterns, thereby enriching the HR management landscape.

At its core, Emplify performs comprehensive analysis of both qualitative and quantitative employee data. The AI system swiftly scans through a plethora of data sources - including surveys, performance evaluations, and digital communications. It goes beyond just number crunching and recognizes patterns, gauges sentiment, and interprets tacit knowledge concealed within unstructured data.

In the sphere of performance management, Emplify's AI has been harnessed to create a fair, objective, and consistent evaluation system. Its predictive analytics capability aids in identifying future performance trends and potential employee churn, enabling HR teams to proactively strategize and respond.

Moreover, Emplify learns continuously, adapting and improving its analysis and recommendations based on user interactions and feedback. This continual learning embodies the concept of AI’s autodidactic capabilities, allowing for a highly personalized and evolving user experience.

However, in the midst of this technological breakthrough, it is crucial to consider the ethical implications of integrating AI in HR, particularly in regards to data privacy and algorithmic fairness. Unbiased algorithms, data security, and respectful employee privacy are paramount considerations that must be diligently maintained.

In summary, AI systems such as Emplify represent the dawn of a new era in Human Resources, one that harmonizes human decision-making with AI's analytical prowess. These systems pose significant potential, ushering in transformative changes in the HR landscape, while simultaneously presenting new challenges to be navigated responsibly and ethically.
'''

## Define Questions

In [5]:
# In the extractive case the retrieval query and the question put to the reader
# are the same string, so both names are bound in a single assignment.
selected_question_extractive = search_str_extractive = "What is the name of the AI system ?"
selected_question_extractive

'What is the name of the AI system ?'

# Step 2: Information Improvement

## Obtain similar paragraphs based on the provided question

In [6]:
# Build the QA pipeline once and reuse it in every later cell. Per the log output,
# construction loads the all-MiniLM-L6-v2 SentenceTransformer model.
qa_system = QuestionAnsweringSystem()

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu


In [7]:
# Split the sample document into retrieval units and show how many were produced.
# NOTE(review): get_similar_paragraphs() is never handed the text, so this call
# presumably also stores the paragraphs on qa_system — confirm in pipeline.py.
paralist = qa_system.create_paragraphs(example_text)
len(paralist)

15

In [8]:
# Retrieve the context for the extractive question. Judging by the printed score
# array and the returned text, paragraphs whose similarity to the query reaches
# `threshold` are kept and joined into a single string.
selected_text = qa_system.get_similar_paragraphs(search_str_extractive,threshold=0.5)
selected_text

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[0.25701302 0.51656014 0.40026197 0.18099384 0.11487711 0.60238093
 0.31746805 0.5023394  0.15665938 0.2390898  0.47608775 0.37044895
 0.10591739 0.59706485 0.19914529]


'Within this multifaceted milieu, a prominent embodiment of Artificial Intelligence (AI) has emerged as a game-changer: meet "Emplify", a state-of-the-art AI-powered employee engagement and performance management platform.\n\n The AI system swiftly scans through a plethora of data sources - including surveys, performance evaluations, and digital communications. In the sphere of performance management, Emplify\'s AI has been harnessed to create a fair, objective, and consistent evaluation system. In summary, AI systems such as Emplify represent the dawn of a new era in Human Resources, one that harmonizes human decision-making with AI\'s analytical prowess.'

## Summarize the similar paragraphs

# Step 3: Extractive Question Answering based on similar paragraphs

In [9]:
# Run the extractive reader over the retrieved context. The printed result is a
# dict with the answer span, its character offsets in the context, and a score.
answer_qa = qa_system.answer_question_extractive_qa(
    context=selected_text,
    question=selected_question_extractive,
)
print(answer_qa)

{'score': 0.6613650918006897, 'start': 125, 'end': 132, 'answer': 'Emplify'}


# Step 4: Multiple-Choice Question Answering based on similar paragraphs

In [10]:
# Load the questionnaire definition from the working directory and display it.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so the file is
# decoded the same way on every platform instead of following the locale default.
with open("questions.json", encoding="utf-8") as f:
    questions = json.load(f)
questions

{'questionnaire': [{'category': 'Basic Information',
   'type': 'multiple',
   'question': 'For Experience, this project enhances:',
   'options': ['Consumer Experience',
    'Customer Experience',
    'Employee Experience',
    'Commercial Experience',
    'Product Management',
    'Data & AI Foundation',
    'Technology Foundation']},
  {'category': 'Basic Information',
   'type': 'multiple',
   'question': 'For Ownership & Hierarchy, the project function are:',
   'options': ['Audit',
    'Finance',
    'Legal',
    'Supply Chain',
    'Data & Analytics',
    'Information Technology',
    'Research & Development',
    'Customer Development',
    'Human Resources',
    'Operations',
    'Communications',
    'General Management',
    'Marketing & CMI']},
  {'category': 'End Purpose',
   'type': 'multiple',
   'question': 'Of the enterprise use-cases listed below, select those that apply to this Solution:',
   'options': ['Employment or training decisions',
    'Image processing of pe

In [11]:
# Use the second questionnaire entry (index 1) as the multiple-choice example.
chosen_entry = questions['questionnaire'][1]
question1, option1 = chosen_entry['question'], chosen_entry['options']
selected_question_multi, selected_option_multi = question1, option1

# Retrieval query: the question text immediately followed by the string form of
# the option list (no separator in between).
search_str_multi = "".join((selected_question_multi, str(selected_option_multi)))

print(selected_question_multi, selected_option_multi, sep="\n")

For Ownership & Hierarchy, the project function are:
['Audit', 'Finance', 'Legal', 'Supply Chain', 'Data & Analytics', 'Information Technology', 'Research & Development', 'Customer Development', 'Human Resources', 'Operations', 'Communications', 'General Management', 'Marketing & CMI']


In [12]:
# Display the individual paragraphs the splitter produces for the sample document.
qa_system.create_paragraphs(example_text)

['\nThe world of Human Resources (HR) is a domain of perpetual dynamics, characterized by continuous interactions, decision-making, and evaluation.',
 'Within this multifaceted milieu, a prominent embodiment of Artificial Intelligence (AI) has emerged as a game-changer: meet "Emplify", a state-of-the-art AI-powered employee engagement and performance management platform.\n\n',
 'Emplify has been engineered to masterfully merge AI capabilities with HR processes, focusing on enhancing employee engagement, facilitating performance management, and informing data-driven HR decisions.',
 'By leveraging natural language processing (NLP), machine learning, and predictive analytics, Emplify enables a nuanced understanding of employee sentiments, aspirations, and behavior patterns, thereby enriching the HR management landscape.\n\n',
 'At its core, Emplify performs comprehensive analysis of both qualitative and quantitative employee data.',
 'The AI system swiftly scans through a plethora of dat

In [13]:
# Retrieve context for the multiple-choice query. The threshold is much lower
# than in the extractive case (0.2 vs 0.5): the printed similarity scores for
# this long question-plus-options query top out at about 0.25.
selected_text = qa_system.get_similar_paragraphs(search_str_multi,threshold=0.2)
selected_text

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[ 0.22560075  0.17246443  0.22670834  0.18237607  0.2507618   0.18245178
  0.20089443  0.17636386  0.17732492 -0.01743981  0.06800747  0.1572057
  0.1437474   0.21762419  0.17483985]


"\nThe world of Human Resources (HR) is a domain of perpetual dynamics, characterized by continuous interactions, decision-making, and evaluation. Emplify has been engineered to masterfully merge AI capabilities with HR processes, focusing on enhancing employee engagement, facilitating performance management, and informing data-driven HR decisions. At its core, Emplify performs comprehensive analysis of both qualitative and quantitative employee data. It goes beyond just number crunching and recognizes patterns, gauges sentiment, and interprets tacit knowledge concealed within unstructured data.\n\n In summary, AI systems such as Emplify represent the dawn of a new era in Human Resources, one that harmonizes human decision-making with AI's analytical prowess."

In [14]:
# Score every option against the retrieved context. The printed result is a pair:
# the option labels and a list with one score per option (presumably aligned by
# position — confirm in pipeline.py).
answer_multi = qa_system.answer_question_multi_qa(
    context=selected_text,
    question=selected_question_multi,
    options=selected_option_multi,
)
print(answer_multi)

[['Audit', 'Finance', 'Legal', 'Supply Chain', 'Data & Analytics', 'Information Technology', 'Research & Development', 'Customer Development', 'Human Resources', 'Operations', 'Communications', 'General Management', 'Marketing & CMI'], [0.020554186776280403, 0.0050886101089417934, 0.007435772567987442, 0.005290585104376078, 0.1851184368133545, 0.0467793270945549, 0.09547492116689682, 0.0031756290700286627, 0.46819475293159485, 0.08334694057703018, 0.01545038353651762, 0.05447177216410637, 0.009618663229048252]]


In [15]:
# End-to-end run: answer the first two questionnaire entries in one call and log
# how long it took in total and on average per question.
# NOTE(review): `questions` is rebound here from the pipeline's own loader,
# replacing the dict read from questions.json earlier — presumably the same
# content; confirm.
questions = qa_system.load_questions()
qa_system.create_paragraphs(example_text)
relevant_questions = questions['questionnaire'][0:2]

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock is
# adjusted mid-run.
start_time = time.perf_counter()

result = qa_system.process_questions(relevant_questions)

end_time = time.perf_counter()

total_time = end_time - start_time
# Guard the average so an empty selection does not raise ZeroDivisionError.
question_count = len(relevant_questions)
avg_time_per_question = total_time / question_count if question_count else 0.0

logging.info(f"{total_time} in total, {avg_time_per_question} for each inference")
result

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[0.23309773 0.3400941  0.30801067 0.2797255  0.30105382 0.18793589
 0.29263857 0.22813746 0.22454378 0.1726288  0.25218898 0.20935394
 0.18934976 0.2933966  0.24124946]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[ 0.22560075  0.17246443  0.22670834  0.18237607  0.2507618   0.18245178
  0.20089443  0.17636386  0.17732492 -0.01743981  0.06800747  0.1572057
  0.1437474   0.21762419  0.17483985]


INFO:root:60.85014295578003 in total, 30.425071477890015 for each inference


[{'question': 'For Experience, this project enhances:',
  'answer': ['Employee Experience']},
 {'question': 'For Ownership & Hierarchy, the project function are:',
  'answer': ['Human Resources']}]

In [16]:
# Re-display the answers computed by the timed run above, without the progress
# and log output.
result

[{'question': 'For Experience, this project enhances:',
  'answer': ['Employee Experience']},
 {'question': 'For Ownership & Hierarchy, the project function are:',
  'answer': ['Human Resources']}]