In [1]:
!pip install keybert

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keybert
  Downloading keybert-0.7.0.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentence-transformers>=0.3.8
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 KB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m54.8 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub>=0.4.0
  Downlo

In [2]:
import logging
import re

import spacy
from keybert import KeyBERT
from transformers import AutoModelForSeq2SeqLM, AutoModelWithLMHead, AutoTokenizer
logger = logging.getLogger(__name__)

In [3]:
class QuestionGeneration:
  """Generate questions about a text passage.

  Candidate answers come from two sources -- KeyBERT key phrases and spaCy
  named entities. Each candidate is combined with the cleaned passage and fed
  to a T5 question-generation checkpoint, which produces one question per
  candidate answer.
  """

  SPACY_MODEL = "en_core_web_sm"
  KEYPHRASE_MODEL = "distilbert-base-nli-mean-tokens"
  QG_MODEL = "mrm8488/t5-base-finetuned-question-generation-ap"

  # Text the decoded model output starts with once special tokens are removed.
  _QUESTION_PREFIX = "question:"

  def __init__(self):
    """Load the spaCy pipeline, the KeyBERT extractor and the T5 checkpoint.

    Raises:
      Exception: any error raised while loading a model. It is logged with its
        traceback and re-raised, because an instance without its models cannot
        serve any request and would otherwise fail later with an unrelated
        AttributeError.
    """
    try:
      self.nlp = spacy.load(self.SPACY_MODEL)
      self.answers_model = KeyBERT(model=self.KEYPHRASE_MODEL)
      self.tokenizer = AutoTokenizer.from_pretrained(self.QG_MODEL)
      # AutoModelWithLMHead is deprecated in transformers; the seq2seq auto
      # class is the documented replacement for encoder-decoder checkpoints.
      self.model = AutoModelForSeq2SeqLM.from_pretrained(self.QG_MODEL)
    except Exception:
      logger.exception("model download failed")
      raise

  def clean_text(self, text):
    """Return `text` with punctuation removed and whitespace collapsed.

    Every non-word character and every underscore becomes a space, then each
    run of whitespace is reduced to a single space. A leading or trailing
    space can remain, and tokens such as "A.10" are split into "A 10".
    """
    text = re.sub(r"[\W_]", " ", text)
    text = re.sub(r"\s+", " ", text)
    return text

  def get_key_phrases(self, context):
    """Return the KeyBERT key phrases of `context`.

    Returns:
      Whatever `extract_keywords` returns for the fixed settings below;
      `get_question` consumes it as a list of (phrase, score) pairs.
    """
    return self.answers_model.extract_keywords(
        context,
        top_n=10,
        keyphrase_ngram_range=(1, 7),
        stop_words="english",
        use_mmr=True,
        diversity=0.6,
    )

  def get_entities(self, context):
    """Return the spaCy entities of `context` as (entity_text, 1) pairs.

    The constant score 1 gives entities the same (text, score) shape as the
    key phrases and always passes the `score > 0` filter in `get_question`.
    """
    doc = self.nlp(context)
    return [(ent.text, 1) for ent in doc.ents]

  @classmethod
  def _strip_question_prefix(cls, decoded):
    """Drop the leading "question:" marker and surrounding whitespace."""
    text = decoded.strip()
    if text.startswith(cls._QUESTION_PREFIX):
      text = text[len(cls._QUESTION_PREFIX):]
    return text.strip()

  def get_question(self, context, max_length=64):
    """Generate questions whose answers are found in `context`.

    Args:
      context: passage to generate questions from.
      max_length: forwarded to `model.generate` as the output length limit.

    Returns:
      A list of distinct question strings, in the order their answers were
      first encountered (key phrases first, then entities). Empty when the
      cleaned passage is blank.
    """
    context = self.clean_text(context)
    if not context.strip():
      return []

    candidates = list(self.get_key_phrases(context))
    candidates.extend(self.get_entities(context))

    # A phrase can be reported by both extractors; generating for it twice
    # would only repeat the same question, so keep the first occurrence.
    answers = []
    seen_answers = set()
    for candidate in candidates:
      answer, score = candidate[0], candidate[1]
      if score > 0 and answer not in seen_answers:
        seen_answers.add(answer)
        answers.append(answer)

    # dict keys give de-duplication with a stable, first-seen order.
    questions = {}
    for answer in answers:
      input_text = "answer: %s  context: %s </s>" % (answer, context)
      features = self.tokenizer([input_text], return_tensors='pt')
      output = self.model.generate(input_ids=features['input_ids'],
                                   attention_mask=features['attention_mask'],
                                   max_length=max_length)
      # Let the tokenizer remove <pad>/</s> instead of slicing fixed offsets:
      # an output cut off at max_length has no trailing </s>, so a fixed
      # [16:-4] slice would delete the last four characters of the question.
      decoded = self.tokenizer.decode(output[0], skip_special_tokens=True)
      question = self._strip_question_prefix(decoded)
      if question:
        questions[question] = None
    return list(questions)

In [4]:
%%time
# Build the generator once and reuse it in the cells below: the constructor
# loads the spaCy pipeline, the KeyBERT model and the T5 tokenizer/model, so
# the first run also includes the time needed to download the weights.
obj=QuestionGeneration()

Downloading (…)925a9/.gitattributes:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)1a515925a9/README.md:   0%|          | 0.00/3.99k [00:00<?, ?B/s]

Downloading (…)515925a9/config.json:   0%|          | 0.00/550 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/265M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)925a9/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/450 [00:00<?, ?B/s]

Downloading (…)1a515925a9/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)15925a9/modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


Downloading pytorch_model.bin:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

CPU times: user 7.64 s, sys: 4.04 s, total: 11.7 s
Wall time: 1min 28s


**Testing Group Vehicle Policy**

1. Introduction

In [5]:
doc1='''
I am pleased to announce the “Group Vehicle Policy” for Aditya Birla Group – India. The revised policy 
will be effective 1st April 2023 and will supersede ABMCPL/GHR/2015 – 103 Group Car Policy (India). 
Businesses have the flexibility to implement the policy at any time on or before 1st April 2023, basis their
budgets and implementation preparedness
'''

In [6]:
%%time
obj.get_question(doc1)

CPU times: user 23.4 s, sys: 162 ms, total: 23.6 s
Wall time: 25.6 s


['What do businesses have to do to implement the Group Car Policy?',
 'What is the new group car policy for Aditya Birla Group India?',
 'When will the Group Vehicle Policy for Aditya Birla Group India become effective?',
 'What is the group vehicle policy for Aditya Birla Group India?',
 'What is the Group Vehicle Policy for Aditya Birla Group India?',
 'What will the revised Group Vehicle Policy for Aditya Birla Group India do?',
 'What is the name of the group vehicle policy?',
 'What policy did the Group Vehicle Policy replace?',
 'What is the flexibility of businesses to implement the Group Vehicle Policy?',
 'What is the Group Vehicle Policy for Aditya birla group india?',
 'What is the new Group Car Policy?',
 'What country will the Group Vehicle Policy for Aditya Birla Group be in effect in 2023?',
 'What year was the Group Car Policy for Aditya Birla Group India last revised?',
 'How many Group Car Policies were there in 2015?',
 'What will the Group Car Policy for Aditya Birl

2. Purpose

In [7]:
doc2='''
The Group Vehicle Policy (India) is part of the rewards and recognition pillar of our Employee 
Value Proposition (EVP), “A World of Opportunities”. The Group recognizes that employees have 
different official and personal commute requirements and hence need flexibility while availing the 
Vehicle benefit. This policy is designed to provide flexibility to employees to avail a Vehicle based on 
lifestyle and commute preferences.
'''

In [8]:
%%time
obj.get_question(doc2)

CPU times: user 15.7 s, sys: 132 ms, total: 15.8 s
Wall time: 15.7 s


['What country is part of the Group Vehicle Policy?',
 'What is the Group Vehicle Policy India a pillar of?',
 'What does the group do to employees who need flexibility while availing the Vehicle benefit?',
 'What is the primary reason for employees to avail of the Group Vehicle Policy India?',
 'What benefit does the Group provide employees with?',
 'What does the Group recognize that employees have different official requirements and what else?',
 'What does the Group recognize about employees?',
 'What is the Group Vehicle Policy India?',
 'What is the Group Vehicle Policy India part of?',
 'What is the pillar of the Group Vehicle Policy India?',
 'What is the Group Vehicle Policy part of?',
 'What is the name of the policy that is part of the rewards and recognition pillar of our Employee Value Proposition?']

3. Eligibility

In [9]:
doc3='''
All Aditya Birla Group employees at Job Bands 7 & above and employed in India are eligible for the 
Group Vehicle Policy (India). Businesses have the flexibility to offer the policy to JB 8 and JB 9. ABMC 
Directors and Business Heads are governed by ABMCPL /1263 - “Group Executive Car Policy (India)”. 
'''

In [10]:
%%time
obj.get_question(doc3)

CPU times: user 26.2 s, sys: 138 ms, total: 26.4 s
Wall time: 26.4 s


['What is the policy for all Aditya Birla Group employees?',
 'What is the policy offered to JB 8 and JB 9 employees?',
 'Who is governed by ABMCPL 1263 Group Executive Car Policy India?',
 'Are all Aditya Birla Group employees in India eligible for the Group Vehicle Policy?',
 'What is the highest level of the Aditya Birla Group?',
 'What is the name of the policy that governs ABMC Directors and Business Heads?',
 'How are the ABMCPL 1263 Group Executive Car Policy India governed?',
 'Are all Aditya Birla Group employees eligible for the Group Vehicle Policy?',
 'What is the group vehicle policy?',
 'Which two job bands are eligible for the Group Vehicle Policy?',
 'What Job Bands are above?',
 'What is the Group Executive policy?',
 'What are the job bands 7 and above?',
 'What is the Group Executive Car Policy India?',
 'What is the job title of the Aditya Birla Group employees?',
 'What type of vehicle policy is available to all Aditya Birla employees?',
 'Where are all Aditya Birl

4. Applicability

In [11]:
doc4='''
The policy will be applicable to all Business Units in India from 1st April 2023. Businesses have the 
flexibility to implement the policy at any-time on or before 1st April 2023, basis their budgets and 
implementation preparedness. Transition Plan is detailed in Annexure A. All acquired units in India 
are required to adopt the policy within one year from the date of acquisition.
'''

In [12]:
%%time
obj.get_question(doc4)

CPU times: user 19.4 s, sys: 170 ms, total: 19.5 s
Wall time: 19.4 s


['What is in Annexure A?',
 'What types of businesses will the policy be applicable to?',
 'When will the policy be applicable to all business units in India?',
 'How long is the policy required to be adopted?',
 'What is the policy applicable to businesses in India in 2023?',
 'Where is the Transition Plan?',
 'When will the policy become effective?',
 'When will the policy be applicable to all Business Units in India?',
 'In what country will the policy be applicable to all Business Units?',
 'What is the flexibility of businesses to do with the policy?',
 'What is the policy applicable to businesses in India from?',
 'What is the requirement for all acquired units in India to adopt the policy within one year from the date of acquisition?']

5. Vehicle On-Road Price 

In [13]:
doc5='''
Vehicle On-Road Price is defined as 
o Vehicle Ex-showroom price + Octroi (if any) + Goods and Service Tax + Vehicle 
Registration charges + Road tax + Purchase value of all accessories. 
o This does not include first year insurance premium. Insurance premium is part of the 
Vehicle Operating expenses
'''

In [14]:
%%time
obj.get_question(doc5)

CPU times: user 16 s, sys: 163 ms, total: 16.1 s
Wall time: 16 s


['What is the value of all accessories in a vehicle?',
 'What is the cost of the vehicle accessories?',
 'What is the vehicle Ex showroom price?',
 'What charges Road tax?',
 'What is the definition of a Vehicle On Road Price?',
 'What is the showroom price of a vehicle?',
 'What does not include the first year insurance premium?',
 'What does the purchase value of all accessories not include?',
 'What is the definition of the Vehicle On Road Price?',
 'What is the charge for the tax on the vehicle?',
 'What is the definition of Vehicle On Road Price?',
 'What is the On Road Price defined as?',
 'What is the only year the insurance premium is included in the vehicle price?',
 'What is part of the Vehicle Operating expenses?',
 'What is not included in the Vehicle On Road Price?']

6. Vehicle Operating Expenses

In [15]:
doc6='''• Vehicle Operating Expenses include expenses incurred towards fuel, maintenance, insurance 
premium and Chauffer. 
o Fuel: Expense incurred on Fuel, Vehicle Parking/Society Parking/ Toll charges and 
Vehicle cleaning expenses. Fuel expenses includes Cost of charging electric Vehicles
(Where exclusive bill / invoice for charging is available).
o Maintenance: Expenses incurred on repair and maintenance of the Vehicle; any 
repairs not covered by insurance.
o Chauffer: All expenses incurred such as Chauffer’s salary, annual bonus, conveyance, 
mobile (instrument and monthly rentals) and over time.
o Insurance Premium: Annual insurance premium for a zero-depreciation
comprehensive cover.
• Employees can allocate any amount under Vehicle Operating Expenses within the maximum 
limit as per Table 1 at the time of availing the Vehicle. Amount once chosen cannot be changed 
during the tenure of the Vehicle. 
• Insurance Premium is part of Vehicle Operating Expenses and employees should estimate the 
same for the Vehicle tenure, while deciding the amount to be allocated for Vehicle Operating 
Expenses. 
• Any unutilized amount towards Vehicle operating expenses will be paid as a taxable allowance 
at the end of the financial year. 
• Executive Bands A and B have a choice of Vehicle Operating Expenses as Actuals or on 
Reimbursement basis. The basis has to be chosen at time of Vehicle Purchase and the same 
cannot be changed subsequently during the Vehicle tenure. 
o For Actual Basis, the maximum limit of Vehicle Operating Expenses shall be default 
allocated amount towards Vehicle Operating Expenses and the actual expenses 
incurred during the year shall be reimbursed. In case in any financial year, actual 
expenses are lower than default allocated amount the same shall not be paid.
o For Reimbursement Basis, the employee is required to allocate any amount under 
Vehicle Operating Expenses within the limit as per Table 1 at the time of availing the 
Vehicle. The reimbursement is restricted to the amount allocated and any unutilized 
amount is paid as taxable at end of the financial year.'''

In [16]:
%%time
obj.get_question(doc6)

CPU times: user 1min 22s, sys: 220 ms, total: 1min 23s
Wall time: 1min 23s


['How often is the Insurance Premium paid?',
 'What is part of the Vehicle Operating Expenses?',
 'What includes expenses incurred towards fuel maintenance insurance premium and Chauffer?',
 'What is the amount of actual expenses incurred during the year?',
 'What is the maximum amount of Vehicle Operating Expenses?',
 'What is the maximum amount of insurance premium for a Chauffeur?',
 'What is the annual premium for insurance?',
 'Maintenance Expenses incurred on repair and maintenance of what?',
 'What is the depreciation of the vehicle covered by?',
 'On what basis is the employee required to allocate any amount under Vehicle Operating Expenses within the limit as per Table 1 at the time of availing the Vehicle?',
 'What is the depreciation comprehensive cover of the Insurance Premium?',
 'What is the job title of the employee who is paid a fixed amount of money for their services?',
 'What is the term for expenses incurred on repair and maintenance of the Vehicle?',
 'What is the 

In [17]:
doc7='''
In line with our focus on accelerating Environmental, Social and Governance (ESG) agenda, we encourage 
our employees to choose environment friendly modes of commute. To support the use of environment 
friendly Vehicles by our employees the following provisions will apply.
'''

In [18]:
%%time
obj.get_question(doc7)

CPU times: user 11.3 s, sys: 124 ms, total: 11.4 s
Wall time: 11.4 s


['What is the focus of our Environmental Social and Governance ESG agenda?',
 'What does the ESG agenda encourage employees to do?',
 'What does the focus on environmental social and what else?',
 'What does the company want to support the use of?',
 'What is the focus of our ESG agenda?',
 'What is the focus of the company on?',
 'What do we do to encourage our employees to do?',
 'What does the ESG agenda encourage our employees to do?',
 'What does the focus on accelerating?',
 'What does the following provisions apply to?']

In [19]:
# NOTE(review): scratch cell -- a bare string literal that no visible cell reads.
# Presumably sample inputs containing punctuation ("A.10", "s/o", "24/350") for
# checking how clean_text treats such tokens -- confirm, or delete the cell.
'''
paragraph-A.10
section-360
s/o someone
24/350 house noo
'''

'\nparagraph-A.10\nsection-360\ns/o someone\n24/350 house noo\n'