In [1]:
import pdfplumber
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import random
import spacy
import language_tool_python
import csv

# Preprocessing the Given PDF

In [3]:
import pdfplumber
import re
import csv

def preprocess_text(text):
    text = text.replace('\n', ' ')
    text = re.sub(r'[^a-zA-Z0-9\s.]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    text = text.lower()
    return text

def extract_and_preprocess_text(file_path):
    preprocessed_texts = [] 
    with pdfplumber.open(file_path) as pdf:
        num_pages = len(pdf.pages)

        for page_num in range(num_pages):
            page = pdf.pages[page_num]
            text = page.extract_text()
            preprocessed_text = preprocess_text(text)
            preprocessed_texts.append(preprocessed_text)

            print(f"Page {page_num + 1}: {preprocessed_text}\n")

    return preprocessed_texts 

def save_to_csv(file_path, preprocessed_texts):
    with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['Page', 'Preprocessed Text'])  # Header

        for i, text in enumerate(preprocessed_texts, start=1):
            csv_writer.writerow([i, text])

pdf_file_path = 'C:/Users/sriyo/Desktop/jets/NLP Bot/files/NCERT Biology.pdf'
csv_file_path = 'output.csv'

preprocessed_texts = extract_and_preprocess_text(pdf_file_path)
save_to_csv(csv_file_path, preprocessed_texts)


Page 1: chapter 2 human reproduction 2.1 the male reproductive system as you are aware humans are sexually reproducing and 2.2 the female reproductive viviparous. the reproductive events in humans include system formation of gametes gametogenesis i.e. sperms in males 2.3 gametogenesis and ovum in females transfer of sperms into the female 2.4 menstrual cycle genital tract insemination and fusion of male and female gametes fertilisation leading to formation of zygote. this 2.5 fertilisation and is followed by formation and development of blastocyst implantation and its attachment to the uterine wall implantation 2.6 pregnancy and embryonic embryonic development gestation and delivery of the development baby parturition. you have learnt that these reproductive events occur after puberty. there are remarkable 2.7 parturition and lactation differences between the reproductive events in the male and in the female for example sperm formation continues even in old men but formation of ovum ce

# Question Generation

In [50]:

def generate_questions_for_sentences(sentences, model, tokenizer, page_number, top_k=50, top_p=0.95):
    generated_questions = set()

    num_sentences = len(sentences)

    for i in range(num_sentences - 1):

        selected_sentences = sentences[i:i+10]
        selected_text = ' '.join(selected_sentences)

        input_text_with_prefix = f"Generate a question for the following text (Page {page_number}): {selected_text}"
        inputs = tokenizer(input_text_with_prefix, return_tensors="pt")

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=50,
                top_k=top_k,
                top_p=top_p,
                temperature=1.0,
            )

        generated_question = tokenizer.decode(outputs[0], skip_special_tokens=True)
        split_questions = [question.strip() + '?' for question in re.split(r'[.!?]', generated_question)]
        generated_questions.update(split_questions)

    return [{'page': page_number, 'question': question} for question in generated_questions]
def save_questions_to_file(questions, output_file_path='generated_questions.txt'):
    with open(output_file_path, 'w', encoding='utf-8') as file:
        for question_entry in questions:
            question = question_entry['question']
            file.write(question + '\n')


tokenizer = AutoTokenizer.from_pretrained("wiselinjayajos/t5-end2end-questions-generation")
model = AutoModelForSeq2SeqLM.from_pretrained("wiselinjayajos/t5-end2end-questions-generation")



all_generated_questions = []

for i, input_paragraph in enumerate(preprocessed_texts, start=1):
    sentences = re.split(r'[.!?]', input_paragraph)
    generated_questions = generate_questions_for_sentences(sentences, model, tokenizer, page_number=i, top_k=50, top_p=0.95)
    all_generated_questions.extend(generated_questions)


save_questions_to_file(all_generated_questions, output_file_path='generated_questions.txt')


print("Generated Questions:")
for question in all_generated_questions:
    print(question)

print("Generated questions have been saved to 'generated_questions.txt' and printed.")


def save_questions_to_file_csv_with_page(questions, output_file_path='generated_questions_with_page.csv'):
    with open(output_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['Page', 'Generated Questions'])  # Header

        for entry in questions:
            csv_writer.writerow([entry['page'], entry['question']])

save_questions_to_file_csv_with_page(all_generated_questions, output_file_path='generated_questions_with_page.csv')



Generated Questions:
{'page': 1, 'question': 'What is the reproductive event in humans?'}
{'page': 1, 'question': 'What is the rationalised 202324?'}
{'page': 1, 'question': 'Where is the female reproductive system located?'}
{'page': 1, 'question': '?'}
{'page': 1, 'question': 'What is the name of the genital tract insemination?'}
{'page': 1, 'question': 'Where is the male reproductive system located?'}
{'page': 1, 'question': 'What does sperm formation continue even in old men?'}
{'page': 1, 'question': 'What is the name?'}
{'page': 1, 'question': 'What is the male reproductive viviparous?'}
{'page': 1, 'question': 'What is the external genitalia rationalised 202324?'}
{'page': 1, 'question': 'What is the female reproductive viviparous?'}
{'page': 1, 'question': 'What is the male reproductive system located in?'}
{'page': 1, 'question': 'What is the name of the genital tract insemination and fusion of male and female gametes fertilisation leading to formation of zygote?'}
{'page': 1,

In [2]:
with open("generated_questions_with_page.csv", 'r', encoding='utf-8') as file:
        generated_questions = file.readlines()

In [3]:
questions_only = [line.split(',')[1].strip() for line in generated_questions[1:]]


In [4]:
len(questions_only)

581

## To read preprocessed text

In [5]:
file_path = "preprocessed_paragraphs.txt"
preprocessed_texts= []
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        preprocessed_texts.append(line.strip())

# Keyword Extraction

### Models using


#### 1. H1 extractor (code runs faster, ok ok keywords)---3-4mins
#### 2. tech-keywords-extractor ( Code works good if given smaller chunks, execution takes time)---20mins
#### 3. BART model by FB ( Should finetune with our data and can be used for question gen aswell )

In [6]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import csv

def extract_keywords_batch(texts, max_length=100):
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=max_length)
    predicted_keywords = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return predicted_keywords

all_keywords = []

tokenizer = AutoTokenizer.from_pretrained("ilsilfverskiold/tech-keywords-extractor")
model = AutoModelForSeq2SeqLM.from_pretrained("ilsilfverskiold/tech-keywords-extractor")

batch_size = 10


csv_file = "extracted_keywords.csv"

with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Original Sentence', 'Extracted Keywords'])

    for sentence in preprocessed_texts:
        sentence_parts = [sentence[i:i + max(1, int(len(sentence) / batch_size))] for i in range(0, len(sentence),
                                                                                        max(1, int(len(sentence) / batch_size)))]
        for part in sentence_parts:
            keywords = extract_keywords_batch(part, max_length=512)
            all_keywords.append(keywords.split())
            print("Extracted Keywords:", keywords)
            writer.writerow([sentence, keywords])

print("All Extracted Keywords:", all_keywords)
print(f"Keywords saved to {csv_file}")


From c:\Users\sriyo\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.



Extracted Keywords: Human Reproduction, Reproductive System
Extracted Keywords: Human Reproductive Events, System Formation, Gametes, Sperms, Males
Extracted Keywords: gametogenesis, ovum, sperms, genital tract insemination
Extracted Keywords: le and female gametes fertilisation, zygote
Extracted Keywords: Blastocyst Implantation, Uterine Wall, Embryo Development
Extracted Keywords: Development Baby Parturition, Puberty
Extracted Keywords: Parturition, Lactation Differences, Reproductive Events
Extracted Keywords: Sperm Formation, Ovum
Extracted Keywords: Male Reproductive Systems, Human Reproductive System
Extracted Keywords: elvis region, testes, accessory ducts, external genitalia
Extracted Keywords: 324
Extracted Keywords: Human Reproduction, Testes, Scrotum
Extracted Keywords: Testis, Spermatogenesis, Body Temperature
Extracted Keywords: Testicular lobules, Male Pelvis, Reproductive System
Extracted Keywords: Spermatogonia, Sertoli Cells, Sperms
Extracted Keywords: Meiotic Divisio

In [7]:
all_keywords_hi=[]
for i in all_keywords:
    for j in i:
        all_keywords_hi.append(j)

In [8]:
all_keywords

[['Human', 'Reproduction,', 'Reproductive', 'System'],
 ['Human',
  'Reproductive',
  'Events,',
  'System',
  'Formation,',
  'Gametes,',
  'Sperms,',
  'Males'],
 ['gametogenesis,', 'ovum,', 'sperms,', 'genital', 'tract', 'insemination'],
 ['le', 'and', 'female', 'gametes', 'fertilisation,', 'zygote'],
 ['Blastocyst', 'Implantation,', 'Uterine', 'Wall,', 'Embryo', 'Development'],
 ['Development', 'Baby', 'Parturition,', 'Puberty'],
 ['Parturition,', 'Lactation', 'Differences,', 'Reproductive', 'Events'],
 ['Sperm', 'Formation,', 'Ovum'],
 ['Male', 'Reproductive', 'Systems,', 'Human', 'Reproductive', 'System'],
 ['elvis',
  'region,',
  'testes,',
  'accessory',
  'ducts,',
  'external',
  'genitalia'],
 ['324'],
 ['Human', 'Reproduction,', 'Testes,', 'Scrotum'],
 ['Testis,', 'Spermatogenesis,', 'Body', 'Temperature'],
 ['Testicular', 'lobules,', 'Male', 'Pelvis,', 'Reproductive', 'System'],
 ['Spermatogonia,', 'Sertoli', 'Cells,', 'Sperms'],
 ['Meiotic', 'Divisions,', 'Sertoli', 'Cel

In [9]:
len(all_keywords_hi)

738

In [10]:
import spacy

nlp = spacy.load("en_core_web_sm")

def filter_keywords_spacy(keywords):
    doc = nlp(" ".join(keywords))

    filtered_keywords = []
    removed_keywords = []

    for token in doc:
        if (
            not token.is_stop
            and not token.is_punct
            and not any(char.isdigit() for char in token.text)
            and token.text.lower() != 'figure'
            and token.text.lower() != 'diagram'
            and len(token.text) >= 3  
        ):
            filtered_keywords.append(token.text)
        else:
            removed_keywords.append(token.text)

    return filtered_keywords, removed_keywords

filtered_keywords_spacy, removed_keywords_spacy = filter_keywords_spacy(all_keywords_hi)

print("Filtered Keywords (spaCy):", filtered_keywords_spacy)
print("Removed Keywords (spaCy):", removed_keywords_spacy)


Filtered Keywords (spaCy): ['Human', 'Reproduction', 'Reproductive', 'System', 'Human', 'Reproductive', 'Events', 'System', 'Formation', 'Gametes', 'Sperms', 'Males', 'gametogenesis', 'ovum', 'sperms', 'genital', 'tract', 'insemination', 'female', 'gametes', 'fertilisation', 'zygote', 'Blastocyst', 'Implantation', 'Uterine', 'Wall', 'Embryo', 'Development', 'Development', 'Baby', 'Parturition', 'Puberty', 'Parturition', 'Lactation', 'Differences', 'Reproductive', 'Events', 'Sperm', 'Formation', 'Ovum', 'Male', 'Reproductive', 'Systems', 'Human', 'Reproductive', 'System', 'elvis', 'region', 'testes', 'accessory', 'ducts', 'external', 'genitalia', 'Human', 'Reproduction', 'Testes', 'Scrotum', 'Testis', 'Spermatogenesis', 'Body', 'Temperature', 'Testicular', 'lobules', 'Male', 'Pelvis', 'Reproductive', 'System', 'Spermatogonia', 'Sertoli', 'Cells', 'Sperms', 'Meiotic', 'Divisions', 'Sertoli', 'Cells', 'Sperm', 'Formation', 'Ssels', 'Interstitial', 'Cells', 'Leydig', 'Cells', 'Testicular',

In [11]:
filtered_keywords_spaci=[]
for i in filtered_keywords_spacy:
    if i.lower() not in filtered_keywords_spaci:
        filtered_keywords_spaci.append(i.lower())

In [12]:
len(filtered_keywords_spaci)

303

In [13]:
filtered_keywords_spaci

['human',
 'reproduction',
 'reproductive',
 'system',
 'events',
 'formation',
 'gametes',
 'sperms',
 'males',
 'gametogenesis',
 'ovum',
 'genital',
 'tract',
 'insemination',
 'female',
 'fertilisation',
 'zygote',
 'blastocyst',
 'implantation',
 'uterine',
 'wall',
 'embryo',
 'development',
 'baby',
 'parturition',
 'puberty',
 'lactation',
 'differences',
 'sperm',
 'male',
 'systems',
 'elvis',
 'region',
 'testes',
 'accessory',
 'ducts',
 'external',
 'genitalia',
 'scrotum',
 'testis',
 'spermatogenesis',
 'body',
 'temperature',
 'testicular',
 'lobules',
 'pelvis',
 'spermatogonia',
 'sertoli',
 'cells',
 'meiotic',
 'divisions',
 'ssels',
 'interstitial',
 'leydig',
 'hormones',
 'androgens',
 'sex',
 'vas',
 'deferens',
 'seminiferous',
 'immunologically',
 'competent',
 'vasa',
 'efferentia',
 'epididymis',
 'seminal',
 'vesicle',
 'urethra',
 'ejaculatory',
 'duct',
 'penis',
 'biology',
 'tubule',
 'glans',
 'glands',
 'vesicles',
 'prostate',
 'bulbourethral',
 'pla

# Filtration using Keywords

In [14]:
import csv

def save_to_csv(file_path, data):
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Question'])

        for item in data:
            cleaned_question = item
            writer.writerow([cleaned_question])


def filter_questions_by_context(questions, context_keywords):
    filtered_questions = []
    keyword_out = []
    for question in questions:
        if any(keyword in question.lower() for keyword in context_keywords):
            filtered_questions.append(question)
        else:
            keyword_out.append(question)
    return filtered_questions, keyword_out

filtered_questions, keyword_out = filter_questions_by_context(generated_questions, filtered_keywords_spaci)

# Save filtered questions to CSV
save_to_csv("filtered_questions.csv", filtered_questions)

# Save questions filtered out to CSV
save_to_csv("questions_filtered_out.csv", keyword_out)

print("Filtered Questions:")
for question in filtered_questions:
    print(question)

print(f"\nFiltered questions have been saved to 'filtered_questions.csv'.")
print(f"Questions filtered out have been saved to 'questions_filtered_out.csv'.")


Filtered Questions:
1,What is the reproductive event in humans?

1,What is the rationalised 202324?

1,Where is the female reproductive system located?

1,What is the name of the genital tract insemination?

1,Where is the male reproductive system located?

1,What does sperm formation continue even in old men?

1,What is the male reproductive viviparous?

1,What is the external genitalia rationalised 202324?

1,What is the female reproductive viviparous?

1,What is the male reproductive system located in?

1,What is the name of the genital tract insemination and fusion of male and female gametes fertilisation leading to formation of zygote?

1,Where is the male reproductive?

1,What is the male reproductive system?

1,What is the female reproductive system located in?

1,What does ovum cease to do in women around fifty years?

1,What is the name of the sperms in males?

1,What are the differences between the reproductive events in the male and female?

1,What is the name of the sperms 

In [39]:
import pandas as pd
pd.read_csv('filtered_questions.csv')

Unnamed: 0,Question
0,"1,What is the reproductive event in humans?\n"
1,"1,What is the rationalised 202324?\n"
2,"1,Where is the female reproductive system loca..."
3,"1,What is the name of the genital tract insemi..."
4,"1,Where is the male reproductive system locate..."
...,...
485,"15,What hormones are involved in induction of ..."
486,"15,What is the process of release of ovum from..."
487,"15,Where do truefalse d leydig cells synthesis..."
488,"15,What is not a reliable indicator of virgini..."


In [37]:
keyword_out

['Page,Generated Questions\n',
 '1,?\n',
 '1,What is the name?\n',
 '1,What?\n',
 '2,What are the two types?\n',
 '2,?\n',
 '2,What are?\n',
 '2,What?\n',
 '2,What is?\n',
 '2,How many compartments are in?\n',
 '2,How many highly coiled seminifer?\n',
 '3,What is the secretions of the bulboureth?\n',
 '3,?\n',
 '3,What is the length of each?\n',
 '3,What is the length of?\n',
 '3,What is the secretions of?\n',
 '3,What?\n',
 '3,What is?\n',
 '4,?\n',
 '4,What is the?\n',
 '4,What?\n',
 '5,?\n',
 '5,What is the outer thin membranous peri?\n',
 '5,What?\n',
 '5,What is the question for the following text?\n',
 '5,What is the?\n',
 '5,What can also be broken by a sudden fall or jolt?\n',
 '6,What are mamm?\n',
 '6,?\n',
 '6,What is the name?\n',
 '6,What is the process called that?\n',
 '6,What?\n',
 '6,What is the?\n',
 '6,What is the process of sper?\n',
 '7,?\n',
 '7,How many sper?\n',
 '7,What are the?\n',
 '7,What is the?\n',
 '7,What is the structure of a sper?\n',
 '7,How many?\n',

# Grammar Checking

In [40]:
import torch
import csv
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("grammarly/coedit-large")
model = AutoModelForSeq2SeqLM.from_pretrained("grammarly/coedit-large")

def correct_questions(questions):
    corrected_questions = []

    for question in questions:
        input_ids = tokenizer.encode(question, return_tensors="pt")
        with torch.no_grad():
            corrected_output = model.generate(input_ids)
        corrected_question = tokenizer.decode(corrected_output[0], skip_special_tokens=True)

        corrected_questions.append(corrected_question)

    return corrected_questions


corrected_questions = correct_questions(filtered_questions)

csv_file_path = "corrected_questions.csv"

with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    
    writer.writerow(['Original Question', 'Corrected Question'])

    for original, corrected in zip(filtered_questions, corrected_questions):
        writer.writerow([original, corrected])

print(f"Results saved to {csv_file_path}")




Results saved to corrected_questions.csv


In [21]:
def save_questions_to_file(questions, file_path):
    with open(file_path, 'w', encoding='utf-8') as file:
        for question in questions:
            file.write(question + '\n')

output_file_path = 'grammar_corrected.txt'

save_questions_to_file(, output_file_path)


In [41]:
pd.read_csv('corrected_questions.csv')

Unnamed: 0,Original Question,Corrected Question
0,"1,What is the reproductive event in humans?\n","1, What is the reproductive event in humans?"
1,"1,What is the rationalised 202324?\n","1,What is the rationalized 202324?"
2,"1,Where is the female reproductive system loca...","1, Where is the female reproductive system loc..."
3,"1,What is the name of the genital tract insemi...","1,What is the name of the genital tract insemi..."
4,"1,Where is the male reproductive system locate...","1,Where is the male reproductive system located?"
...,...,...
485,"15,What hormones are involved in induction of ...","15,What hormones are involved in the formation..."
486,"15,What is the process of release of ovum from...",15 What is the process of release of ovum from...
487,"15,Where do truefalse d leydig cells synthesis...","15,Where do truefalse d leydig cells synthesiz..."
488,"15,What is not a reliable indicator of virgini...","15,What is not a reliable indicator of virgini..."


In [32]:
def save_questions_to_file(questions, file_path):
    with open(file_path, 'w', encoding='utf-8') as file:
        for question in questions:
            file.write(question + '\n')

output_file_path = 'keyword_filtered.txt'

save_questions_to_file(keyword_out, output_file_path)

In [21]:
filtered_questions

['What is the male reproductive system located in?',
 'What is the name of the text that includes a pair of testes along with accessory ducts glands?',
 'Where is the female reproductive system located?',
 'What is the female reproductive system located in?',
 'What is the male reproductive viviparous?',
 'What is the name of the sperms in males?',
 'What is the female reproductive system?',
 'What is the name of the sperms in females?',
 'Where is the male reproductive?',
 'What is the name of the genital tract insemination?',
 'Where is the male reproductive system located?',
 'What is the term for transfer of sperms into the female?',
 'What is the male reproductive system?',
 'What is the female reproductive viviparous?',
 'What does ovum cease to do in women around fifty years?',
 'What is the reproductive event in humans?',
 'What are the differences between the reproductive events in the male and female?',
 'What is the external opening called?',
 'What is the male sex accessory

# saved them 

In [18]:
def save_questions_to_file(questions, file_path):
    with open(file_path, 'w', encoding='utf-8') as file:
        for question in questions:
            file.write(question)


output_file_path = 'output_questions.txt'

save_questions_to_file(filtered_questions, output_file_path)

# For assigning page numbers

In [69]:
matched_strings = []
for filtered_question in filtered_questions:
    for entry in generated_questions:
        if filtered_question.lower() in entry.lower():
            matched_strings.append(entry)

print(matched_strings)

['1,What is the reproductive event in humans?\n', '1,What is the male reproductive system located in?\n', '1,Where is the male reproductive?\n', '1,What is the name of the sperms in males?\n', '1,What is the name of the text that includes a pair of testes along with accessory ducts glands?\n', '1,Where is the female reproductive system located?\n', '1,What are the differences between the reproductive events in the male and female?\n', '1,What is the female reproductive system?\n', '1,What is the term for transfer of sperms into the female?\n', '1,Where is the male reproductive system located?\n', '1,What is the female reproductive viviparous?\n', '1,What is the female reproductive system located in?\n', '1,What is the male reproductive system?\n', '1,What is the name of the sperms in females?\n', '1,What is the name of the genital tract insemination?\n', '1,What is the male reproductive viviparous?\n', '1,What does ovum cease to do in women around fifty years?\n', '2,What are interstit

In [76]:

# New lists to store page numbers and questions
page_numbers = []
questions = []

# Check if each element in filtered_questions is present in generated_questions
for filtered_question in filtered_questions:
    for entry in generated_questions:
        if filtered_question.lower() in entry.lower():
            # Split the entry to extract page number and question
            parts = entry.split(',')
            page_numbers.append(parts[0])
            questions.append(parts[1].strip())

# Save page_numbers and questions to a CSV file
output_csv_path = 'matched_strings_output.csv'
with open(output_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['Page', 'Question'])  # Header

    # Write the data to the CSV file
    for page, question in zip(page_numbers, questions):
        csv_writer.writerow([page, question])

print(f"Page numbers and questions have been saved to '{output_csv_path}'.")


Page numbers and questions have been saved to 'matched_strings_output.csv'.


# Answer Generation

In [35]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("susindhar/aiproject-bert-qa")
model = AutoModelForQuestionAnswering.from_pretrained("susindhar/aiproject-bert-qa", from_tf=True)

# Your question
question = "What is the term for transfer of sperms into the female?"

# Your context
context = "chapter 2 human reproduction 2.1 the male reproductive system as you are aware humans are sexually reproducing and 2.2 the female reproductive viviparous. the reproductive events in humans include system formation of gametes gametogenesis i.e. sperms in males 2.3 gametogenesis and ovum in females transfer of sperms into the female 2.4 menstrual cycle genital tract insemination and fusion of male and female gametes fertilisation leading to formation of zygote. this 2.5 fertilisation and is followed by formation and development of blastocyst implantation and its attachment to the uterine wall implantation 2.6 pregnancy and embryonic embryonic development gestation and delivery of the development baby parturition. you have learnt that these reproductive events occur after puberty. there are remarkable 2.7 parturition and lactation differences between the reproductive events in the male and in the female for example sperm formation continues even in old men but formation of ovum ceases in women around the age of fifty years. let us examine the male and female reproductive systems in human. 2.1 t m r s he ale eproductive ystem the male reproductive system is located in the pelvis region figure 2.1a. it includes a pair of testes alongwith accessory ducts glands and the external genitalia. rationalised 202324"

# Tokenize input
inputs = tokenizer(question, context, return_tensors="pt")

# Get the predicted answer
outputs = model(**inputs)

# Extract the start and end scores from the model outputs
answer_start_scores = outputs.start_logits
answer_end_scores = outputs.end_logits

# Convert to PyTorch tensors
answer_start_scores = torch.tensor(answer_start_scores)
answer_end_scores = torch.tensor(answer_end_scores)

# Get the indices of the maximum values
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1

answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))

print("Answer:", answer)


All TF 2.0 model weights were used when initializing DistilBertForQuestionAnswering.

All the weights of DistilBertForQuestionAnswering were initialized from the TF 2.0 model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use DistilBertForQuestionAnswering for predictions without further training.


Answer: ovum


  answer_start_scores = torch.tensor(answer_start_scores)
  answer_end_scores = torch.tensor(answer_end_scores)


In [36]:
answer

'ovum'