#Importing necessary libraries

In [15]:
import spacy

#Load the pre-trained model

In [16]:
nlp = spacy.load("en_core_web_sm")

#Process a sample text with the model

In [17]:
doc = nlp("Hello! My name is Varnika. I'm currently in my final year of engineering.")

#Tokenization

In [18]:
for token in doc:
  print(token.text)

Hello
!
My
name
is
Varnika
.
I
'm
currently
in
my
final
year
of
engineering
.


#POS tagging

In [19]:
for token in doc:
  print(token.text, token.pos_)

Hello INTJ
! PUNCT
My PRON
name NOUN
is AUX
Varnika PROPN
. PUNCT
I PRON
'm AUX
currently ADV
in ADP
my PRON
final ADJ
year NOUN
of ADP
engineering NOUN
. PUNCT


#Dependency parsing

In [24]:
for token in doc:
    print(f"{token.text}: {token.dep_} -> {token.head.text}")

Hello: ROOT -> Hello
!: punct -> Hello
My: poss -> name
name: nsubj -> is
is: ROOT -> is
Varnika: attr -> is
.: punct -> is
I: nsubj -> 'm
'm: ROOT -> 'm
currently: advmod -> 'm
in: prep -> 'm
my: poss -> year
final: amod -> year
year: pobj -> in
of: prep -> year
engineering: pobj -> of
.: punct -> 'm


#Visually display the dependencies

In [26]:
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Hello! My name is Varnika. I'm currently in my final year of engineering.")
sentence_spans = list(doc.sents)
displacy.serve(sentence_spans, style="dep")


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


#Named Entity Recognition

In [33]:
doc = nlp("Hello! My name is Milind. I'm currently in my final year of engineering.")
for ent in doc.ents:
  print(ent.text, ent.label_)

Milind PERSON
my final year DATE


#Lemmatization

In [40]:
for token in doc:
  print(token.text, token.lemma_)

Hello hello
! !
My my
name name
is be
Milind Milind
. .
I I
'm be
currently currently
in in
my my
final final
year year
of of
engineering engineering
. .


#Sentence Boundary Detection

In [41]:
for sent in doc.sents:
  print(sent.text)

Hello!
My name is Milind.
I'm currently in my final year of engineering.


#**Applying spaCy to create a sample QnA system.**

In [44]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [63]:
def qna_system(context, questions):
  doc = nlp(context)

  #Process the question
  question_doc = nlp(questions)
  question_keywords=set() # Initialize as an empty set
  for token in question_doc:
    if not token.is_stop and not token.is_punct:
      question_keywords.add(token.lemma_) # Add each lemma to the set

  #Initialize the best suitable answer and its score
  optimal_answer = ""
  best_score = 0

  #Split context into sentences
  sentences = []
  for sent in doc.sents:
    sentences.append(sent.text)

  #Iterate through each sentence in the context and score them based on its keyword
  for sentence in sentences:
    sentence_doc = nlp(sentence)
    sentence_keywords = set()
    for token in sentence_doc:
      if not token.is_stop and not token.is_punct:
        sentence_keywords.add(token.lemma_)
        score = len(question_keywords & sentence_keywords)

    if score > best_score:
        best_score = score
        optimal_answer = sentence

  if optimal_answer:
    return optimal_answer
  else:
    return "No suitable answer found."

In [64]:
# Sample context and questions
context = ("SpaCy is an open-source library for advanced NLP in Python. It is designed specifically for production use. "
           "SpaCy comes with pre-trained models and supports various NLP tasks such as tokenization, named entity recognition, "
           "and part-of-speech tagging.")

questions = [
    "What is SpaCy?",
    "What tasks does SpaCy support?",
    "Is SpaCy open-source?"
]

# Get answers for the questions
for question in questions:
    answer = qna_system(context, question)
    print(f"Question: {question}\nAnswer: {answer}\n")

Question: What is SpaCy?
Answer: SpaCy is an open-source library for advanced NLP in Python.

Question: What tasks does SpaCy support?
Answer: SpaCy comes with pre-trained models and supports various NLP tasks such as tokenization, named entity recognition, and part-of-speech tagging.

Question: Is SpaCy open-source?
Answer: SpaCy is an open-source library for advanced NLP in Python.

