In [6]:
!python -m spacy download en_core_web_lg

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting en-core-web-lg==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.5.0/en_core_web_lg-3.5.0-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')


In [7]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
from transformers import pipeline
whisper = pipeline('automatic-speech-recognition', model='openai/whisper-medium', device=0)

In [9]:
text = whisper('call.wav')



In [10]:
text = text['text']


In [11]:
text

" Hello. Hi Nancy. This is Mike from AT&T Inc. Yes. How can I help you? Nancy, you have been using our prepaid connection for a couple of years now, right? Yeah, that's right. How would you like a postpaid connection that allows you to make free unlimited voice calls to three AT&T numbers?"

In [12]:
import spacy
from spacy import displacy
from spacy import tokenizer
nlp = spacy.load('en_core_web_lg')


In [13]:
doc = nlp(text)


In [14]:
ruler = nlp.add_pipe("entity_ruler")

patterns = [{"label": "INTRO", "pattern": "Hi"},{"label": "INTRO", "pattern": "Hello"},{"label": "INTRO", "pattern": "morning"},{"label": "INTRO", "pattern": "afternoon"}]

# patterns = [{"label": "INTRO", "pattern":[{"LOWER":"Hello"},{"LOWER":"Hi"},{"LOWER":"morning"},{"LOWER":"afternoon"}]}]

ruler.add_patterns(patterns)
doc = nlp(text)

for ent in doc.ents:
  print(ent.text, ent.label_)

Hello INTRO
Hi INTRO
Nancy PERSON
Mike PERSON
AT&T Inc. ORG
Nancy PERSON
a couple of years DATE
three CARDINAL
AT&T ORG


In [15]:
sent_data =[]

for sent in doc.sents:
  data ={"sentence": sent}

  entities = []
  labels = [ e.label_ for e in sent.ents]
 
  if 'INTRO' in labels:
      data["intent"] = "intro"
  else:
      data["intent"] = "purpose"

  for ents in sent.ents:
    en ={"entity_name":ents.label_,"entity_value":ents.text}
    entities.append(en)
  
  data["entities"] = entities

  sent_data.append(data)

sent_data
    

[{'sentence':  Hello.,
  'intent': 'intro',
  'entities': [{'entity_name': 'INTRO', 'entity_value': 'Hello'}]},
 {'sentence': Hi Nancy.,
  'intent': 'intro',
  'entities': [{'entity_name': 'INTRO', 'entity_value': 'Hi'},
   {'entity_name': 'PERSON', 'entity_value': 'Nancy'}]},
 {'sentence': This is Mike from AT&T Inc.,
  'intent': 'purpose',
  'entities': [{'entity_name': 'PERSON', 'entity_value': 'Mike'},
   {'entity_name': 'ORG', 'entity_value': 'AT&T Inc.'}]},
 {'sentence': Yes., 'intent': 'purpose', 'entities': []},
 {'sentence': How can I help you?, 'intent': 'purpose', 'entities': []},
 {'sentence': Nancy, you have been using our prepaid connection for a couple of years now, right?,
  'intent': 'purpose',
  'entities': [{'entity_name': 'PERSON', 'entity_value': 'Nancy'},
   {'entity_name': 'DATE', 'entity_value': 'a couple of years'}]},
 {'sentence': Yeah, that's right., 'intent': 'purpose', 'entities': []},
 {'sentence': How would you like a postpaid connection that allows you t

In [16]:
result = {
    "task_1_output" : text,
    "task_3_output" : sent_data
}
result

{'task_1_output': " Hello. Hi Nancy. This is Mike from AT&T Inc. Yes. How can I help you? Nancy, you have been using our prepaid connection for a couple of years now, right? Yeah, that's right. How would you like a postpaid connection that allows you to make free unlimited voice calls to three AT&T numbers?",
 'task_3_output': [{'sentence':  Hello.,
   'intent': 'intro',
   'entities': [{'entity_name': 'INTRO', 'entity_value': 'Hello'}]},
  {'sentence': Hi Nancy.,
   'intent': 'intro',
   'entities': [{'entity_name': 'INTRO', 'entity_value': 'Hi'},
    {'entity_name': 'PERSON', 'entity_value': 'Nancy'}]},
  {'sentence': This is Mike from AT&T Inc.,
   'intent': 'purpose',
   'entities': [{'entity_name': 'PERSON', 'entity_value': 'Mike'},
    {'entity_name': 'ORG', 'entity_value': 'AT&T Inc.'}]},
  {'sentence': Yes., 'intent': 'purpose', 'entities': []},
  {'sentence': How can I help you?, 'intent': 'purpose', 'entities': []},
  {'sentence': Nancy, you have been using our prepaid connec