In [1]:
!pip install spacy
!python -m spacy download en_core_web_lg
!python -m spacy download en_core_web_trf

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_lg')
Collecting en-core-web-trf==3.7.3
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl (457.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m457.4/457.4 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_trf')


# Implementation of two NER pipelines built on models from the spaCy library
### The two models used are `en_core_web_lg`, the standard spaCy NER pipeline, and `en_core_web_trf`, which uses a RoBERTa model
### Dates are converted to a standard format with the dateparser library
### The models can also be found in ner_models.py

In [1]:
import spacy
from scripts import load_test_data, test_n_samples, convert_to_standard_date

# The en_core_web_lg model, optimized for CPU

In [2]:
class SpacyLgModel():
    """Extract flight-booking details from free text with a spaCy NER pipeline.

    The pipeline's named entities are mapped onto four fields (passenger name,
    departure, destination, date) by a few hand-written rules. A field that no
    entity fills keeps the placeholder ``'Unspecified'``.
    """

    def __init__(self, model_name="en_core_web_lg"):
        """Load the spaCy pipeline.

        Args:
            model_name: Name of an installed spaCy pipeline. Defaults to
                ``en_core_web_lg``, the pipeline this class was written for.
        """
        self.model = spacy.load(model_name)

    @staticmethod
    def _clean_date_text(date_text):
        """Remove the filler substrings 'the ', ' of ' and 'around' from a date string."""
        # str.replace is a no-op when the substring is absent, so no guards are needed.
        date_text = date_text.replace('the ', '')
        date_text = date_text.replace(' of ', ' ')
        date_text = date_text.replace('around', '')
        return date_text.strip()

    def extract_flight_details(self, user_request):
        """Parse one request into ``(name, departure, destination, date)``.

        Rules applied to the entities, in document order:

        * The first ``PERSON`` entity becomes the name.
        * ``NORP``/``GPE``/``LOC`` entities are places. While no departure is
          known, the text before the entity decides its role: a preceding
          ``' from '`` makes it the departure, otherwise a preceding ``' to '``
          makes it the destination, otherwise it is taken as the departure.
          Once a departure is known, the next place fills an empty destination.
        * Every ``DATE`` entity is cleaned and passed to
          ``convert_to_standard_date``; the last one in the text wins.

        Args:
            user_request: The raw request text.

        Returns:
            Tuple ``(name, departure, destination, date)``; unfilled fields are
            the string ``'Unspecified'``.
        """
        doc = self.model(user_request)

        name = 'Unspecified'
        departure = 'Unspecified'
        destination = 'Unspecified'
        date = 'Unspecified'

        for entity in doc.ents:
            label = entity.label_
            if label == 'PERSON' and name == 'Unspecified':
                name = entity.text
            elif label in ('NORP', 'GPE', 'LOC'):
                if departure == 'Unspecified':
                    # Slice at the entity's own character offset. Searching for
                    # the entity text would find its FIRST occurrence, which is
                    # wrong when the same string appears earlier in the request
                    # (e.g. "Paris Hilton ... to Paris").
                    preceding_text = user_request[:entity.start_char]
                    if ' from ' in preceding_text:
                        departure = entity.text
                    elif ' to ' in preceding_text:
                        destination = entity.text
                    else:
                        departure = entity.text
                elif destination == 'Unspecified':
                    destination = entity.text
            elif label == 'DATE':
                date = convert_to_standard_date(self._clean_date_text(entity.text))

        return name, departure, destination, date


In [3]:
# Load the evaluation set; presumably the request texts and their expected fields (confirm in scripts.load_test_data).
user_requests, required_data = load_test_data()

In [4]:
# Build the en_core_web_lg-based extractor; the constructor loads the spaCy pipeline.
model = SpacyLgModel()

In [5]:
# Run the extractor over the test requests and print predicted vs. expected fields.
test_n_samples(model, user_requests, required_data)

Name: Olivia Parker, True_Name: Olivia Parker
Departure: Barcelona, True_Departure: Barcelona
Destination: Amsterdam, True_Destination: Amsterdam
Date: 20-05-2024, True_Date: 20-05-2024
---------------------------------------------
Name: Henry Wright, True_Name: Henry Wright
Departure: Seoul, True_Departure: Seoul
Destination: Sydney, True_Destination: Sydney
Date: 08-09-2024, True_Date: 08-09-2024
---------------------------------------------
Name: Lily Johnson, True_Name: Lily Johnson
Departure: Rome, True_Departure: Rome
Destination: Paris, True_Destination: Paris
Date: 12-07-2024, True_Date: 12-07-2024
---------------------------------------------
Name: Lucas Thompson, True_Name: Lucas Thompson
Departure: New Delhi, True_Departure: New Delhi
Destination: Dubai, True_Destination: Dubai
Date: 30-10-2024, True_Date: 05-10-2024
---------------------------------------------
Name: Isabelle Brown, True_Name: Isabelle Brown
Departure: Tokyo, True_Departure: Tokyo
Destination: London, True_

# The en_core_web_trf model

In [6]:
class SpacyTrfModel():
    """Extract flight-booking details from free text with a transformer-based spaCy pipeline.

    The pipeline's named entities are mapped onto four fields (passenger name,
    departure, destination, date) by a few hand-written rules. A field that no
    entity fills keeps the placeholder ``'Unspecified'``.
    """

    def __init__(self, model_name="en_core_web_trf"):
        """Load the spaCy pipeline.

        Args:
            model_name: Name of an installed spaCy pipeline. Defaults to
                ``en_core_web_trf``, the pipeline this class was written for.
        """
        self.model = spacy.load(model_name)

    @staticmethod
    def _normalise_date_text(raw_date):
        """Remove the filler substrings 'the ', ' of ' and 'around' from a date string."""
        # str.replace leaves the string untouched when the substring is absent,
        # so the replacements can be chained without membership checks.
        cleaned = (
            raw_date
            .replace('the ', '')
            .replace(' of ', ' ')
            .replace('around', '')
        )
        return cleaned.strip()

    def extract_flight_details(self, user_request):
        """Parse one request into ``(name, departure, destination, date)``.

        Rules applied to the entities, in document order:

        * The first ``PERSON`` entity becomes the name.
        * ``NORP``/``GPE``/``LOC`` entities are places. While no departure is
          known, the text before the entity decides its role: a preceding
          ``' from '`` makes it the departure, otherwise a preceding ``' to '``
          makes it the destination, otherwise it is taken as the departure.
          Once a departure is known, the next place fills an empty destination.
        * Every ``DATE`` entity is cleaned and passed to
          ``convert_to_standard_date``; the last one in the text wins.

        Args:
            user_request: The raw request text.

        Returns:
            Tuple ``(name, departure, destination, date)``; unfilled fields are
            the string ``'Unspecified'``.
        """
        doc = self.model(user_request)

        name = 'Unspecified'
        departure = 'Unspecified'
        destination = 'Unspecified'
        date = 'Unspecified'

        for entity in doc.ents:
            label = entity.label_
            if label == 'PERSON' and name == 'Unspecified':
                name = entity.text
            elif label in ('NORP', 'GPE', 'LOC'):
                if departure == 'Unspecified':
                    # Slice at the entity's own character offset. Searching for
                    # the entity text would find its FIRST occurrence, which is
                    # wrong when the same string appears earlier in the request
                    # (e.g. "Paris Hilton ... to Paris").
                    text_before = user_request[:entity.start_char]
                    if ' from ' in text_before:
                        departure = entity.text
                    elif ' to ' in text_before:
                        destination = entity.text
                    else:
                        departure = entity.text
                elif destination == 'Unspecified':
                    destination = entity.text
            elif label == 'DATE':
                date = convert_to_standard_date(self._normalise_date_text(entity.text))

        return name, departure, destination, date


In [7]:
# Build the en_core_web_trf-based extractor; the constructor loads the spaCy pipeline.
model = SpacyTrfModel()

In [8]:
# Run the extractor over the test requests and print predicted vs. expected fields.
test_n_samples(model, user_requests, required_data)

Name: Olivia Parker, True_Name: Olivia Parker
Departure: Barcelona, True_Departure: Barcelona
Destination: Amsterdam, True_Destination: Amsterdam
Date: 20-05-2024, True_Date: 20-05-2024
---------------------------------------------
Name: Henry Wright, True_Name: Henry Wright
Departure: Seoul, True_Departure: Seoul
Destination: Sydney, True_Destination: Sydney
Date: 08-09-2024, True_Date: 08-09-2024
---------------------------------------------
Name: Lily Johnson, True_Name: Lily Johnson
Departure: Rome, True_Departure: Rome
Destination: Paris, True_Destination: Paris
Date: 12-07-2024, True_Date: 12-07-2024
---------------------------------------------
Name: Lucas Thompson, True_Name: Lucas Thompson
Departure: New Delhi, True_Departure: New Delhi
Destination: Dubai, True_Destination: Dubai
Date: 05-10-2024, True_Date: 05-10-2024
---------------------------------------------
Name: Isabelle Brown, True_Name: Isabelle Brown
Departure: Tokyo, True_Departure: Tokyo
Destination: London, True_