# The regex-based baseline for NER task
### Model contains RegEx rules to find the important data
### The model can also be found in ner_models.py

In [1]:
import re
from scripts import load_test_data, test_n_samples, convert_to_standard_date
# Baseline regex-based method 
class RegexModel():
    def __init__(self):
        # Regular expressions to extract information
        self.pattern_name = r'[A-Z][a-z]+\s[A-Z][a-z]+' # gets first name and last name
        self.pattern_departure = r'from\s([A-Z][a-z]+)' # gets capitalized noun after word 'from'
        self.pattern_destination = r'to\s([A-Z][a-z]+)' # gets capitalized noun after word 'to' 
        self.pattern_date = r'\b(?:\d{1,2}(?:st|nd|rd|th)?\s)?(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s\d{4}\b'

    def extract_flight_details(self, user_request):
        # Initializing variables
        name = 'Unspecified'
        departure = 'Unspecified'
        destination = 'Unspecified'
        date = 'Unspecified'
    
        # Extracting information
        match_name = re.search(self.pattern_name, user_request)
        if match_name:
            name = match_name.group()
    
        match_departure = re.search(self.pattern_departure, user_request)
        if match_departure:
            departure = match_departure.group(1)
    
        match_destination = re.search(self.pattern_destination, user_request)
        if match_destination:
            destination = match_destination.group(1)
    
        match_date = re.search(self.pattern_date, user_request)
        if match_date:
            date = match_date.group()
            date = convert_to_standard_date(date)
        
        return name, departure, destination, date


In [2]:
user_requests, required_data = load_test_data()

In [3]:
model = RegexModel()

In [4]:
test_n_samples(model, user_requests, required_data, 5)

Name: Olivia Parker, True_Name: Olivia Parker
Departure: Barcelona, True_Departure: Barcelona
Destination: Amsterdam, True_Destination: Amsterdam
Date: 20-05-2024, True_Date: 20-05-2024
---------------------------------------------
Name: Henry Wright, True_Name: Henry Wright
Departure: Seoul, True_Departure: Seoul
Destination: Sydney, True_Destination: Sydney
Date: 08-09-2024, True_Date: 08-09-2024
---------------------------------------------
Name: Lily Johnson, True_Name: Lily Johnson
Departure: Rome, True_Departure: Rome
Destination: Paris, True_Destination: Paris
Date: 12-07-2024, True_Date: 12-07-2024
---------------------------------------------
Name: Lucas Thompson, True_Name: Lucas Thompson
Departure: New, True_Departure: New Delhi
Destination: Dubai, True_Destination: Dubai
Date: 05-10-2024, True_Date: 05-10-2024
---------------------------------------------
Name: Isabelle Brown, True_Name: Isabelle Brown
Departure: Tokyo, True_Departure: Tokyo
Destination: London, True_Destin