In [10]:
# Install the notebook's dependencies. %pip (unlike !pip) installs into the
# environment of the running kernel rather than whichever pip is first on PATH.
%pip install spacy
# Fetch the small English pipeline that spacy.load("en_core_web_sm") expects.
!python -m spacy download en_core_web_sm
%pip install joblib
%pip install Flask

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     --------------------------------------- 0.0/12.8 MB 960.0 kB/s eta 0:00:14
      --------------------------------------- 0.2/12.8 MB 2.0 MB/s eta 0:00:07
      --------------------------------------- 0.3/12.8 MB 2.0 MB/s eta 0:00:07
      --------------------------------------- 0.3/12.8 MB 1.9 MB/s eta 0:00:07
      --------------------------------------- 0.3/12.8 MB 1.9 MB/s eta 0:00:07
     - -------------------------------------- 0.5/12.8 MB 1.9 MB/s eta 0:00:07
     - -------------------------------------- 0.6/12.8 MB 1.9 MB/s eta 0:00:07
     -- ------------------------------------- 0.7/12.8 MB 1.9 MB/s eta 0:00:07
     -- ------------------------------------- 0.7/12.8 MB 1.8 MB/s eta 0:00:07
     -- --------------------------------

In [1]:
from flask import Flask, request, jsonify
import spacy
import re

In [None]:
# Flask application object; the /extract-todo-info route below is registered on it.
app = Flask(__name__)

class ExtractFromToDo:
    """Pull venue, time, date and title candidates out of free-form to-do text.

    Venues and titles come from the spaCy ``en_core_web_sm`` pipeline (named
    entities and part-of-speech tags); times and dates are found with regular
    expressions.
    """

    # Value stored in an event record for a field that was not found.
    _EMPTY = "Empty"

    # spaCy entity labels whose text is reported as a venue.
    _LOCATION_LABELS = ("GPE", "LOC", "FAC")

    # Hour 0-23, a ':' or '.' separator, minutes 00-59, optional AM/PM marker.
    _TIME_PATTERN = re.compile(
        r'\b(?:[0-1]?[0-9]|2[0-3])[:.][0-5][0-9](?:\s*(?:AM|PM|am|pm|Pm|Am))?\b'
    )

    # Three numeric fields separated by '-', '.' or '/': either a 2-4 digit
    # last field or a 4-digit first field.
    _DATE_PATTERN = re.compile(
        r'\b(?:\d{1,2}[-./]\d{1,2}[-./]\d{2,4}|\d{4}[-./]\d{1,2}[-./]\d{1,2})\b'
    )

    # Prompt text substituted for each missing field by determine_missing_info.
    _MISSING_FIELD_PROMPTS = {
        "Title": "Event Name",
        "Venue": "Venue Name",
        "Date": "Date",
        "Time": "Time",
    }

    def __init__(self):
        """Load the spaCy model once so every extraction call can reuse it."""
        self.nlp = spacy.load("en_core_web_sm")

    def _locations_from_doc(self, doc):
        """Return the text of each GPE/LOC/FAC entity in an already-parsed doc."""
        return [ent.text for ent in doc.ents if ent.label_ in self._LOCATION_LABELS]

    def _titles_from_doc(self, doc):
        """Return "<determiner> <noun>" for each determiner whose head is a noun."""
        return [
            f"{token.text} {token.head.text}"
            for token in doc
            if token.pos_ == "DET" and token.head.pos_ == "NOUN"
        ]

    def extract_locations(self, text):
        """Return the text of every GPE, LOC or FAC entity spaCy finds in ``text``."""
        return self._locations_from_doc(self.nlp(text))

    def extract_time(self, text):
        """Return the clock times found in ``text``, in order of appearance.

        A time is ``HH:MM`` or ``HH.MM`` with an optional AM/PM suffix. Matches
        that overlap a date recognised by ``extract_dates`` are dropped, so a
        dot-separated date such as ``12.05.2024`` no longer yields ``12.05``.
        """
        date_spans = [match.span() for match in self._DATE_PATTERN.finditer(text)]
        times = []
        for match in self._TIME_PATTERN.finditer(text):
            overlaps_date = any(
                start < match.end() and match.start() < end
                for start, end in date_spans
            )
            if not overlaps_date:
                times.append(match.group(0))
        return times

    def extract_dates(self, text):
        """Return the numeric dates found in ``text``, in order of appearance."""
        return self._DATE_PATTERN.findall(text)

    def extract_title(self, text):
        """Return "<determiner> <noun>" phrases from ``text`` as title candidates."""
        return self._titles_from_doc(self.nlp(text))

    def extract_info_from_sentence(self, sentence):
        """Build event records from one piece of to-do text.

        The i-th title, venue, date and time found are grouped into the i-th
        record; a field with no i-th value is set to ``"Empty"``.

        Args:
            sentence: Text to analyse. ``None`` or an empty string yields ``[]``.

        Returns:
            A list of dicts with the keys ``Title``, ``Venue``, ``Date`` and
            ``Time``. The list is as long as the longest of the four result
            lists, so it is empty when nothing was found.
        """
        if not sentence:
            return []

        # Parse once and share the doc; the public extract_* methods would each
        # run the full spaCy pipeline again.
        doc = self.nlp(sentence)
        locations = self._locations_from_doc(doc)
        titles = self._titles_from_doc(doc)
        times = self.extract_time(sentence)
        dates = self.extract_dates(sentence)

        record_count = max(len(locations), len(times), len(dates), len(titles))

        event_info_list = []
        for i in range(record_count):
            event_info_list.append({
                "Title": titles[i] if i < len(titles) else self._EMPTY,
                "Venue": locations[i] if i < len(locations) else self._EMPTY,
                "Date": dates[i] if i < len(dates) else self._EMPTY,
                "Time": times[i].strip() if i < len(times) else self._EMPTY,
            })
        return event_info_list

    def determine_missing_info(self, event_info_list):
        """Replace ``"Empty"`` fields with a prompt naming what is missing.

        Args:
            event_info_list: Records as produced by ``extract_info_from_sentence``.

        Returns:
            ``[]`` when the list is empty or its first record has no ``"Empty"``
            field. Otherwise one dict per input record, with found values
            copied through and each missing ``Title``/``Venue``/``Date``/``Time``
            replaced by its prompt text. An ``"Empty"`` field under any other
            key is omitted.
        """
        # Guard the [0] lookup below: an empty input used to raise IndexError.
        if not event_info_list:
            return []

        first_is_complete = all(
            value != self._EMPTY for value in event_info_list[0].values()
        )
        if first_is_complete:
            return []

        missing_info_list = []
        for event_info in event_info_list:
            filled = {}
            for key, value in event_info.items():
                if value != self._EMPTY:
                    filled[key] = value
                elif key in self._MISSING_FIELD_PROMPTS:
                    filled[key] = self._MISSING_FIELD_PROMPTS[key]
            missing_info_list.append(filled)
        return missing_info_list

# Single module-level extractor: its constructor loads the spaCy model, so
# building it here means the request handler below reuses one loaded model.
extractor = ExtractFromToDo()

@app.route('/extract-todo-info', methods=['POST'])
def extract_todo_info():
    """Extract event details from the to-do text posted as JSON.

    Expects a JSON object such as ``{"todo_text": "..."}``; a missing
    ``todo_text`` is treated as an empty string.

    Returns:
        ``{"event_info": [...]}`` with one record per extracted event, or a
        400 response with an ``error`` message when the body is not a JSON
        object or ``todo_text`` is not a string.
    """
    data = request.get_json()
    # get_json() can hand back None or a non-dict (e.g. a body of `null` or a
    # JSON array); calling .get on that would surface as a 500.
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    todo_text = data.get('todo_text', '')
    if not isinstance(todo_text, str):
        return jsonify({'error': "'todo_text' must be a string"}), 400

    # Extract information from the todo text
    event_info_list = extractor.extract_info_from_sentence(todo_text)

    # Return the extracted information as JSON response
    return jsonify({'event_info': event_info_list})

# Start Flask's built-in server on port 5001 when this code runs as the main
# program. The call does not return until the server is stopped, so in a
# notebook this cell keeps running (its output says "Press CTRL+C to quit").
if __name__ == '__main__':
    app.run(port=5001)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5001
Press CTRL+C to quit
127.0.0.1 - - [11/Feb/2024 12:11:46] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:12:07] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:13:58] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:15:03] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:15:53] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:15:53] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:16:11] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:16:11] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:16:29] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:16:46] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:17:02] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0.1 - - [11/Feb/2024 12:17:28] "POST /extract-todo-info HTTP/1.1" 200 -
127.0.0