In [None]:
!pip install spacy requests
!python -m spacy download en_core_web_sm

In [None]:
# Import the necessary libraries
import spacy
import requests

In [None]:
# Load the pre-trained English pipeline `en_core_web_sm` once at module level;
# `extract_entities` below reads this shared `nlp` object on every call.
nlp = spacy.load('en_core_web_sm')

In [None]:
def download_text(url, timeout=30):
    """
    Download text from the given URL.

    Parameters:
    url (str): The URL to download the text from.
    timeout (float or tuple): Seconds to wait for the server before giving
        up; passed straight to ``requests.get``. Defaults to 30.

    Returns:
    str: The response body decoded to text by requests.

    Raises:
    requests.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout: If the server does not respond within
        ``timeout`` seconds.
    """
    # requests applies no timeout by default, so an unresponsive server would
    # otherwise block this call indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Turn HTTP error statuses into exceptions
    return response.text

In [None]:
def extract_entities(text):
    """
    Run the module-level spaCy pipeline over a text and collect its entities.

    Parameters:
    text (str): The text to analyse.

    Returns:
    list: One ``(entity text, entity label)`` tuple per entry in ``doc.ents``,
        in the order spaCy reports them.
    """
    # `nlp(text)` yields the processed document; its `.ents` holds the entity spans.
    return [(span.text, span.label_) for span in nlp(text).ents]

In [None]:
# Example usage
if __name__ == "__main__":
    # Plain-text edition of "Pride and Prejudice" used as sample input
    url = "http://www.gutenberg.org/files/1342/1342-0.txt"
    text = download_text(url)

    # Keep the demo quick: analyse only the first 10,000 characters
    sample = text[:10000]
    entities = extract_entities(sample)

    print("Entities found:", entities)