In [40]:
import spacy
import re

# Precompiled e-mail matcher, e.g. "user.name+tag@example.co.uk".
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

# spaCy entity label -> key of the result dictionary it is reported under.
# Labels that are not listed here (ORG, MONEY, ...) are ignored.
_LABEL_TO_KEY = {
    "PERSON": "names",
    "DATE": "dates",
    "GPE": "countries",
}

# Pipelines that have already been loaded, keyed by model name. Loading a
# model is by far the most expensive step, so it is done once per model
# instead of once per extract_information() call.
_PIPELINES = {}


def _get_pipeline(model_name="en_core_web_sm"):
    """Return the spaCy pipeline for ``model_name``, loading it on first use only."""
    if model_name not in _PIPELINES:
        _PIPELINES[model_name] = spacy.load(model_name)
    return _PIPELINES[model_name]


def extract_information(text, nlp=None):
    """Extract person names, dates, places and e-mail addresses from ``text``.

    Names, dates and places come from spaCy's named-entity recogniser;
    e-mail addresses are found with a regular expression applied to the
    raw text.

    Parameters
    ----------
    text : str
        The text to analyse.
    nlp : callable, optional
        An already loaded spaCy pipeline (anything that maps a string to an
        object with an ``ents`` attribute). When omitted, the small English
        model ``en_core_web_sm`` is loaded on the first call and reused
        afterwards.

    Returns
    -------
    dict
        ``{"names": [...], "dates": [...], "countries": [...], "emails": [...]}``.
        Every value is a list of strings in order of appearance; duplicates
        are kept. ``"countries"`` holds every entity labelled ``GPE``, which
        in spaCy's English models also covers cities and states.
        Organisation (``ORG``) entities are not reported.
    """
    if nlp is None:
        nlp = _get_pipeline()

    doc = nlp(text)

    # Keys are created in the order callers already see when printing the result.
    found = {"names": [], "dates": [], "countries": []}
    for ent in doc.ents:
        key = _LABEL_TO_KEY.get(ent.label_)
        if key is not None:
            found[key].append(ent.text)

    # The model has no e-mail entity type, so e-mails are matched on the raw text.
    found["emails"] = _EMAIL_RE.findall(text)
    return found


In [41]:
# Quick demonstration: a short biography containing a person, a place,
# a date of birth and two e-mail addresses.
text = (
    "Vinay Shankar is a Data Scientist from India working at TroveHealth Inc. "
    "He was born on 05/09/1989. "
    "You can contact him at manvins567@gmail.com and vinayxyz@yahoo.com."
)
information = extract_information(text)
print(information)

{'names': ['Vinay Shankar'], 'dates': ['05/09/1989'], 'countries': ['India'], 'emails': ['manvins567@gmail.com', 'vinayxyz@yahoo.com']}


In [42]:
# Longer, resume-style sample text used as a second input for extract_information.
# NOTE(review): each trailing backslash sits inside the string literal, so the
# line break is dropped and the next line's leading spaces become part of the
# text. Where the following line starts at column 0 the sentences are joined
# with no space at all ("issues.During", "functionality.John", "orders.With"),
# which may affect tokenisation and entity detection. Consider parenthesised
# implicit string concatenation instead, then re-run the cell.
text2 = "John Doe is a dedicated and motivated professional with a Bachelor of Science in Computer Science from XYZ University. \
    With a strong proficiency in programming languages such as Python, Java, and C++, \
    John has developed expertise in web development using HTML, CSS, and JavaScript. \
    His skills also include database management with MySQL and PostgreSQL. \
    John has a proven track record as a Software Engineer at ABC Tech Solutions, \
    where he was responsible for developing and maintaining software applications using Python and the Django framework. \
    He collaborated effectively with cross-functional teams to deliver high-quality software products, conducted code reviews, \
    and implemented best practices for software development. John also demonstrated his problem-solving skills by troubleshooting and resolving software defects and performance issues.\
During his internship at XYZ Corporation, John gained valuable experience assisting in the development and testing of software modules. He conducted research and analysis to support project requirements and actively participated in team meetings, providing valuable input on software design and functionality.\
John has showcased his abilities through various projects, including the development of a personal blog website with user authentication and blog post creation features using HTML, CSS, and JavaScript. He also designed and implemented a web-based inventory management system using Python and Django, which included features for tracking stock levels, generating reports, and managing orders.\
With excellent communication skills and a collaborative mindset, John is adept at working in team environments and contributing to the success of organizations. He is now seeking a challenging position in the field of [industry/field] where he can utilize his skills and knowledge to drive innovation and make a significant impact. Contact John at johndoe@example.com or via phone at +1 123-456-7890 for further inquiries."

# Run the extractor on the longer sample and keep the result in info2.
info2 = extract_information(text2)

# Show the extracted names, dates, places and e-mail addresses.
print(info2)

{'names': ['John Doe', 'Java', 'Django', 'John', 'John', 'John', 'Django', 'John'], 'dates': [], 'countries': ['PostgreSQL'], 'emails': ['johndoe@example.com']}
