In [1]:
import pandas as pd
from datetime import datetime

# Sample corpus: 20 analyst documents. Four analysts rotate in a fixed order,
# so the name list is the four-name cycle repeated five times.
data = {
    'docId': [*range(1, 21)],  # Document IDs 1..20
    'analyst_name': ['Alice Johnson', 'Bob Smith', 'Cathy Lee', 'David Brown'] * 5,
    'summary': [
        'Market trends analysis for Q1 2024',
        'Quarterly earnings report review',
        'Competitor analysis and positioning',
        'Investment strategy for emerging markets',
        'Update on currency market trends',
        'Annual profit and loss summary',
        'New market entry research',
        'Sustainability and corporate responsibility report',
        'Bi-weekly stock market overview',
        'Tech industry growth report',
        'Healthcare sector trends',
        'Real estate market annual review',
        'Consumer behavior analysis',
        'Automotive industry forecast',
        'Energy sector market dynamics',
        'Global economic impacts of trade policy',
        'Retail industry seasonal review',
        'Construction market quarterly update',
        'Agriculture industry insights',
        'Travel and tourism market trends',
    ],
    # (month, day) pairs, all in 2024, one per document in docId order
    'date': [
        datetime(2024, month, day)
        for month, day in (
            (1, 15), (1, 20), (1, 25),
            (2, 1), (2, 5), (2, 10), (2, 15), (2, 20), (2, 25),
            (3, 1), (3, 5), (3, 10), (3, 15), (3, 20), (3, 25),
            (4, 1), (4, 5), (4, 10), (4, 15), (4, 20),
        )
    ],
}

# Tabular view of the corpus; `df` is what the later cells filter
df = pd.DataFrame(data)

# Preview the first rows
df.head()

Unnamed: 0,docId,analyst_name,summary,date
0,1,Alice Johnson,Market trends analysis for Q1 2024,2024-01-15
1,2,Bob Smith,Quarterly earnings report review,2024-01-20
2,3,Cathy Lee,Competitor analysis and positioning,2024-01-25
3,4,David Brown,Investment strategy for emerging markets,2024-02-01
4,5,Alice Johnson,Update on currency market trends,2024-02-05


In [2]:
import spacy

# spaCy's small English pipeline; `nlp` is reused by the name-extraction cell
nlp = spacy.load('en_core_web_sm')

# Run the pipeline over a sample sentence
text = "Apple is looking at buying U.K. startup for $1 billion"
doc = nlp(text)

# One line per recognised entity: text, start/end character offsets, label
for ent in doc.ents:
    print(
        ent.text,
        ent.start_char,
        ent.end_char,
        ent.label_,
    )


Apple 0 5 ORG
U.K. 27 31 GPE
$1 billion 44 54 MONEY


In [3]:
def filter_data_based_on_names(query, dataframe, names=None):
    """Print and return the rows whose analyst name matches a person in `query`.

    Args:
        query: Free-text query. Person names are extracted from it with
            `extract_person_names` unless `names` is supplied.
        dataframe: DataFrame with 'docId', 'date', 'analyst_name' and
            'summary' columns.
        names: Optional iterable of already-extracted names. When given,
            `query` is not parsed again, which avoids a second NLP pass.

    Returns:
        The matching rows (all columns) sorted by 'date', newest first.
        The result is empty when no name is found or nothing matches.
    """
    if names is None:
        names = extract_person_names(query)
    names = list(names)

    def mentions_any_name(analyst):
        # Missing values (None/NaN) are not strings and can never match;
        # without this guard `name in analyst` would raise TypeError.
        return isinstance(analyst, str) and any(name in analyst for name in names)

    # astype(bool) guarantees a boolean mask; on an empty frame the result of
    # apply may not have bool dtype and would otherwise be treated as a list
    # of column labels instead of a row filter.
    mask = dataframe['analyst_name'].apply(mentions_any_name).astype(bool)

    # Newest documents first
    filtered_df = dataframe[mask].sort_values(by='date', ascending=False)

    # Display the required columns
    print(filtered_df[['docId', 'date','analyst_name', 'summary']])

    # Hand the rows back to the caller (previously the result was discarded)
    return filtered_df

In [4]:
def extract_person_names(text):
    """Return the text of every PERSON entity spaCy finds in `text`.

    Runs `text` through the module-level `nlp` pipeline and keeps the
    entities labelled 'PERSON', in the order they appear in `doc.ents`.
    """
    found = []
    for entity in nlp(text).ents:
        # Skip organisations, places, amounts, etc.
        if entity.label_ != 'PERSON':
            continue
        found.append(entity.text)
    return found

# Demo: find the analyst mentioned in a sample sentence and list their documents
query = "Alice Johnson is working at Google."

# Prints the matching rows, newest first
filter_data_based_on_names(query, df)

# Show which names were recognised in the query
person_names = extract_person_names(query)
print("Person names found:", person_names)


    docId       date   analyst_name                             summary
16     17 2024-04-05  Alice Johnson     Retail industry seasonal review
12     13 2024-03-15  Alice Johnson          Consumer behavior analysis
8       9 2024-02-25  Alice Johnson     Bi-weekly stock market overview
4       5 2024-02-05  Alice Johnson    Update on currency market trends
0       1 2024-01-15  Alice Johnson  Market trends analysis for Q1 2024
Person names found: ['Alice Johnson']
