In [2]:
import pandas as pd
# Load the flattened news feed into a DataFrame
df = pd.read_parquet('../data/news_feed_flattened.parquet')

# First 10 rows (evaluated mid-cell, so the notebook does not render them)
df.head(10)

# Last 10 rows (likewise not rendered)
df.tail(10)

# Number of distinct article links; as the final expression this is the cell's output
df["link"].nunique()

49

In [3]:
# Schema check: list the column names, then dump the first row as a sample
print("DataFrame columns:", df.columns.tolist(), sep="\n")
print("\nSample row:", df.iloc[0], sep="\n")


DataFrame columns:
['type', 'start_date', 'end_date', 'ticker', 'count', 'growth', 'text', 'link', 'model']

Sample row:
type                                                 individual
start_date                                           2023-07-17
end_date                                             2023-07-24
ticker                                                     TSLA
count                                                        42
growth                                                    2.43%
text          Billionaire investor Chamath Palihapitiya beli...
link          https://pythoninvest.com/tpost/yk09rupzv1-week...
model                                                      None
Name: 0, dtype: object


In [4]:
from minsearch import Index

# One dict per DataFrame row, with every value passed through str()
docs = [
    {field: str(value) for field, value in row.items()}
    for row in df.to_dict(orient="records")
]

# "link" is registered as the only keyword field; all other columns are text fields
index = Index(
    keyword_fields=["link"],
    text_fields=[
        "type",
        "start_date",
        "end_date",
        "ticker",
        "count",
        "growth",
        "text",
        "model",
    ],
)

# Build the index over the stringified documents
index.fit(docs)

<minsearch.minsearch.Index at 0x73e051d2fa10>

In [9]:
def print_results(results, num_results=5, max_text_len=200):
    """Print formatted search results.

    Each result must be a mapping with the keys 'text', 'type', 'ticker',
    'growth', 'count', 'model', 'start_date', 'end_date' and 'link'.

    Args:
        results: List of search results from minsearch
        num_results: Number of results to display (default: 5)
        max_text_len: Maximum number of characters of the 'text' field to
            show per result (default: 200). Pass None to print the full text.
    """
    for i, result in enumerate(results[:num_results], 1):
        text = result['text']
        # Append the ellipsis only when text was actually cut off, so short
        # texts are not misreported as truncated.
        truncated = max_text_len is not None and len(text) > max_text_len
        shown = text[:max_text_len] + ("..." if truncated else "")

        print(f"Result {i}:")
        print(f"Text: {shown}")
        print(f"Type: {result['type']}")
        print(f"Ticker: {result['ticker']}")
        print(f"Growth: {result['growth']}")
        print(f"Count: {result['count']}")
        print(f"Model: {result['model']}")
        print(f"Period: {result['start_date']} to {result['end_date']}")
        print(f"Link: {result['link']}")
        print("-" * 80)


In [14]:
def search_news(query, link=None, boost_dict=None):
    """Run a query against the module-level minsearch ``index``.

    Args:
        query: Search query string
        link: Optional link; when truthy, it is passed to the index as a
            filter on the "link" field
        boost_dict: Optional mapping of field name to boost value; when None,
            the default weighting below is used

    Returns:
        Whatever ``index.search`` returns for the query
    """
    boosts = boost_dict
    if boosts is None:
        # Default weighting: article text counts most; type, ticker and the
        # two date fields are boosted equally. growth, model and count are
        # not boosted here.
        boosts = dict(text=3, type=2, ticker=2, start_date=2, end_date=2)

    # Filter on the link only when one was supplied
    filters = {"link": link} if link else {}

    return index.search(query, filter_dict=filters, boost_dict=boosts)

In [15]:
# Test the search functionality
test_query = "technology growth"
results = search_news(test_query)

print(f"Search results for '{test_query}':\n")
print_results(results, num_results=3)

Search results for 'technology growth':

Result 1:
Text: Apple is gearing up for a potentially significant announcement, expected to involve artificial intelligence (AI) technology in its products. Analysts have revised the price target for Apple stock, ant...
Type: individual
Ticker: AAPL
Growth: 1.66%
Count: 10
Model: None
Period: 2024-05-21 to 2024-05-27
Link: https://pythoninvest.com/tpost/hcufmog3c1-week-21-27-may-2024
--------------------------------------------------------------------------------
Result 2:
Text: Tesla's stock has been in the spotlight, with anticipation building for the company's upcoming Robotaxi event on October 10th, which could showcase its progress in autonomous driving and AI technology...
Type: individual
Ticker: TSLA
Growth: 0.45%
Count: 13
Model: None
Period: 2024-09-24 to 2024-09-30
Link: https://pythoninvest.com/tpost/fukvxbgbl1-week-24-30-september-2024
--------------------------------------------------------------------------------
Result 3:
Text: T

In [16]:
# Natural-language query that names the company ("Tesla") and a date range
test_query = "Can you share some insights about Tesla's share from 2024-11-01 to 2024-11-07 ?"
results = search_news(query=test_query)

# Header line (with a trailing blank line), then the top three hits
print(f"Search results for '{test_query}':\n")
print_results(results, 3)

Search results for 'Can you share some insights about Tesla's share from 2024-11-01 to 2024-11-07 ?':

Result 1:
Text: Uber Technologies (NYSE: UBER) has received a mix of analyst ratings in the last quarter, with 4 analysts bullish, 8 somewhat bullish, and no indifferent, somewhat bearish, or bearish ratings. The ave...
Type: individual
Ticker: UBER
Growth: 1.28%
Count: 11
Model: None
Period: 2023-11-07 to 2023-11-13
Link: https://pythoninvest.com/tpost/tk3bt13m41-week-7-13-november-2023
--------------------------------------------------------------------------------
Result 2:
Text: Home Depot's stock faced adjustments as several analysts cut price targets and estimates following the company's Q3 FY23 results, which saw a 3% decline in sales but beat EPS expectations. Despite an ...
Type: individual
Ticker: HD
Growth: 0.3%
Count: 11
Model: None
Period: 2023-11-14 to 2023-11-20
Link: https://pythoninvest.com/tpost/dj1ay4rjl1-week-14-20-november-2023
------------------------------------

In [17]:
# Same question as before, but using the ticker symbol ("TSLA") instead of the company name
test_query = "Can you share some insights about TSLA share from 2024-11-01 to 2024-11-07 ?"
results = search_news(query=test_query)

# Header line (with a trailing blank line), then the top three hits
print(f"Search results for '{test_query}':\n")
print_results(results, 3)

Search results for 'Can you share some insights about TSLA share from 2024-11-01 to 2024-11-07 ?':

Result 1:
Text: Tesla has reported a year-over-year increase in production and delivery of its electric vehicles. Analysts anticipate the company's highly anticipated robotaxi event on October 10 to bring game-changi...
Type: individual
Ticker: TSLA
Growth: -3.7%
Count: 11
Model: None
Period: 2024-10-01 to 2024-10-07
Link: https://pythoninvest.com/tpost/tta2omepc1-week-1-7-october-2024
--------------------------------------------------------------------------------
Result 2:
Text: Tesla's stock rose by 4% on Monday due to positive news from abroad. This includes speculation about the potential impact of Tesla's new, more affordable Model 2 electric vehicle, and hopes around its...
Type: individual
Ticker: TSLA
Growth: 4.22%
Count: 16
Model: None
Period: 2023-11-07 to 2023-11-13
Link: https://pythoninvest.com/tpost/tk3bt13m41-week-7-13-november-2023
---------------------------------------

# Observations:

* When the query uses the ticker symbol (e.g. `TSLA`) instead of the company name, the search returns better results.
* It still needs some form of contextual search and re-ranking, as the relevant result only appears in 3rd position.
