In [3]:
import pandas as pd

# Read the abstracts corpus from the CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="abstracts.csv")

# A bare trailing expression makes the notebook render the frame as the cell output.
df

Unnamed: 0,Abstract
0,Successful collateral channel (CC) crossing is...
1,Colorectal cancer (CRC) is one of the most agg...
2,Subtle loss of functionality in healthy older ...
3,Two-dimensional (2D) materials have attracted ...
4,Sickle cell disease (SCD) is an inherited diso...
...,...
95,Multifunctional nanoprobes have attracted sign...
96,This study reports data from the first evaluat...
97,Mesoporous polyethylene glycol (PEG)/silica an...
98,Local-area cartilage segmentation (LACS) softw...


In [7]:
# Define a function for free-form queries
def perform_query(query_string, corpus_df, regex=False):
    """
    Perform a free-form, case-insensitive query against the document corpus DataFrame.

    The query is matched as a literal substring by default, so text containing
    regular-expression metacharacters (for example "(AI)" or "C++") is searched
    for verbatim instead of raising or matching something else.

    Args:
        query_string (str): The text to look for in the 'Abstract' column.
        corpus_df (pandas.DataFrame): The DataFrame containing the document corpus.
            It must have an 'Abstract' column.
        regex (bool): If True, interpret ``query_string`` as a regular expression.
            Defaults to False (literal substring match).

    Returns:
        pandas.DataFrame: Subset of the corpus DataFrame whose abstract contains
        the query. Rows with a missing or non-string abstract are never returned.

    Raises:
        KeyError: If ``corpus_df`` has no 'Abstract' column.
    """
    abstracts = corpus_df['Abstract']

    # An empty or entirely-missing column has nothing to search; returning early
    # also avoids the .str accessor rejecting an all-NaN (float) column.
    if abstracts.isna().all():
        return corpus_df.iloc[0:0]

    # na=False turns missing / non-string cells into "no match", which keeps the
    # mask strictly boolean so it is always safe to index with.
    matches = abstracts.str.contains(query_string, case=False, regex=regex, na=False)
    return corpus_df[matches]

In [11]:
# Define a function for faceted search
def perform_faceted_search(start_year, end_year, corpus_df, year_column=None):
    """
    Perform faceted search by publication-year range against the document corpus DataFrame.

    Args:
        start_year (int): First year of the range (inclusive).
        end_year (int): Last year of the range (inclusive).
        corpus_df (pandas.DataFrame): The DataFrame containing the document corpus.
        year_column (str, optional): Name of the column holding each document's
            publication year. When omitted, the bounds are compared against the
            DataFrame's index labels, so the index itself must hold years; a
            default positional index (0, 1, 2, ...) will match nothing for
            realistic year values.

    Returns:
        pandas.DataFrame: Subset of the corpus DataFrame whose year lies within
        ``[start_year, end_year]``.

    Raises:
        KeyError: If ``year_column`` is given but is not a column of ``corpus_df``.
    """
    if year_column is None:
        # Original behaviour, kept for existing callers: filter on the index labels.
        in_range = (corpus_df.index >= start_year) & (corpus_df.index <= end_year)
        return corpus_df[in_range]

    if year_column not in corpus_df.columns:
        raise KeyError(f"Column {year_column!r} not found in corpus DataFrame")

    # Coerce to numbers so years stored as text still compare correctly;
    # unparseable or missing values become NaN and fall outside every range.
    years = pd.to_numeric(corpus_df[year_column], errors="coerce")
    return corpus_df[years.between(start_year, end_year)]

In [12]:
# Free-form search: abstracts that mention "intelligence"
query_result = perform_query("intelligence", df)

# Faceted search over 2013-2023. NOTE: perform_faceted_search compares these
# bounds with the frame's index labels, so a frame indexed by row position
# produces an empty result.
faceted_search_result = perform_faceted_search(2013, 2023, df)

# Show both result sets, each under its heading
print("Free-form Query Result:", query_result, sep="\n")
print("\nFaceted Search Result:", faceted_search_result, sep="\n")

Free-form Query Result:
                                             Abstract
6   Deep brain stimulation has been in clinical us...
55  An increasing interest in machine learning (ML...
57  Uncertainty estimation in healthcare involves ...

Faceted Search Result:
Empty DataFrame
Columns: [Abstract]
Index: []
