In [0]:
%%capture
%restart_python

if spark is None:
    spark = SparkSession.builder.getOrCreate()
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "false")

In [0]:
%pip install git+https://github.com/OdyOSG/pubMedSearch.git
%pip install git+https://github.com/OdyOSG/llmInvocation.git
# The two installs above pull packages straight from GitHub. No tag or commit
# is given in either URL, so whatever is on each repository's default branch
# at run time gets installed.
# NOTE(review): presumably these provide the `epamPubMedSearch` and
# `llmInvocation` modules imported in the next cell — confirm.
# NOTE(review): consider appending `@<tag-or-commit>` to each URL so reruns
# install the same code.

In [0]:
import epamPubMedSearch.pubmed as pmed
import llmInvocation.invoke as invoke

In [0]:
import pandas as pd

# --- Execution settings: fill these in before running the search ---

# API key used for the PubMed search (left blank in this template)
pm_key = ""

# API key used for the LLM invocation (left blank in this template)
dial_key = ""

# RWD search terms supplied by the package
rwd_terms = pmed.pubmedSearchTerms()
# Start of the publication-date window for the PubMed search; should be an
# integer. 2020 is only a placeholder so that the cell parses and runs —
# replace it with the value you need. (Previously this assignment had no
# right-hand side, which is a SyntaxError.)
# NOTE(review): presumably a four-digit year, given the "3000" upper bound — confirm.
date_filter = 2020

# Date clause for the PubMed query: from `date_filter` up to "3000".
date_term = f'("{date_filter}"[Publication Date] : "3000"[Publication Date])'

# Query PubMed using the settings defined above.
pubMedSearchDf_1 = pmed.run_pubmed_search(
    mesh_term="",           # MeSH term to search for, e.g. smoking
    rwd_terms=rwd_terms,
    date_term=date_term,
    pm_key=pm_key,
    saved_file_name="",     # SQL table name under which results are saved
    return_max=1,           # upper limit on PubMed articles to process
    spark=spark,
)

# Show the resulting data frame for inspection.
display(pubMedSearchDf_1)

In [0]:
# Export the search results to an Excel file, with the download-link option on.
pmed.export_df_to_excel(
    df=pubMedSearchDf_1,                      # data frame to export
    output_filename="pubMedSearchDf_1.xlsx",  # file the results are written to
    displayHTML=displayHTML,                  # forwarded to the exporter as-is
    download_link=True,
)

In [0]:
# Run the LLM invocation over the PubMed search results.
llm_output = invoke.process_df_with_llms(
    # Data frame to process
    df=pubMedSearchDf_1,
    # LLM model(s) obtained from the package (two options: claude and gpt-4o-full)
    llm_dict=invoke.getLLMmodel(dial_key=dial_key),
    # Prompt template for the invocation, taken from the package
    input_prompt=invoke.inputPrompt(),
    # Data frame column whose contents are processed
    text_col="methods",
    # SQL table name under which results are saved
    saved_table_name="",
)

# Show the results for inspection.
llm_output