In [15]:
import json
import os
import google.generativeai as genai
from google.generativeai import caching
import datetime
import time
from dotenv import load_dotenv

# Load environment variables (GEMINI_API_KEY is read from os.environ below).
load_dotenv()

# Identifier of the query whose files are stored under pdfs/<QUERY_UUID>/.
QUERY_UUID = "debd9b3c-4531-462c-b2c2-983b2710fe81"

# Maximum number of papers uploaded into the context cache.
# The model cannot handle more than ~500k tokens at a time (a limitation of
# the context cache), so the number of papers is capped for now.
MAX_PAPERS = 10

# System instruction for the model. A context manager is used so the file
# handle is closed as soon as the text has been read.
with open("instructions.txt", "r") as instructions_file:
    PROMPT = instructions_file.read()

genai.configure(api_key=os.environ['GEMINI_API_KEY'])

base = f"pdfs/{QUERY_UUID}/"
# os.listdir returns entries in arbitrary order; sorting makes the selected
# subset of papers the same on every run.
paths = sorted(os.listdir(base))
selected_paths = paths[:MAX_PAPERS]

# Upload each selected file and keep the returned handles for the cache.
files = [genai.upload_file(path=base + path) for path in selected_paths]

# Number of papers actually uploaded; interpolated into the title prompt.
num_papers = len(selected_paths)

# Generation settings handed to the model; the response MIME type asks for JSON.
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
    response_mime_type="application/json",
)

# Cache the uploaded files together with the system instruction.
# The cache is created with a time-to-live of 2 hours.
cache_ttl = datetime.timedelta(hours=2)
cache = caching.CachedContent.create(
    model='models/gemini-1.5-flash-001',
    display_name='test',  # used to identify the cache
    system_instruction=PROMPT,
    contents=files,
    ttl=cache_ttl,
)

# Build the model from the cached content, using the generation settings.
model = genai.GenerativeModel.from_cached_content(
    cached_content=cache,
    generation_config=generation_config,
)

# Ask the model for the title of every article, returned as a JSON array.
title_prompt = f"There are {num_papers} articles in this research. Please return a json array with the title of each of the articles."
response = model.generate_content([title_prompt])

# Parse the JSON text of the response into a Python object.
paper_titles = json.loads(response.text)




In [16]:
# For each paper title, ask the model to classify the paper according to the
# PICO/PECO criteria and collect the raw response text.
classifications = []
for paper in paper_titles:
    print(paper)
    # The f-prefix is required so that {paper} is replaced by the actual
    # title; without it the model is sent the literal text "{paper}" and
    # cannot know which article is being asked about.
    response = model.generate_content([(
        f'For the article titled {paper}, please List if it meets each of the PICO/PECO criteria, and then classify them as passing or failing. It must pass every point to be included. Allow for Interventions and/or Exposures. A study should be included if the PICO/PECO question can be answered by the data (e.g., a study of adults, which reports out the age group of 65+ would still be included). Allow for the studies which measure association as well a causation. For the JSON document return the keys "Article","Population","Intervention/Exposure","Comparison","Outcome","PICO","PECO","Classification".'
    )])

    # Keep the response text as returned (not parsed) for this paper.
    classifications.append(response.text)

print(classifications)


Socioeconomic status and health, in social epidemiology
Educational attainment and cognitive decline in old age
Does childhood schooling affect old age memory or mental status? Using state schooling laws as natural experiments
The effect of education on old age cognitive abilities
Depression and the risk for cardiovascular diseases: systematic review and meta analysis
Depression and risk of stroke morbidity and mortality: a meta-analysis and systematic review
Major depression as a neuroprogressive prelude to dementia: what is the evidence?
Depression as a risk factor for the onset of type 2 diabetes mellitus. A meta-analysis
Social support and protection from depression: systematic review of current findings in Western countries
Behavioural activation for depression in older people: systematic review and meta-analysis
Depression and cognition in the elderly
Psychosocial interventions for the prevention of depression in older adults: systematic review and meta-analysis
Effect of singing

In [20]:
# Sanity check: ask the model which article titles it reports for the cached content.
prompt = 'Please return a json array with the title of each of the articles.'
response = model.generate_content([prompt])
print(response.text)


[
"Cultural engagement and incident depression in older adults: evidence from the English Longitudinal Study of Ageing",
"The Berthoin reform, educational attainment and adult health in France",
"Arts engagement in older adults: a systematic review and meta-analysis"
]

