Retrieve 20 records, keeping only the last 4 columns, from the GCS file generated by ex01.ipynb.
Then ask "gemini-2.5-flash" to summarize the reports.

- Prerequisite: generate an API key through [Google AI Studio](https://aistudio.google.com/app/apikey)

In [23]:
import json
import os

from dotenv import load_dotenv
from google import genai
from google.oauth2 import service_account
from google.cloud import storage

In [24]:
# Load variables from the .env file one directory above the working directory.
# override=True makes values from the file replace variables that are already
# set in the process environment.
load_dotenv(dotenv_path='../.env', override=True)

True

In [25]:
# Credentials and resource locations come from environment variables;
# os.getenv returns None for any variable that is not set.
genai_api_key = os.getenv("GCP_GENAI_API_KEY")
# Passed to Credentials.from_service_account_file, i.e. used as a file path.
service_account_key = os.getenv("GCP_SERVICE_ACCOUNT_KEY")
project_id = os.getenv("GCP_PROJECT_ID")
bucket_name = os.getenv("GCP_BUCKET_NAME")
# Object path inside the bucket. A plain literal: there are no placeholders,
# so the f-prefix was unnecessary.
file_name = "sf_police_report/2025-09-08.json"

In [26]:
def retrieve_data_from_gcs(service_account_key: str,
                           project_id: str,
                           bucket_name: str,
                           file_name: str,
                           key_list: list
                           ) -> list:
    """Download a JSON blob from Google Cloud Storage and pick fields from each record.

    The blob content is parsed with ``json.loads`` and iterated; every element
    is queried with ``.get``, so the payload is assumed to be a JSON array of
    objects.

    Args:
        service_account_key: Path to the service-account key file used to
            build the credentials.
        project_id: GCP project the storage client is created for.
        bucket_name: Name of the bucket that holds the blob.
        file_name: Object path of the JSON blob inside the bucket.
        key_list: Keys to extract from every record, in output order.

    Returns:
        One list per record containing the values for ``key_list`` in order.
        A key that is absent from a record yields ``None`` at its position.
    """
    credentials = service_account.Credentials.from_service_account_file(service_account_key)
    client = storage.Client(project=project_id,
                            credentials=credentials)
    blob = client.bucket(bucket_name).blob(file_name)
    # download_as_string() is a deprecated alias of download_as_bytes();
    # json.loads accepts the returned bytes directly.
    records = json.loads(blob.download_as_bytes())

    return [[record.get(key) for key in key_list] for record in records]

In [27]:
# Fields pulled from every incident record; the position of each key fixes
# the column order of the rows returned by retrieve_data_from_gcs.
key_list = [
    'incident_datetime',
    'report_datetime',
    'incident_code',
    'incident_category',
    'incident_description',
    'latitude',
    'longitude',
    'police_district',
]
data = retrieve_data_from_gcs(
    service_account_key=service_account_key,
    project_id=project_id,
    bucket_name=bucket_name,
    file_name=file_name,
    key_list=key_list,
)

In [28]:
# Keep the first MAX_RECORDS rows and, for each, only the trailing
# TRAILING_COLUMNS fields (with the key_list used for `data` these are
# incident_description, latitude, longitude, police_district).
# Slicing `data` before the comprehension avoids trimming rows that would be
# thrown away immediately afterwards.
MAX_RECORDS = 20
TRAILING_COLUMNS = 4
filtered_data = [row[-TRAILING_COLUMNS:] for row in data[:MAX_RECORDS]]

In [29]:
# Prefer the GEMINI_API_KEY environment variable; if it is unset or empty,
# fall back to `genai_api_key` (read from GCP_GENAI_API_KEY earlier in the
# notebook), which was otherwise loaded but never used.
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY") or genai_api_key)

In [30]:
# Model identifier passed as `model=` to client.models.generate_content.
model_name = "gemini-2.5-flash"

In [31]:
# Each row of filtered_data is [incident_description, latitude, longitude,
# police_district], so the prompt labels the fields in that order (lat
# before lon). Adjacent string literals replace the backslash continuations,
# which joined the lines without spaces ("reporton", "recently.Summarize").
prompt_content = (
    "There has been a police report "
    "on the following list of [description, lat, lon, district] : "
    f"{filtered_data} recently. "
    "Summarize the reports"
)

In [32]:
# Send the prompt to the selected model in a single generate_content request
# and keep the returned response object for inspection.
response = client.models.generate_content(
    model=model_name,
    contents=prompt_content
)

In [33]:
# Show the text of the model's reply as the cell output.
# NOTE(review): a bare expression displays the string repr (newlines appear
# as escaped "\n"); print(response.text) would render the line breaks.
response.text

'Here\'s a summary of the police reports:\n\nThere are a total of **20 police reports**.\n\n**Key Incident Types:**\n\n*   **Vehicle-Related Crimes (7 reports):** This is the most frequent category. Incidents include:\n    *   4 cases of "Theft, From Locked Vehicle, >$950"\n    *   1 case of "Theft, From Unlocked Vehicle, >$950" (missing coordinates)\n    *   1 "Vehicle, Stolen, Other Vehicle"\n    *   1 "Vehicle, Recovered, Auto" (reported as \'Out of SF\', missing coordinates)\n    *   1 "Arson of Vehicle"\n*   **Property Crimes (excluding vehicle theft):**\n    *   1 "Malicious Mischief, Vandalism to Property"\n    *   1 "False Personation"\n*   **Violent Crimes & Threats (2 reports):**\n    *   1 "Battery"\n    *   1 "Terrorist Threats"\n*   **Police Interaction & Arrests (3 reports):**\n    *   1 "Evading a Police Officer Recklessly"\n    *   1 "Resisting, Delaying, or Obstructing Peace Officer Duties"\n    *   1 "Warrant Arrest, Enroute To Adult Authority"\n*   **Drug Offenses (1