# Research with Gemini

We're going to be testing Google's Gemini API. 

Credentials are located in config.yaml in the ai-research root folder.

## Setup

In [1]:
# Imports
from chiefai.ai import analyze_campaign_data
from chiefai.db import query
import polars as pl

# Notebook formatting
from IPython.display import display, HTML, Markdown

In [2]:
# Smoke-test the database connection by pulling a handful of raw web-visit rows.
# Stored under its own name so it is not confused with the campaign aggregate
# that a later cell assigns to `data`.
# NOTE(review): SELECT * includes per-visitor columns such as ip_address; clear this
# cell's output before sharing the notebook.
web_visits_sample = query("SELECT * FROM web_visit_results LIMIT 5")
web_visits_sample.head(3)

id,core_client,project_id,user_id,session_id,unique_key,postal_code,region,dma,dma_code,city,country,browser,device,device_type,search_engine,medium,source,platform,platform_version,bounce,session_date_time,ip_address,pages,search_terms,language,latitude,longitude,organization,referrer,session_length,created_at,updated_at,core_product,last_access,content,session
i64,str,i64,str,str,str,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,datetime[μs],str,list[struct[2]],str,str,f64,f64,str,str,str,datetime[μs],datetime[μs],str,datetime[μs],null,null
99735578,"""OPOS""",82884616,"""50182511673882""","""3288761085059530754""","""828846165018251167388232887610…",,,,0,,"""USA""","""Unknown""",,"""Desktop/laptop""",,"""paidsocial""","""snapchat""","""Unknown""","""Unknown""","""true""",2024-01-03 13:11:13,"""34.123.204.87""","[{""collections/vaginal-health"",0}]",,"""Unknown""",37.751,-97.822,"""Google""","""Direct""","""0""",2024-01-03 13:13:00.337214,2024-01-03 13:13:00.337784,"""""",2024-01-03 13:11:13,,
99735579,"""OPOS""",82884616,"""78135251450464""","""5120671839057608705""","""828846167813525145046451206718…","""32163""","""Florida""","""Orlando, FL""",534,"""The Villages""","""USA""","""Safari""","""Apple iPhone""","""Mobile""",,"""paidsocial""","""ig""","""iOS""","""iOS 17.1""","""true""",2024-01-03 13:11:13,"""68.205.39.5""","[{""collections/vaginal-health"",0}]",,"""English (United States)""",28.9265,-81.9928,"""Spectrum""","""instagram.com""","""0""",2024-01-03 13:13:00.337214,2024-01-03 13:13:00.337784,"""""",2024-01-03 13:11:13,,
99735580,"""OPOS""",82884616,"""103390995781948""","""6775832299565744285""","""828846161033909957819486775832…",,,,0,,"""USA""","""Chrome""","""Generic Android""","""Mobile""",,,"""Direct""","""Android""","""Android 9.0""","""true""",2024-01-03 13:11:13,"""147.160.184.123""","[{""tools/recurring/login"",0}]",,"""English (United States)""",37.751,-97.822,"""Unknown""","""Direct""","""0""",2024-01-03 13:13:00.337214,2024-01-03 13:13:00.337784,"""""",2024-01-03 13:11:13,,


## Assess AI Approach to Data Analysis

We're going to use the `analyze_campaign_data` function from `chiefai.ai` (imported in Setup) to send the data to Gemini.

The approach we're going to try is to create summary statistics from the database and programmatically generate a text prompt to feed to the AI model. We'll do this by embedding the summary data directly in the prompt text.

First, we'll aggregate spot counts, spend, visits, leads, orders, revenue, and impressions by station, weekday, and daypart, and ask the AI model to assess the results.


In [3]:
# SQL for the campaign summary that is handed to the AI analysis.
#
# The `results` CTE selects one row per posted spot for client 'OPOS', excluding
# media 'OL', for spots dated within the last four weeks, and attaches the
# attribution metrics (visits, orders, revenue, leads, impressions) by unique_key.
# The outer SELECT then aggregates those rows by station, weekday and daypart.
#
# The final ORDER BY makes the row order (and therefore any `.head()` preview)
# deterministic; without it the database may return groups in any order.
#
# NOTE(review): the LEFT JOINs on core_tape_details / core_tape_parent contribute no
#   selected columns; if either can match more than one row per spot they will
#   duplicate rows and inflate every aggregate -- confirm they are one-to-one.
# NOTE(review): average_order_value is AVG(online_revenue) across spots, i.e. mean
#   revenue per spot rather than revenue / orders; the alias is kept unchanged
#   because downstream code may depend on it.
# NOTE(review): cpm_* is spend / impressions with no x1000 factor -- presumably the
#   impression columns are stored in thousands; TODO confirm. A zero impression
#   total yields 0 rather than NULL.
query_str = """
WITH results AS (
    select
        p.unique_key,
        COALESCE(s.station_name, p.station) AS station,
        p.date,
        CASE
            WHEN EXTRACT(ISODOW FROM p.date) = 1 THEN 'Monday'
            WHEN EXTRACT(ISODOW FROM p.date) = 2 THEN 'Tuesday'
            WHEN EXTRACT(ISODOW FROM p.date) = 3 THEN 'Wednesday'
            WHEN EXTRACT(ISODOW FROM p.date) = 4 THEN 'Thursday'
            WHEN EXTRACT(ISODOW FROM p.date) = 5 THEN 'Friday'
            WHEN EXTRACT(ISODOW FROM p.date) = 6 THEN 'Saturday'
            ELSE 'Sunday'
        END AS weekday,
        cbd.cdaypart AS daypart,
        p.length,
        p.buyrate AS spend,
        lam.online_visits,
        lam.online_orders,
        lam.online_revenue,
        lam.online_leads,
        lam.impressions AS target_demo_impressions,
        lam.impressions2 AS total_impressions
    from core_post_time p
    left outer join core_tape_details td ON td.tapecd = p.tape
        AND td.cmedia = p.media
        AND td.cclient = p.client
        AND td.cproduct = p.product
        AND td.startdate <= p.bcdate
        AND td.enddate >= p.bcdate
    left outer join core_tape_parent tpp ON tpp.ctpparent = td.ctpparent
        AND tpp.cclient = p.client
        AND tpp.cproduct = p.product
    left outer join linear_attribution_metrics lam ON lam.unique_key = p.unique_key
    join core_estimate est ON est.cmedia = p.media
        AND est.cclient = p.client
        AND est.cproduct = p.product
        AND est.cestimate = p.estimate
    join core_buy_detail cbd ON cbd.nbuydetid = p.buydetid
    join core_buy_table cbt ON cbt.nbuyid = p.buyid
    join stations s ON s.core_label = p.station
    where p.client = 'OPOS'
        AND p.media != 'OL'
        AND p.date >= NOW() - INTERVAL '4 weeks'
)

SELECT
    station,
    weekday,
    daypart,
    COUNT(unique_key) AS spot_count,
    SUM(spend) AS total_spend,
    ROUND(AVG(spend)) AS average_spend,
    SUM(online_visits) AS total_visits,
    ROUND(AVG(online_visits)) AS average_visits,
    SUM(online_leads) AS total_leads,
    ROUND(AVG(online_leads)) AS average_leads,
    SUM(online_orders) AS total_orders,
    ROUND(AVG(online_orders)) AS average_orders,
    ROUND(SUM(online_revenue)::NUMERIC, 2) AS total_revenue,
    ROUND(AVG(online_revenue)::NUMERIC, 2) AS average_order_value,
    SUM(target_demo_impressions) AS total_target_demo_impressions,
    ROUND(AVG(target_demo_impressions)) AS average_target_demo_impressions,
    CASE
        WHEN SUM(target_demo_impressions) = 0 THEN 0
        ELSE ROUND(SUM(spend) / SUM(target_demo_impressions), 2)
    END AS cpm_target_demo,
    SUM(total_impressions) AS total_all_demo_impressions,
    ROUND(AVG(total_impressions)) AS average_all_demo_impressions,
    CASE
        WHEN SUM(total_impressions) = 0 THEN 0
        ELSE ROUND(SUM(spend) / SUM(total_impressions), 2)
    END AS cpm_all_demo
FROM results
GROUP BY
    station,
    weekday,
    daypart
ORDER BY
    station,
    weekday,
    daypart
"""

In [4]:
# Run the campaign summary query; `data` is the frame passed to the AI analysis below.
data = query(query_str)
# A bare last expression uses the notebook's rich table rendering, whereas
# print() falls back to a truncated plain-text box.
data.head(3)

shape: (3, 20)
┌────────────┬─────────┬─────────┬────────────┬───┬────────────┬───────────┬───────────┬───────────┐
│ station    ┆ weekday ┆ daypart ┆ spot_count ┆ … ┆ cpm_target ┆ total_all ┆ average_a ┆ cpm_all_d │
│ ---        ┆ ---     ┆ ---     ┆ ---        ┆   ┆ _demo      ┆ _demo_imp ┆ ll_demo_i ┆ emo       │
│ str        ┆ str     ┆ str     ┆ i64        ┆   ┆ ---        ┆ ressions  ┆ mpression ┆ ---       │
│            ┆         ┆         ┆            ┆   ┆ decimal[*, ┆ ---       ┆ s         ┆ decimal[* │
│            ┆         ┆         ┆            ┆   ┆ 2]         ┆ i64       ┆ ---       ┆ ,2]       │
│            ┆         ┆         ┆            ┆   ┆            ┆           ┆ decimal[* ┆           │
│            ┆         ┆         ┆            ┆   ┆            ┆           ┆ ,0]       ┆           │
╞════════════╪═════════╪═════════╪════════════╪═══╪════════════╪═══════════╪═══════════╪═══════════╡
│ A&E        ┆ Friday  ┆ DA      ┆ 1          ┆ … ┆ 58.33      ┆ 188       ┆

### Basic data validations

In [10]:
# Eyeball the revenue column of the campaign summary (values and any missing entries).
data.get_column("total_revenue")

total_revenue
"decimal[*,2]"
""
222.21
207.72
113.22
""
…
218.54
837.99
265.56
287.84


## Hybrid Prompt

In [5]:
# Settings for the hybrid analysis, gathered in one place so they are easy to tweak.
analysis_options = dict(
    min_sample_size=5,
    confidence_level=0.95,
    verbose_prompt=True,
    save_debug_files=True,
)
results = analyze_campaign_data(data, **analysis_options)

# Even if Gemini fails, these locally computed sections are still available:
for section in ("summary_statistics", "dimensional_analysis", "statistical_insights"):
    print(results[section])

Prompt length: 14994 characters (~3748 tokens)
Prompt saved to: ./debug_output/prompt_20250602_170010.txt
Gemini response type: <class 'str'>
Response length: 7722 characters
First 200 chars: ```json
{
  "insights": [
    {
      "finding": "The overall campaign is significantly unprofitable, but high-performing segments offer a clear path to profitability.",
      "evidence": "The campaig
Full Gemini response saved to: ./debug_output/gemini_response_20250602_170044.txt
Full analysis results saved to: ./debug_output/analysis_results_20250602_170044.json
{'visits_per_spot': {'mean': 19.79, 'median': 14.5, 'std_dev': 15.84, 'min': 0.0, 'max': 63.0, 'sample_size': 142}, 'leads_per_spot': {'mean': 8.62, 'median': 5.66, 'std_dev': 8.0, 'min': 0.0, 'max': 42.0, 'sample_size': 142}, 'orders_per_spot': {'mean': 1.63, 'median': 1.2, 'std_dev': 1.5, 'min': 0.0, 'max': 6.5, 'sample_size': 142}, 'revenue_per_spot': {'mean': 78.88, 'median': 53.4, 'std_dev': 76.0, 'min': 0.0, 'max': 333.63, 'sample