In [5]:
# Notebook Setup
# Run this cell: 
# The lines below will instruct jupyter to reload imported modules before 
# executing code cells. This enables you to quickly iterate and test revisions
# to your code without having to restart the kernel and reload all of your 
# modules each time you make a code change in a separate python file.

%load_ext autoreload
%autoreload 2

import os

# Change path to project root
if os.getcwd().endswith("notebooks"):
    os.chdir(os.path.dirname(os.getcwd()))
print(os.getcwd())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/shloknatarajan/stanford/research/daneshjou/AutoGKB


In [12]:
from src.inference import PMCIDGenerator

# Question sent to the model. Assembled from adjacent string literals; the
# resulting text (leading and trailing newline included) is passed unchanged.
prompt = (
    "\n"
    "What are all the pharmacogenomic relationships found in this paper?\n"
    "Output your response in markdown table format with nothing except the table. "
    "The columns should be Gene, Polymorphism, Relationship/Effect, and p-value.\n"
)

# Generator configured with one article (by PMCID) and one model name.
generator = PMCIDGenerator(pmcid="PMC11730665", model="gpt-4.1")

# Keep the reply in `response` as a notebook-level name and print the raw
# text so the returned table can be inspected as-is.
response = generator.generate(prompt)
print(response)

Generating 1 Responses:   0%|          | 0/1 [00:00<?, ?it/s][32m2025-07-29 09:23:57.293[0m | [1mINFO    [0m | [36msrc.article_parser[0m:[36m__init__[0m:[36m43[0m - [1mGetting article text from PMCID: PMC11730665[0m
[32m2025-07-29 09:23:57.294[0m | [1mINFO    [0m | [36msrc.article_parser[0m:[36mremove_references_section[0m:[36m90[0m - [1mRemoved References section from article text[0m
Generating 1 Responses: 100%|██████████| 1/1 [00:04<00:00,  4.33s/it]

| Gene      | Polymorphism         | Relationship/Effect                                                                                   | p-value  |
|-----------|---------------------|------------------------------------------------------------------------------------------------------|----------|
| DPP-4     | rs2909451 TT        | Lower efficacy of sitagliptin (less HbA1c improvement vs gliclazide)                                 | <.001    |
| DPP-4     | rs4664443 GG        | Lower efficacy of sitagliptin (less HbA1c improvement vs gliclazide)                                 | <.001    |
| GLP1R     | rs6923761 AA        | Reduced glycemic response to sitagliptin (less HbA1c improvement vs gliclazide)                      | .010     |
| GLP1R     | rs3765467 AG        | Better response to sitagliptin (greater HbA1c improvement vs gliclazide)                             | .023     |
| KCNQ1     | rs163184 GG         | Lower responsiveness to sitagliptin, better response to glicla




In [13]:
# Exercise the PharmacogenomicTableGenerator component
import json

from src.components.pharmacogenomic_table import PharmacogenomicTableGenerator


def _print_as_json(result):
    """Print ``result.model_dump()`` serialized as JSON with 2-space indent."""
    print(json.dumps(result.model_dump(), indent=2))


# Same article and model name as used for the free-form prompt.
table_generator = PharmacogenomicTableGenerator(pmcid="PMC11730665", model="gpt-4.1")

# Path 1: ask the component for structured output directly.
print("=== Generating structured JSON directly ===")
json_data = table_generator.generate_table_json()
_print_as_json(json_data)

# Path 2: convert the markdown table text held in `response`
# (assigned by an earlier cell) to JSON.
print("\n=== Converting existing markdown to JSON ===")
json_from_markdown = table_generator.convert_markdown_to_json(response)
_print_as_json(json_from_markdown)

=== Generating structured JSON directly ===


Generating 1 Responses:   0%|          | 0/1 [00:00<?, ?it/s][32m2025-07-29 09:56:00.514[0m | [1mINFO    [0m | [36msrc.article_parser[0m:[36m__init__[0m:[36m43[0m - [1mGetting article text from PMCID: PMC11730665[0m
[32m2025-07-29 09:56:00.515[0m | [1mINFO    [0m | [36msrc.article_parser[0m:[36mremove_references_section[0m:[36m90[0m - [1mRemoved References section from article text[0m
Generating 1 Responses: 100%|██████████| 1/1 [00:07<00:00,  7.53s/it]

{
  "relationships": [
    {
      "gene": "DPP-4",
      "polymorphism": "rs2909451 TT",
      "relationship_effect": "Lower efficacy of sitagliptin in reducing HbA1c compared to gliclazide.",
      "p_value": "<.001"
    },
    {
      "gene": "DPP-4",
      "polymorphism": "rs4664443 GG",
      "relationship_effect": "Lower efficacy of sitagliptin in reducing HbA1c compared to gliclazide.",
      "p_value": "<.001"
    },
    {
      "gene": "GLP1R",
      "polymorphism": "rs6923761 AA",
      "relationship_effect": "Reduced glycemic response to sitagliptin compared to gliclazide.",
      "p_value": ".010"
    },
    {
      "gene": "GLP1R",
      "polymorphism": "rs3765467 AG",
      "relationship_effect": "Favorable response to sitagliptin (greater HbA1c improvement) compared to gliclazide.",
      "p_value": ".023"
    },
    {
      "gene": "KCNQ1",
      "polymorphism": "rs163184 GG",
      "relationship_effect": "Lower responsiveness to sitagliptin and better response to glicl


