In [1]:
import duckdb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from datetime import datetime, timedelta

# 1. Retrieve entries where the Project End and Award Amount have changed

This first set of data focuses on grants whose activity code does not start with F, restricted to cases where the project end date moved earlier and the award amount decreased. Later on in this notebook, I pull out F grants separately, because I believe some date and funding changes for F-type grants may simply reflect graduations. Date and funding changes for non-training grants may be rarer.

In [2]:
# Level 1: join the current project JSON (new_data) to the `json_2025_03_03`
# copy (old_data) on appl_id and keep non-F activity codes whose project end
# date moved earlier AND whose award amount went down.
level_1_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_03/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_award_amount < old_award_amount
      AND new_data.project_num_split.activity_code NOT LIKE 'F%'
    ORDER BY new_project_end_date DESC
    """

# Destination of the exported result set (written without the DataFrame index).
level_1_csv = "/home/jovyan/public/weekly/week_of_2025_03_09/data/project_changes_level_1_project_end_and_award_changes.csv"

# Run the query, materialise it as a pandas DataFrame, then export it.
data = duckdb.query(level_1_sql).to_df()
data.to_csv(level_1_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

For convenience, below I write out the projects whose new project end date falls on or after the last inauguration (January 20, 2025) in a more human-readable format.

In [3]:
# Print a human-readable summary of every row in `data` whose new project end
# date is on or after 2025-01-20. Expects `data` to be the DataFrame produced
# by the preceding query cell.
rule = "-" * 75  # separator line reused between the sections of each entry

select = data.new_project_end_date >= datetime(2025, 1, 20)
for ind, row in data[select].iterrows():
    # Identification: project number, application id, organization, title.
    print(row.project_num, row.appl_id)
    print(row.org_name)
    print(row.project_title)
    print(rule)
    # Dates: project-level values on the left, budget-period values on the right.
    print(f"Project Start:      {row.project_start_date}  --  Budget Start:      {row.budget_start}")
    print(f"Old Project End:    {row.old_project_end_date}  --  Old Budget End:    {row.old_budget_end_date}")
    print(f"New Project End:    {row.new_project_end_date}  --  New Budget End:    {row.new_budget_end_date}")
    print(rule)
    # Funding: every label is padded to the same 20-character width so the
    # three values line up (the change label previously was one space short).
    print(f"Old Award Amount:   {row.old_award_amount}")
    print(f"New Award Amount:   {row.new_award_amount}")
    print(f"Award Change:       {row.new_award_amount - row.old_award_amount}")
    print(rule)
    print("\n")

5R01DK129239-03 10689869
UNIVERSITY OF ALABAMA AT BIRMINGHAM
Mitochondrial-based Determinants of Sex Differences in Acute Kidney Injury
---------------------------------------------------------------------------
Project Start:      2021-09-15 12:09:00  --  Budget Start:      2023-07-01 12:07:00
Old Project End:    2025-06-30 12:06:00  --  Old Budget End:    2025-06-30 12:06:00
New Project End:    2025-03-03 12:03:00  --  New Budget End:    2025-03-03 12:03:00
---------------------------------------------------------------------------
Old Award Amount:   222750
New Award Amount:   221521
Award Change:      -1229
---------------------------------------------------------------------------


5R01AI171984-03 10910038
VAN ANDEL RESEARCH INSTITUTE
The roles of genetics, hormones, and gender in sexually dimorphic immune response
---------------------------------------------------------------------------
Project Start:      2022-09-01 12:09:00  --  Budget Start:      2024-09-01 12:09:00
Old Pro

# 2. Retrieve entries where just the Project End has changed

It is likely that some projects were terminated after using up all of their funding. These projects will only have a date change. I separate them out from the previous category because when I look at date changes in the database, it is difficult to know whether a project had its end date moved up for legitimate reasons or whether a nefarious grant termination was involved.

In [4]:
# Level 2: same join as level 1 (current JSON vs. the `json_2025_03_03` copy,
# matched on appl_id, non-F activity codes only), but keeping rows whose
# project end date moved earlier while the award amount stayed equal.
level_2_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_03/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_award_amount = old_award_amount
      AND new_data.project_num_split.activity_code NOT LIKE 'F%'
    ORDER BY new_project_end_date DESC
    """
level_2_relation = duckdb.query(level_2_sql)

# Materialise as a pandas DataFrame; later cells read it through `data`.
data = level_2_relation.to_df()

# Export the result set without the DataFrame index column.
level_2_csv = "/home/jovyan/public/weekly/week_of_2025_03_09/data/project_changes_level_2_project_end_changes.csv"
data.to_csv(level_2_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [5]:
# Human-readable report of the rows in `data` whose new project end date is on
# or after 2025-01-20: one text block per project, followed by blank lines.
divider = "-" * 75
select = data.new_project_end_date >= datetime(2025, 1, 20)

for _idx, row in data[select].iterrows():
    report_lines = [
        # Identification
        f"{row.project_num} {row.appl_id}",
        str(row.org_name),
        str(row.project_title),
        divider,
        # Project-level dates on the left, budget-period dates on the right
        f"Project Start:      {row.project_start_date}  --  Budget Start:      {row.budget_start}",
        f"Old Project End:    {row.old_project_end_date}  --  Old Budget End:    {row.old_budget_end_date}",
        f"New Project End:    {row.new_project_end_date}  --  New Budget End:    {row.new_budget_end_date}",
        divider,
        # Funding before/after and the difference between them
        f"Old Award Amount:   {row.old_award_amount}",
        f"New Award Amount:   {row.new_award_amount}",
        f"Award Change:       {row.new_award_amount - row.old_award_amount}",
        divider,
        # Trailing element reproduces the blank lines separating entries
        "\n",
    ]
    print("\n".join(report_lines))

5R01ES030616-05 10762347
COLUMBIA UNIVERSITY HEALTH SCIENCES
Integrating Air Pollution Prediction Models: Uncertainty Quantification and Propagation in Health Studies
---------------------------------------------------------------------------
Project Start:      2020-03-16 12:03:00  --  Budget Start:      2024-01-01 12:01:00
Old Project End:    2025-12-31 12:12:00  --  Old Budget End:    2025-12-31 12:12:00
New Project End:    2025-06-30 12:06:00  --  New Budget End:    2025-06-30 12:06:00
---------------------------------------------------------------------------
Old Award Amount:   601613
New Award Amount:   601613
Award Change:       0
---------------------------------------------------------------------------


5U54MD015946-05 10845589
UNIVERSITY OF HOUSTON
HEALTH Center for Addictions Research and Cancer Prevention
---------------------------------------------------------------------------
Project Start:      2020-09-08 12:09:00  --  Budget Start:      2024-05-01 12:05:00
Old Proj

# 3. Retrieve F grant entries where the Project End has changed

As mentioned above, I believe it can be difficult to determine which grants on this list had project end changes because of the current administration and which had them because the trainee graduated. Therefore, I am more cautious with the filtering on this list.

In [6]:
# Level 3: F activity codes only. Join the current project JSON (new_data) to
# the `json_2025_03_03` copy (old_data) on appl_id and keep rows whose project
# end date moved earlier, with the new end date on or after 2025-01-20.
# Unlike the earlier levels, the award amount is not part of the filter.
level_3_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_03/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_data.project_num_split.activity_code LIKE 'F%'
      AND new_data.project_end_date >= '2025-01-20'
    ORDER BY new_project_end_date DESC
    """

# Destination of the exported result set (written without the DataFrame index).
level_3_csv = "/home/jovyan/public/weekly/week_of_2025_03_09/data/project_changes_level_3_training_grant_changes.csv"

# Run the query, materialise it as a pandas DataFrame, then export it.
data = duckdb.query(level_3_sql).to_df()
data.to_csv(level_3_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [7]:
# Human-readable report of the rows in `data` whose new project end date is on
# or after 2025-01-20. The preceding query already applies the same cutoff in
# SQL, so this mask is expected to keep every row.
separator = "-" * 75
select = data.new_project_end_date >= datetime(2025, 1, 20)

for _idx, row in data[select].iterrows():
    # One print call per project: each argument becomes its own output line.
    print(
        f"{row.project_num} {row.appl_id}",
        row.org_name,
        row.project_title,
        separator,
        f"Project Start:      {row.project_start_date}  --  Budget Start:      {row.budget_start}",
        f"Old Project End:    {row.old_project_end_date}  --  Old Budget End:    {row.old_budget_end_date}",
        f"New Project End:    {row.new_project_end_date}  --  New Budget End:    {row.new_budget_end_date}",
        separator,
        f"Old Award Amount:   {row.old_award_amount}",
        f"New Award Amount:   {row.new_award_amount}",
        f"Award Change:       {row.new_award_amount - row.old_award_amount}",
        separator,
        "\n",  # yields the blank lines that separate consecutive entries
        sep="\n",
    )

5F31HD112236-02 10814787
UNIVERSITY OF WASHINGTON
Planning for delivery of novel PrEP formulations to pregnant and postpartum women in Kenya
---------------------------------------------------------------------------
Project Start:      2023-06-15 12:06:00  --  Budget Start:      2024-06-15 12:06:00
Old Project End:    2026-06-14 12:06:00  --  Old Budget End:    2025-06-14 12:06:00
New Project End:    2025-03-31 12:03:00  --  New Budget End:    2025-03-31 12:03:00
---------------------------------------------------------------------------
Old Award Amount:   48780
New Award Amount:   43050
Award Change:       -5730
---------------------------------------------------------------------------


1F31GM153136-01 10826027
JOHNS HOPKINS UNIVERSITY
Understanding how chromosomal makeup and cross-sex hormone administration affect wound healing in mice
---------------------------------------------------------------------------
Project Start:      2024-06-01 12:06:00  --  Budget Start:      2024-0