In [1]:
import duckdb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from datetime import datetime, timedelta

# 1. Retrieve entries where the Project End and Award Amount have changed

This first set of data focuses on grants which do not have an activity code that starts with F. Later on in this notebook, I pull out F grants, but I believe some date and funding changes for F type grants may reflect graduations. Date and funding changes for non-training grants may be rarer.

In [2]:
# Level 1: compare the snapshot under data/json (new_data) against the one
# under data/json_2025_03_23 (old_data), joined on appl_id. Keep rows whose
# project end date moved earlier AND whose award amount decreased, excluding
# activity codes that start with "F". Rows are ordered by the new project end
# date, latest first.
level_1_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_23/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_award_amount < old_award_amount
      AND new_data.project_num_split.activity_code NOT LIKE 'F%'
    ORDER BY new_project_end_date DESC
    """
level_1_csv = "/home/jovyan/public/weekly/week_of_2025_03_30/data/project_changes_level_1_project_end_and_award_changes.csv"

# Materialize the query result as a pandas DataFrame; later cells read `data`.
data = duckdb.query(level_1_sql).to_df()

# Persist the full result (without the DataFrame index) for sharing.
data.to_csv(level_1_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

For convenience, below I will write out the projects which have new project end dates on or after the last inauguration (January 20, 2025) in a more human-readable format.

In [3]:
# Print a plain-text record for every row of `data` whose new project end
# date is on or after 2025-01-20. Each record shows the project identifiers,
# the old/new project and budget end dates, and the old/new award amounts.
select = data.new_project_end_date >= datetime(2025, 1, 20)
for ind, row in data[select].iterrows():
    print(row.project_num, row.appl_id)
    print(row.org_name)
    print(row.project_title)
    print("-" * 75)
    print(f"Project Start:      {row.project_start_date}  --  Budget Start:      {row.budget_start}")
    print(f"Old Project End:    {row.old_project_end_date}  --  Old Budget End:    {row.old_budget_end_date}")
    print(f"New Project End:    {row.new_project_end_date}  --  New Budget End:    {row.new_budget_end_date}")
    print("-" * 75)
    print(f"Old Award Amount:   {row.old_award_amount}")
    print(f"New Award Amount:   {row.new_award_amount}")
    # Label padded to 20 characters so the value lines up with the two award
    # amount lines above (the label was previously one space short).
    print(f"Award Change:       {row.new_award_amount - row.old_award_amount}")
    print("-" * 75)
    # print("\n") emits two newlines, leaving a blank gap between records.
    print("\n")

7R01HD105033-05 11247288
MORGRIDGE INSTITUTE FOR RESEARCH, INC.
Regulation of Appendage Regeneration in Zebrafish
---------------------------------------------------------------------------
Project Start:      2025-01-01 12:01:00  --  Budget Start:      2025-01-01 12:01:00
Old Project End:    2027-05-31 12:05:00  --  Old Budget End:    2026-05-31 12:05:00
New Project End:    2026-05-31 12:05:00  --  New Budget End:    2025-05-31 12:05:00
---------------------------------------------------------------------------
Old Award Amount:   101551
New Award Amount:   98255
Award Change:      -3296
---------------------------------------------------------------------------


5DP1DA056667-03 10898010
VANDERBILT UNIVERSITY
Framework to Accelerate Substance Use Disorder Genetic Studies through Customizable, EHR-Based Precision Phenotyping
---------------------------------------------------------------------------
Project Start:      2022-09-01 12:09:00  --  Budget Start:      2024-08-01 12:08:00
Ol

# 2. Retrieve entries where just the Project End has changed

It is likely that some projects were terminated after using up all of their funding. These projects will only have a date change. I separate them out from the previous category because when I look at date changes in the database, it is difficult to know whether a project had its end date moved up for legitimate reasons or whether a nefarious grant termination was involved.

In [4]:
# Level 2: same join of the data/json snapshot (new_data) against the
# data/json_2025_03_23 snapshot (old_data) on appl_id, but keeping rows whose
# project end date moved earlier while the award amount stayed equal.
# Activity codes that start with "F" are excluded. Rows are ordered by the new
# project end date, latest first.
level_2_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_23/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_award_amount = old_award_amount
      AND new_data.project_num_split.activity_code NOT LIKE 'F%'
    ORDER BY new_project_end_date DESC
    """
level_2_csv = "/home/jovyan/public/weekly/week_of_2025_03_30/data/project_changes_level_2_project_end_changes.csv"

# Materialize the query result as a pandas DataFrame; later cells read `data`.
data = duckdb.query(level_2_sql).to_df()

# Persist the full result (without the DataFrame index) for sharing.
data.to_csv(level_2_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [5]:
# Count, per organization, the rows of `data` whose new project end date lies
# between 2025-01-20 and 2025-06-30 (both bounds inclusive), largest first.
window_start = datetime(2025, 1, 20)
window_end = datetime(2025, 6, 30)
select = (data.new_project_end_date >= window_start) & (data.new_project_end_date <= window_end)

per_org_counts = data[select].groupby("org_name").appl_id.count()
per_org_counts.sort_values(ascending=False)

org_name
UNIV OF NORTH CAROLINA CHAPEL HILL         19
UNIVERSITY OF CALIFORNIA, SAN FRANCISCO    17
COLUMBIA UNIVERSITY HEALTH SCIENCES        13
FLORIDA STATE UNIVERSITY                   13
UNIVERSITY OF CHICAGO                      13
                                           ..
UNIVERSITY OF UTAH                          1
WAKE FOREST UNIVERSITY HEALTH SCIENCES      1
WADSWORTH CENTER                            1
WEILL MEDICAL COLL OF CORNELL UNIV          1
WHITMAN-WALKER INSTITUTE, INC.              1
Name: appl_id, Length: 152, dtype: int64

In [6]:
# Print a plain-text record for every row of `data` whose new project end
# date lies between 2025-01-20 and 2025-06-30 (both bounds inclusive),
# preceded by a one-line count of how many records follow.
window_start = datetime(2025, 1, 20)
window_end = datetime(2025, 6, 30)
select = (data.new_project_end_date >= window_start) & (data.new_project_end_date <= window_end)

# The trailing "\n" plus print's own newline leaves one blank line after the
# header.
print(f"Displaying {select.sum()} projects with changes\n")

rule = "-" * 75
for ind, row in data[select].iterrows():
    record_lines = [
        f"{row.project_num} {row.appl_id}",
        str(row.org_name),
        str(row.project_title),
        rule,
        f"Project Start:      {row.project_start_date}  --  Budget Start:      {row.budget_start}",
        f"Old Project End:    {row.old_project_end_date}  --  Old Budget End:    {row.old_budget_end_date}",
        f"New Project End:    {row.new_project_end_date}  --  New Budget End:    {row.new_budget_end_date}",
        rule,
        f"Old Award Amount:   {row.old_award_amount}",
        f"New Award Amount:   {row.new_award_amount}",
        f"Award Change:       {row.new_award_amount - row.old_award_amount}",
        rule,
        # Final "\n" element + print's newline = blank gap between records.
        "\n",
    ]
    print("\n".join(record_lines))

Displaying 400 projects with changes

5P30AG068345-05 10861846
BUCK INSTITUTE FOR RESEARCH ON AGING
The University of Southern California and Buck Institute Nathan Shock Center
---------------------------------------------------------------------------
Project Start:      2020-09-15 12:09:00  --  Budget Start:      2024-06-01 12:06:00
Old Project End:    2026-05-31 12:05:00  --  Old Budget End:    2026-05-31 12:05:00
New Project End:    2025-05-31 12:05:00  --  New Budget End:    2025-05-31 12:05:00
---------------------------------------------------------------------------
Old Award Amount:   923306
New Award Amount:   923306
Award Change:       0
---------------------------------------------------------------------------


5P30AG068345-05 10861850
BUCK INSTITUTE FOR RESEARCH ON AGING
Cellular Senescence and Beyond Core
---------------------------------------------------------------------------
Project Start:      2020-09-15 12:09:00  --  Budget Start:      2024-06-01 12:06:00
Old Pro

# 3. Retrieve F grant entries where the Project End has changed

As mentioned above, I believe it can be difficult to determine which grants on this list had project end changes because of the current administration and which grants had individuals graduate. Therefore, I am more cautious with the filtering on this list.

In [7]:
# Level 3: join the data/json snapshot (new_data) against the
# data/json_2025_03_23 snapshot (old_data) on appl_id and keep only activity
# codes that start with "F" whose project end date moved earlier and whose
# new project end date is on or after 2025-01-20. There is no filter on the
# award amount here. Rows are ordered by the new project end date, latest
# first.
level_3_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_23/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_data.project_num_split.activity_code LIKE 'F%'
      AND new_data.project_end_date >= '2025-01-20'
    ORDER BY new_project_end_date DESC
    """
level_3_csv = "/home/jovyan/public/weekly/week_of_2025_03_30/data/project_changes_level_3_training_grant_changes.csv"

# Materialize the query result as a pandas DataFrame; later cells read `data`.
data = duckdb.query(level_3_sql).to_df()

# Persist the full result (without the DataFrame index) for sharing.
data.to_csv(level_3_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [8]:
# Print a plain-text record for every F-grant row of `data` whose new project
# end date lies between 2025-01-20 and 2025-06-30 (both bounds inclusive),
# preceded by a one-line count of how many records follow.
earliest_end = datetime(2025, 1, 20)
latest_end = datetime(2025, 6, 30)
select = (data.new_project_end_date >= earliest_end) & (data.new_project_end_date <= latest_end)

# The trailing "\n" plus print's own newline leaves one blank line after the
# header.
print(f"Displaying {select.sum()} projects with changes\n")

divider = "-" * 75
for ind, row in data[select].iterrows():
    header = [
        f"{row.project_num} {row.appl_id}",
        str(row.org_name),
        str(row.project_title),
    ]
    dates = [
        f"Project Start:      {row.project_start_date}  --  Budget Start:      {row.budget_start}",
        f"Old Project End:    {row.old_project_end_date}  --  Old Budget End:    {row.old_budget_end_date}",
        f"New Project End:    {row.new_project_end_date}  --  New Budget End:    {row.new_budget_end_date}",
    ]
    amounts = [
        f"Old Award Amount:   {row.old_award_amount}",
        f"New Award Amount:   {row.new_award_amount}",
        f"Award Change:       {row.new_award_amount - row.old_award_amount}",
    ]
    # Final "\n" element + print's newline = blank gap between records.
    print("\n".join(header + [divider] + dates + [divider] + amounts + [divider, "\n"]))

Displaying 14 projects with changes

5F30AG081092-02 10831386
UNIVERSITY OF CONNECTICUT SCH OF MED/DNT
Investigating the Role of p21-Highly Expressing Senescent Cells in Alzheimer's Dementia
---------------------------------------------------------------------------
Project Start:      2023-04-15 12:04:00  --  Budget Start:      2024-04-15 12:04:00
Old Project End:    2027-04-14 12:04:00  --  Old Budget End:    2025-04-14 12:04:00
New Project End:    2025-04-14 12:04:00  --  New Budget End:    2025-04-14 12:04:00
---------------------------------------------------------------------------
Old Award Amount:   45187
New Award Amount:   45187
Award Change:       0
---------------------------------------------------------------------------


5F31EY034386-02 10876269
VANDERBILT UNIVERSITY
Targeted prostanoid inhibition as an anti-inflammatory therapy for diabetic retinopathy
---------------------------------------------------------------------------
Project Start:      2023-07-01 12:07:00  -