In [1]:
import duckdb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from datetime import datetime, timedelta

# 1. Retrieve entries where the Project End and Award Amount have changed

This first set of data focuses on grants which do not have an activity code that starts with F. Later on in this notebook, I pull out F grants, but I believe some date and funding changes for F type grants may reflect graduations. Date and funding changes for non-training grants may be rarer.

In [2]:
# Level 1: join the current project snapshot to the one stored under
# json_2025_03_09 on appl_id, and keep non-F activity codes whose project end
# date moved earlier AND whose award amount went down.
level_1_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_09/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_award_amount < old_award_amount
      AND new_data.project_num_split.activity_code NOT LIKE 'F%'
    ORDER BY new_project_end_date DESC
    """
level_1_csv = "/home/jovyan/public/weekly/week_of_2025_03_16/data/project_changes_level_1_project_end_and_award_changes.csv"

# Run the comparison and materialize it as a pandas DataFrame; later cells
# read the result through the name `data`.
level_1_relation = duckdb.query(level_1_sql)
data = level_1_relation.to_df()

# Export the matches without the positional index column.
data.to_csv(level_1_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

For convenience, below I will write out the projects which have new project end dates after the last inauguration in a more human-readable format.

In [3]:
# Print a plain-text summary block for every project in `data` whose revised
# project end date is on or after 2025-01-20.
select = data.new_project_end_date >= datetime(2025, 1, 20)
for ind, row in data[select].iterrows():
    # Header: project number, application id, organization and title.
    print(row.project_num, row.appl_id)
    print(row.org_name)
    print(row.project_title)
    print("-" * 75)
    # Project dates on the left, budget-period dates on the right.
    print(f"Project Start:      {row.project_start_date}  --  Budget Start:      {row.budget_start}")
    print(f"Old Project End:    {row.old_project_end_date}  --  Old Budget End:    {row.old_budget_end_date}")
    print(f"New Project End:    {row.new_project_end_date}  --  New Budget End:    {row.new_budget_end_date}")
    print("-" * 75)
    print(f"Old Award Amount:   {row.old_award_amount}")
    print(f"New Award Amount:   {row.new_award_amount}")
    # The label is padded to 20 characters so the value lines up with the two
    # amounts above (it was one space short, leaving the value misaligned).
    print(f"Award Change:       {row.new_award_amount - row.old_award_amount}")
    print("-" * 75)
    print("\n")

1P50MD019473-01 10899830
STATE UNIVERSITY OF NEW YORK AT BUFFALO
Igniting Hope in Buffalo New York communities: Training the Next Generation of Health Equity Researchers
---------------------------------------------------------------------------
Project Start:      2024-07-19 12:07:00  --  Budget Start:      2024-04-01 12:04:00
Old Project End:    2029-06-30 12:06:00  --  Old Budget End:    2025-03-31 12:03:00
New Project End:    2025-03-12 12:03:00  --  New Budget End:    2025-03-12 12:03:00
---------------------------------------------------------------------------
Old Award Amount:   125331
New Award Amount:   125330
Award Change:      -1
---------------------------------------------------------------------------




# 2. Retrieve entries where just the Project End has changed

It is likely that some projects were terminated after using up all of their funding. These projects will only have a date change. I separate them out from the previous category because when I look at date changes in the database, it is difficult to know whether a project had its end date moved up for legitimate reasons or whether a nefarious grant termination was involved.

In [4]:
# Level 2: join the current project snapshot to the one stored under
# json_2025_03_09 on appl_id, and keep non-F activity codes whose project end
# date moved earlier while the award amount stayed the same.
#
# The award comparison uses IS NOT DISTINCT FROM rather than `=`: with `=`,
# a project whose award amount is NULL in both snapshots evaluates to NULL and
# is silently dropped, even though its award amount did not change.
data = duckdb.query(
    """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_09/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_data.award_amount IS NOT DISTINCT FROM old_data.award_amount
      AND new_data.project_num_split.activity_code NOT LIKE 'F%'
    ORDER BY new_project_end_date DESC
    """
).to_df()

# Export the matches without the positional index column.
data.to_csv(
    "/home/jovyan/public/weekly/week_of_2025_03_16/data/project_changes_level_2_project_end_changes.csv",
    index=False
)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [5]:
# Keep projects whose revised end date falls between 2025-01-20 and
# 2025-06-30, both bounds included.
window_start = datetime(2025, 1, 20)
window_end = datetime(2025, 6, 30)
new_end_dates = data.new_project_end_date
select = (new_end_dates >= window_start) & (new_end_dates <= window_end)

# Number of affected applications per organization, largest count first.
applications_per_org = data[select].groupby("org_name")["appl_id"].count()
applications_per_org.sort_values(ascending=False)

org_name
COLUMBIA UNIVERSITY HEALTH SCIENCES         178
UNIVERSITY OF SOUTH CAROLINA AT COLUMBIA      5
UNIVERSITY OF CALIFORNIA, SAN FRANCISCO       4
JOHNS HOPKINS UNIVERSITY                      3
COLUMBIA UNIV NEW YORK MORNINGSIDE            3
                                           ... 
UNIVERSITY OF UTAH                            1
UNIVERSITY OF WASHINGTON                      1
UNIVERSITY OF VERMONT & ST AGRIC COLLEGE      1
US HELPING US, PEOPLE INTO LIVING, INC.       1
YALE UNIVERSITY                               1
Name: appl_id, Length: 72, dtype: int64

In [6]:
# Select projects whose revised end date lies in [2025-01-20, 2025-06-30]
# and print one plain-text summary block per project.
select = data.new_project_end_date.between(
    datetime(2025, 1, 20),
    datetime(2025, 6, 30),
)
print(f"Displaying {select.sum()} projects with changes")
print()

divider = "-" * 75
for _, project in data[select].iterrows():
    award_delta = project.new_award_amount - project.old_award_amount
    # Assemble the whole block first, then emit it with a single print call.
    block_lines = [
        f"{project.project_num} {project.appl_id}",
        f"{project.org_name}",
        f"{project.project_title}",
        divider,
        f"Project Start:      {project.project_start_date}  --  Budget Start:      {project.budget_start}",
        f"Old Project End:    {project.old_project_end_date}  --  Old Budget End:    {project.old_budget_end_date}",
        f"New Project End:    {project.new_project_end_date}  --  New Budget End:    {project.new_budget_end_date}",
        divider,
        f"Old Award Amount:   {project.old_award_amount}",
        f"New Award Amount:   {project.new_award_amount}",
        f"Award Change:       {award_delta}",
        divider,
        # Trailing "\n" entry reproduces the two blank lines between blocks.
        "\n",
    ]
    print("\n".join(block_lines))

Displaying 283 projects with changes

5R01CA244185-05 10851995
SLOAN-KETTERING INST CAN RESEARCH
A Randomized Controlled Trial of Emotion Regulation Therapy for Cancer Caregivers: A Mechanism-Targeted Approach to Addressing Caregiver Distress
---------------------------------------------------------------------------
Project Start:      2020-09-01 12:09:00  --  Budget Start:      2024-06-01 12:06:00
Old Project End:    2026-05-31 12:05:00  --  Old Budget End:    2026-05-31 12:05:00
New Project End:    2025-05-31 12:05:00  --  New Budget End:    2025-05-31 12:05:00
---------------------------------------------------------------------------
Old Award Amount:   577094
New Award Amount:   577094
Award Change:       0
---------------------------------------------------------------------------


5K08CA241296-06 10684912
FRED HUTCHINSON CANCER CENTER
Developing an intervention to improve follow-up of abnormal fecal immunochemical test results in a safety-net population using a mixed methods a

# 3. Retrieve F grant entries where the Project End has changed

As mentioned above, I believe it can be difficult to determine which grants under this list had project end changes because of the current administration and which grants had individuals graduate. Therefore, I am more cautious with the filtering on this list.

In [7]:
# Level 3: same snapshot comparison as the earlier sections, restricted to
# activity codes starting with "F" whose project end date moved earlier to a
# date on or after 2025-01-20. Award amounts are reported but not filtered on.
f_grant_sql = """
    SELECT new_data.date_added,
           new_data.appl_id,
           new_data.project_num,
           new_data.project_num_split.activity_code activity_code,
           new_data.project_title,
           new_data.organization.org_name,
           old_data.project_start_date,
           new_data.project_end_date new_project_end_date,
           old_data.project_end_date old_project_end_date,
           old_data.budget_start,
           new_data.budget_end new_budget_end_date,
           old_data.budget_end old_budget_end_date,
           new_data.award_amount new_award_amount,
           old_data.award_amount old_award_amount,
           new_data.award_amount - old_data.award_amount award_amount_change
    FROM read_json('../../../data/json/projects/year_added=202[345]/*/*') AS new_data
    INNER JOIN read_json('../../../data/json_2025_03_09/projects/year_added=202[345]/*/*') AS old_data
      ON new_data.appl_id = old_data.appl_id
    WHERE new_data.project_end_date < old_data.project_end_date
      AND new_data.project_num_split.activity_code LIKE 'F%'
      AND new_data.project_end_date >= '2025-01-20'
    ORDER BY new_project_end_date DESC
    """

# Materialize the query result as the DataFrame the following cell reads.
data = duckdb.query(f_grant_sql).to_df()

# Export the matches without the positional index column.
f_grant_csv = "/home/jovyan/public/weekly/week_of_2025_03_16/data/project_changes_level_3_training_grant_changes.csv"
data.to_csv(f_grant_csv, index=False)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [8]:
def _print_f_grant_change(record):
    """Print one plain-text summary block for a single row of `data`."""
    rule = "-" * 75
    print(record.project_num, record.appl_id)
    print(record.org_name)
    print(record.project_title)
    print(rule)
    print(f"Project Start:      {record.project_start_date}  --  Budget Start:      {record.budget_start}")
    print(f"Old Project End:    {record.old_project_end_date}  --  Old Budget End:    {record.old_budget_end_date}")
    print(f"New Project End:    {record.new_project_end_date}  --  New Budget End:    {record.new_budget_end_date}")
    print(rule)
    print(f"Old Award Amount:   {record.old_award_amount}")
    print(f"New Award Amount:   {record.new_award_amount}")
    print(f"Award Change:       {record.new_award_amount - record.old_award_amount}")
    print(rule)
    print("\n")


# Keep rows whose revised end date is between 2025-01-20 and 2025-06-30,
# both bounds included.
revised_end = data.new_project_end_date
select = revised_end.ge(datetime(2025, 1, 20)) & revised_end.le(datetime(2025, 6, 30))

print(f"Displaying {select.sum()} projects with changes")
print()
for _, record in data[select].iterrows():
    _print_f_grant_change(record)

Displaying 43 projects with changes

5F30DK134109-03 10862718
COLUMBIA UNIVERSITY HEALTH SCIENCES
Elucidating the role of Fra1 in pancreatic Kras-driven acinar to ductal metaplasia
---------------------------------------------------------------------------
Project Start:      2022-07-01 12:07:00  --  Budget Start:      2024-07-01 12:07:00
Old Project End:    2026-06-30 12:06:00  --  Old Budget End:    2025-06-30 12:06:00
New Project End:    2025-03-14 12:03:00  --  New Budget End:    2025-03-14 12:03:00
---------------------------------------------------------------------------
Old Award Amount:   53974
New Award Amount:   53974
Award Change:       0
---------------------------------------------------------------------------


1F31NR021239-01 10900078
COLUMBIA UNIVERSITY HEALTH SCIENCES
Personal Healthcare Networks of Transgender and Gender-Diverse Adults After Gender-Affirming Surgery
---------------------------------------------------------------------------
Project Start:      2024-