In [15]:
import os
import pandas as pd
from llama_cloud_services import LlamaExtract
from schemas import StatementOfCashFlows2024  #This could be adjusted through schemas.py
from dotenv import load_dotenv

In [17]:
# Directory scanned for input PDFs; each sub-directory is processed as one school.
PDF_ROOT = "university_pdfs"

# Id of the LlamaCloud extraction agent to reuse; differs per LlamaCloud account.
AGENT_ID = "54164e43-e77a-4add-89a0-b99a31c1ed87"

# Directory that receives the generated workbooks; created up front if it is missing.
OUTPUT_ROOT = "output_cash_flow"
os.makedirs(OUTPUT_ROOT, exist_ok=True)

In [19]:
load_dotenv()  # make sure the API key is in the .env file
extractor = LlamaExtract(project_id="8c10e62e-3810-4193-915d-d2d11105826d")

# Uncomment the line below only when creating the agent for the first time.
# agent = extractor.create_agent(name = "statement_of_cash_flows-2024", data_schema=StatementOfCashFlows2024)

agent = extractor.get_agent(id=AGENT_ID)

# When True, the local schema from schemas.py replaces the schema stored on the
# agent and the change is saved on every run of this cell. Set to False when the
# schema has not changed and the agent should be used as-is.
SYNC_SCHEMA = True
if SYNC_SCHEMA:
    agent.data_schema = StatementOfCashFlows2024
    agent.save()

In [20]:
# Show the schema currently attached to the agent (displayed as this cell's output).
agent.data_schema

{'additionalProperties': False,
 'description': 'Statement of Cash Flows for the fiscal year 2024 or 2023–2024.\nOnly extract data from the 2023–2024 fiscal period (e.g. statements labeled ‘Fiscal Year 2024’ or date ranges covering 2023–2024).\nIgnore any figures outside this period.\nValues must be captured as dollar amounts (e.g., 1,234.56 means US$1,234.56), not in thousands or other units.\nDo not derive or calculate values unless they appear explicitly in the document.',
 'properties': {'total_change_in_net_assets': {'anyOf': [{'type': 'number'},
    {'type': 'null'}],
   'description': "Cash amount labeled 'Total Change in Net Assets' for the 2024 or 2023–2024 fiscal year, in US dollars. Only extract the exact figure for that period."},
  'total_non_cash_exp': {'anyOf': [{'type': 'number'}, {'type': 'null'}],
   'description': "Aggregate non-cash expenses (e.g., depreciation, amortization) labeled 'Total Non-Cash Exp' for the 2024 or 2023–2024 fiscal year, in US dollars. Only ext

In [23]:
OUTPUT_FILE = os.path.join(OUTPUT_ROOT, "all_schools.xlsx")

# Values that count as "nothing extracted" for a field.
EMPTY_VALUES = (None, "", [])
# Excel caps sheet titles at 31 characters and rejects the characters below.
MAX_SHEET_NAME_LEN = 31
INVALID_SHEET_CHARS = str.maketrans({ch: "_" for ch in "[]:*?/\\"})


def _merge_extraction(combined, data):
    """Fold one document's extracted fields into ``combined`` in place.

    Every key seen in ``data`` ends up in ``combined``. A non-empty value
    overwrites whatever was stored before (so the last document processed
    wins); an empty value never erases an earlier non-empty one.

    Args:
        combined: dict accumulating field -> value for one school.
        data: dict of field -> value returned for a single document.

    Returns:
        The same ``combined`` dict, for convenience.
    """
    for key, value in data.items():
        if value not in EMPTY_VALUES:
            combined[key] = value
        else:
            combined.setdefault(key, None)
    return combined


def _unique_sheet_name(school, taken):
    """Build a valid sheet title for ``school`` that is not already in ``taken``.

    Invalid characters are replaced with ``_`` and the title is cut to the
    Excel length limit. If the result collides with an existing title, a
    numeric suffix (``_2``, ``_3``, ...) is appended within the limit.

    Args:
        school: directory name of the school.
        taken: set of lower-cased sheet titles already used.

    Returns:
        A sheet title whose lower-cased form is not in ``taken``.
    """
    base = school.translate(INVALID_SHEET_CHARS)[:MAX_SHEET_NAME_LEN]
    name, counter = base, 1
    while name.lower() in taken:
        counter += 1
        suffix = f"_{counter}"
        name = base[:MAX_SHEET_NAME_LEN - len(suffix)] + suffix
    return name


# Extraction is slow and can fail part-way, so results are collected in memory
# first and the workbook is only opened once there is something to write.
school_frames = {}        # sheet title -> one-column DataFrame
used_sheet_names = set()  # lower-cased titles, to detect collisions

for school in sorted(os.listdir(PDF_ROOT)):
    school_dir = os.path.join(PDF_ROOT, school)
    if not os.path.isdir(school_dir):
        continue

    combined = {}
    for fname in sorted(os.listdir(school_dir)):
        if not fname.lower().endswith(".pdf"):
            continue
        path = os.path.join(school_dir, fname)
        print(f"Extracting data from {school}/{fname}")
        try:
            run = agent.extract(path)
            # An empty result must not lock the school into "no data";
            # later documents can still contribute fields.
            _merge_extraction(combined, run.data or {})
        except Exception as err:
            # Best effort: a failing document is reported and skipped.
            print(f"Skipped {fname}: {err}")

    if not combined:
        print(f"No data for {school}.")
        continue

    df = pd.DataFrame.from_dict(combined, orient="index", columns=["2023-24"])
    df.index.name = "Metric"
    sheet_name = _unique_sheet_name(school, used_sheet_names)
    used_sheet_names.add(sheet_name.lower())
    school_frames[sheet_name] = df

if school_frames:
    # The context manager closes the workbook even if a sheet fails to write.
    with pd.ExcelWriter(OUTPUT_FILE, engine="openpyxl") as writer:
        for sheet_name, df in school_frames.items():
            df.to_excel(writer, sheet_name=sheet_name)
    print(f"All schools written to {OUTPUT_FILE}")
else:
    print(f"No data extracted; {OUTPUT_FILE} was not written.")

Extracting data from ARIZONA_STATE_UNIVERSITY/FY_2024_Arizona_State_University_Annual_Comprehensive_Financial_Report_for_the_year_ended_06_30_2024__4.1_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:09<00:00,  9.89s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:05<00:00,  5.21s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:22<00:00, 22.11s/it]


Extracting data from ARIZONA_STATE_UNIVERSITY/FY_2024_Arizona_State_University_Continuing_Disclosure_Undertaking_for_the_year_ended_06_30_2024__160_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.59s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:03<00:00,  3.38s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:09<00:00,  9.41s/it]


Extracting data from BRADLEY_UNIVERSITY/Annual_Financial_Information_and_Operating_Data__Rule_15c2-12__for_FY24_for_the_year_ended_05_31_2024__227_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.68s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:03<00:00,  3.79s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:08<00:00,  8.56s/it]


Extracting data from BRADLEY_UNIVERSITY/Audited_Financial_Statements_or_ACFR__Rule_15c2-12__for_FY24_for_the_year_ended_05_31_2024__541_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.80s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.97s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:12<00:00, 12.79s/it]


Extracting data from CALIFORNIA_STATE_UNIVERSITY/Exhibit_1_CSU_Combined_Financial_Stmts_6-30-24_for_the_year_ended_06_30_2024__15.6_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:18<00:00, 19.00s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.99s/it]
Extracting files:   0%|                                   | 0/1 [00:19<?, ?it/s]


Skipped Exhibit_1_CSU_Combined_Financial_Stmts_6-30-24_for_the_year_ended_06_30_2024__15.6_MB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from CALIFORNIA_STATE_UNIVERSITY/Exhibit_1_CSU_Combined_Financial_Stmts_6-30-24_for_the_year_ended_06_30_2024__308_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.05s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.59s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:01<00:00,  1.74s/it]


Extracting data from CALIFORNIA_STATE_UNIVERSITY/Exhibit_1_DRAFT_UNAUDITED_CSU_Combined_Financial_Statements_6-30-24_for_the_year_ended_06_30_2024__1.2_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.18s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.08s/it]
Extracting files:   0%|                                   | 0/1 [00:01<?, ?it/s]


Skipped Exhibit_1_DRAFT_UNAUDITED_CSU_Combined_Financial_Statements_6-30-24_for_the_year_ended_06_30_2024__1.2_MB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from CALIFORNIA_STATE_UNIVERSITY/Exhibit_1_DRAFT_UNAUDITED_CSU_Combined_Financial_Statements_6-30-24_for_the_year_ended_06_30_2024__298_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.40s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.74s/it]
Extracting files:   0%|                                   | 0/1 [00:07<?, ?it/s]


Skipped Exhibit_1_DRAFT_UNAUDITED_CSU_Combined_Financial_Statements_6-30-24_for_the_year_ended_06_30_2024__298_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
No data for CALIFORNIA_STATE_UNIVERSITY.
Extracting data from CORNELL_UNIVERSITY/2024_Audited_Financial_Statements_for_the_year_ended_06_30_2024__788_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.32s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.54s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:11<00:00, 11.66s/it]


Extracting data from CORNELL_UNIVERSITY/2024_Operating_Data_for_the_year_ended_06_30_2024__109_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.11s/it]
Creating extraction jobs:   0%|                           | 0/1 [00:00<?, ?it/s]


Skipped 2024_Operating_Data_for_the_year_ended_06_30_2024__109_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from CORNELL_UNIVERSITY/Incorporate_OS_by_Reference_as_of_04_25_2024__2.4_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.32s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.08s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:14<00:00, 14.81s/it]


Extracting data from CULINARY_INSTITUTE_OF_AMERICA_THE/2024_Annual_Report_-_Corrected_for_the_year_ended_05_31_2024__130_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.19s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:04<00:00,  4.45s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:10<00:00, 10.58s/it]


Extracting data from CULINARY_INSTITUTE_OF_AMERICA_THE/2024_Annual_Report_for_the_year_ended_05_31_2024__129_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.61s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.70s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:01<00:00,  1.81s/it]


Extracting data from CULINARY_INSTITUTE_OF_AMERICA_THE/2024_Audited_Financial_Statements_for_the_year_ended_05_31_2024__277_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.94s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.25s/it]
Extracting files:   0%|                                   | 0/1 [00:10<?, ?it/s]


Skipped 2024_Audited_Financial_Statements_for_the_year_ended_05_31_2024__277_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
No data for CULINARY_INSTITUTE_OF_AMERICA_THE.
Extracting data from GANNON_UNIVERSITY/Audited_Financial_Statements_for_the_year_ended_06_30_2024__786_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.13s/it]
Creating extraction jobs:   0%|                           | 0/1 [00:00<?, ?it/s]


Skipped Audited_Financial_Statements_for_the_year_ended_06_30_2024__786_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from GANNON_UNIVERSITY/Continued_Disclosures_Fall_2024_for_the_year_ended_06_30_2024_Document1__203_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:03<00:00,  3.42s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:06<00:00,  6.53s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:12<00:00, 12.61s/it]


Extracting data from LEWIS_UNIVERSITY/Audited_Financial_Statements_for_the_year_ended_06_30_2024__430_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.19s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.65s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:14<00:00, 14.83s/it]


Extracting data from LEWIS_UNIVERSITY/Continuing_Disclosure_for_the_year_ended_06_30_2024__298_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.62s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.16s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:02<00:00,  2.72s/it]


Extracting data from MICHIGAN_STATE_UNIVERSITY/Updates_of_Tables_of_Operating_Information_for_the_year_ended_06_30_2024_Document1__143_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.48s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.69s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:02<00:00,  2.97s/it]


Extracting data from MICHIGAN_STATE_UNIVERSITY/Updates_of_Tables_of_Operating_Information_for_the_year_ended_06_30_2024_Document2__8.4_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:11<00:00, 11.04s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.70s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:29<00:00, 29.26s/it]


No data for MICHIGAN_STATE_UNIVERSITY.
Extracting data from MOLLOY_COLLEGE/Financial_Operating_Filing_for_the_year_ended_06_30_2024_Document1__304_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.73s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.76s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:10<00:00, 10.03s/it]


Extracting data from MOLLOY_COLLEGE/Financial_Operating_Filing_for_the_year_ended_06_30_2024_Document2__142_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.09s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.91s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:03<00:00,  3.81s/it]


Extracting data from MOUNT_ST_MARY_S_UNIVERSITY_INC/Audited_Annual_Financials_for_the_year_ended_06_30_2024_Document1__29.1_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:21<00:00, 21.40s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.11s/it]
Extracting files:   0%|                                   | 0/1 [00:18<?, ?it/s]


Skipped Audited_Annual_Financials_for_the_year_ended_06_30_2024_Document1__29.1_MB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from MOUNT_ST_MARY_S_UNIVERSITY_INC/Audited_Annual_Financials_for_the_year_ended_06_30_2024_Document2__5_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:07<00:00,  7.79s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.34s/it]
Extracting files:   0%|                                   | 0/1 [00:15<?, ?it/s]


Skipped Audited_Annual_Financials_for_the_year_ended_06_30_2024_Document2__5_MB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
No data for MOUNT_ST_MARY_S_UNIVERSITY_INC.
Extracting data from NEW_YORK_UNIVERSITY/2024_Certificate_of_Compliance_-_Audit_for_the_year_ended_06_30_2024__330_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.54s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.33s/it]
Extracting files:   0%|                                   | 0/1 [00:01<?, ?it/s]


Skipped 2024_Certificate_of_Compliance_-_Audit_for_the_year_ended_06_30_2024__330_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from NEW_YORK_UNIVERSITY/2024_Certificate_of_Compliance_for_the_year_ended_06_30_2024__323_KB_.pdf


Uploading files:   0%|                                    | 0/1 [00:01<?, ?it/s]


Skipped 2024_Certificate_of_Compliance_for_the_year_ended_06_30_2024__323_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from NEW_YORK_UNIVERSITY/2024_Consolidated_Financial_Statements_New_York_University_for_the_year_ended_06_30_2024__466_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:03<00:00,  3.65s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:03<00:00,  3.59s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:08<00:00,  8.28s/it]


Extracting data from NEW_YORK_UNIVERSITY/2024_Operating_Data_for_the_year_ended_06_30_2024__244_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.80s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.88s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:10<00:00, 10.05s/it]


Extracting data from OHIO_STATE_UNIVERSITY_THE/The_Ohio_State_University_Annual_Update_to_Bondholders_for_the_year_ended_06_30_2024__577_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.94s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.15s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:01<00:00,  1.72s/it]


Extracting data from OHIO_STATE_UNIVERSITY_THE/The_Ohio_State_University_Quarterly_Update_to_Bondholders_for_the_quarter_ended_03_31_2024__577_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.98s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:04<00:00,  4.99s/it]
Extracting files:   0%|                                   | 0/1 [00:10<?, ?it/s]


Skipped The_Ohio_State_University_Quarterly_Update_to_Bondholders_for_the_quarter_ended_03_31_2024__577_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from OHIO_STATE_UNIVERSITY_THE/The_Ohio_State_University_Quarterly_Update_to_Bondholders_for_the_quarter_ended_06_30_2024__576_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:04<00:00,  4.17s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.47s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:09<00:00,  9.65s/it]


Extracting data from OHIO_STATE_UNIVERSITY_THE/The_Ohio_State_University_Quarterly_Update_to_Bondholders_for_the_quarter_ended_09_30_2024__574_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.06s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.74s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:09<00:00,  9.87s/it]


Extracting data from OHIO_STATE_UNIVERSITY_THE/The_Ohio_State_University_Quarterly_Update_to_Bondholders_for_the_quarter_ended_12_31_2024__815_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:04<00:00,  4.98s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.29s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:11<00:00, 11.09s/it]


No data for OHIO_STATE_UNIVERSITY_THE.
Extracting data from PRESIDENT___FELLOWS_OF_HARVARD_COLLEGE/Harvard_University_Audited_Financial_Information_for_the_year_ended_06_30_2024__10.6_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:03<00:00,  3.81s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.77s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:16<00:00, 16.80s/it]


Extracting data from PRESIDENT___FELLOWS_OF_HARVARD_COLLEGE/Harvard_University_Financial_Report_for_the_year_ended_06_30_2024__10.6_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:08<00:00,  8.45s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:03<00:00,  3.99s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:01<00:00,  1.94s/it]


Extracting data from PRESIDENT___FELLOWS_OF_HARVARD_COLLEGE/Harvard_University_Student_Applications_and_Enrollment_for_the_year_ended_06_30_2024__557_KB_.pdf


Uploading files:   0%|                                    | 0/1 [00:01<?, ?it/s]


Skipped Harvard_University_Student_Applications_and_Enrollment_for_the_year_ended_06_30_2024__557_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end: Internal Server Error. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: 578a26da-654c-4940-8ef9-e09a7a97ad8a'}
Extracting data from STEVENSON_UNIVERSITY_INC/2024_Annual_Compliance_Certificate__EagleBank__for_the_year_ended_06_30_2024__3_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.32s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.03s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:15<00:00, 15.98s/it]


Extracting data from STEVENSON_UNIVERSITY_INC/2024_Annual_Compliance_Certificate_for_the_year_ended_06_30_2024__2.8_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:06<00:00,  6.95s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.26s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:11<00:00, 11.28s/it]


Extracting data from STEVENSON_UNIVERSITY_INC/2024_Audited_Financial_Statements_for_the_year_ended_06_30_2024__348_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.02s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:02<00:00,  2.90s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:12<00:00, 12.70s/it]


Extracting data from STEVENSON_UNIVERSITY_INC/2024_Operating_Data_for_the_year_ended_06_30_2024__196_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.16s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.69s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:10<00:00, 10.38s/it]


Extracting data from STEVENS_INSTITUTE_OF_TECHNOLOGY/Annual_Report_for_the_year_ended_06_30_2024__216_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.20s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:03<00:00,  3.17s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:10<00:00, 10.02s/it]


Extracting data from STEVENS_INSTITUTE_OF_TECHNOLOGY/Audit_Financial_Statement_for_the_year_ended_06_30_2024__626_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.37s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.40s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:01<00:00,  1.76s/it]


Extracting data from ST_LOUIS_UNIVERSITY_US/Amendment_to_Continuing_Disclosure_Undertaking_dated_01_05_2024__392_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.04s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.54s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:09<00:00,  9.11s/it]


Extracting data from ST_LOUIS_UNIVERSITY_US/Audited_Financials_and_Operating_Data_for_the_year_ended_06_30_2024_Document1__561_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:03<00:00,  3.18s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:03<00:00,  3.02s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:10<00:00, 10.75s/it]


Extracting data from ST_LOUIS_UNIVERSITY_US/Audited_Financials_and_Operating_Data_for_the_year_ended_06_30_2024_Document2__174_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:01<00:00,  1.38s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:01<00:00,  1.59s/it]
Extracting files:   0%|                                   | 0/1 [00:14<?, ?it/s]


Skipped Audited_Financials_and_Operating_Data_for_the_year_ended_06_30_2024_Document2__174_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from TEXAS_A_M_UNIVERSITY/FY_2024_Continuing_Disclosure_Annual_Report_for_the_year_ended_08_31_2024__2.3_MB_.pdf


Uploading files:   0%|                                    | 0/1 [00:01<?, ?it/s]


Skipped FY_2024_Continuing_Disclosure_Annual_Report_for_the_year_ended_08_31_2024__2.3_MB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from TEXAS_A_M_UNIVERSITY/Texas_A_M_University_System_Unaudited_Annual_Financial_Reports_for_the_year_ended_08_31_2024__2_MB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:02<00:00,  2.32s/it]
Creating extraction jobs:   0%|                           | 0/1 [00:00<?, ?it/s]


Skipped Texas_A_M_University_System_Unaudited_Annual_Financial_Reports_for_the_year_ended_08_31_2024__2_MB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
No data for TEXAS_A_M_UNIVERSITY.
Extracting data from UNIVERSITY_OF_COLORADO/Financial_and_Operating_Data__Fiscal_Year_2024_for_the_year_ended_06_30_2024__278_KB_.pdf


Uploading files: 100%|████████████████████████████| 1/1 [00:03<00:00,  3.29s/it]
Creating extraction jobs: 100%|███████████████████| 1/1 [00:04<00:00,  4.80s/it]
Extracting files: 100%|███████████████████████████| 1/1 [00:01<00:00,  1.64s/it]


No data for UNIVERSITY_OF_COLORADO.
Extracting data from UNIVERSITY_OF_MINNESOTA/Annual_Financial_Information_and_Operating_Data_for_the_year_ended_06_30_2024_Document1__1.3_MB_.pdf


Uploading files:   0%|                                    | 0/1 [00:01<?, ?it/s]


Skipped Annual_Financial_Information_and_Operating_Data_for_the_year_ended_06_30_2024_Document1__1.3_MB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
Extracting data from UNIVERSITY_OF_MINNESOTA/Annual_Financial_Information_and_Operating_Data_for_the_year_ended_06_30_2024_Document2__315_KB_.pdf


Uploading files:   0%|                                    | 0/1 [00:02<?, ?it/s]

Skipped Annual_Financial_Information_and_Operating_Data_for_the_year_ended_06_30_2024_Document2__315_KB_.pdf: status_code: 500, body: {'detail': 'Oops! Something went wrong on our end. Please try again in a few minutes. If the problem persists, please contact support by clicking the chat icon on cloud.llamaindex.ai providing this correlation ID: None'}
No data for UNIVERSITY_OF_MINNESOTA.
All schools written to output_cash_flow/all_schools.xlsx





In [24]:
# Optional: combine all the per-school tabs into a single sheet,
# one row per school and one column per metric.
file_path   = os.path.join(OUTPUT_ROOT, "all_schools.xlsx")
output_path = os.path.join(OUTPUT_ROOT, "all_schools_combined.xlsx")

# sheet_name=None loads every tab as {sheet title: DataFrame}; the first
# column of each tab (the metric names) becomes the index.
raw = pd.read_excel(file_path, sheet_name=None, index_col=0)

school_series = {
    school: df.iloc[:, 0]                      # first (only) value column
    for school, df in raw.items()
    if df.shape[1] > 0                         # skip tabs with no value column
}

df_comb = pd.DataFrame(school_series).T
df_comb.index.name = "School"
# Plain ASCII hyphen so the value can be typed, searched and filtered in Excel.
df_comb.insert(0, "Year", "2023-2024")

with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    df_comb.to_excel(writer, sheet_name="Combined")

print("Saved:", output_path)

Saved: output_cash_flow/all_schools_combined.xlsx
