<a href="https://colab.research.google.com/github/ReidelVichot/LC_identification/blob/main/USASpendingData_4_18_24.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the data files
# referenced later in the notebook can be read from Drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import time

# -- Opt in to Copy-on-Write now; it becomes the default behaviour in pandas 3.0.
pd.set_option("mode.copy_on_write", True)

# -- Data location: `dpath` is the project folder on the mounted Drive and
# -- `fname` is the USAspending prime-transactions CSV, relative to `dpath`.
dpath = "/content/drive/MyDrive/Disertation/"
fname = (
    "USASpendingData/"
    "PrimeTransactionsAndSubawards_2024-03-28_H20M13S03380006/"
    "Assistance_PrimeTransactions_2024-03-28_H20M13S55_1.csv"
)


In [None]:
# Load the prime-transactions CSV. low_memory=False makes pandas parse the file
# in one pass instead of in chunks, which avoids mixed-dtype inference per column.
df = pd.read_csv(f"{dpath}{fname}", low_memory=False)

In [None]:
# columns of interest:
# federal_action_obligation : Amount of the Federal Government's obligation,
#       de-obligation, or liability in dollars for an award transaction. It
#       is what the government promises to pay to non-federal recipients.
# total_obligated_amount : Sum of all the amounts entered in the Action
#       Obligation field.
# indirect_cost_federal_share_amount : Amount of any single Federal award that
#       is allocated to indirect costs according to the approved budget.
# generated_pragmatic_obligations
# non_federal_funding_amount: Amount of the award funded by non-Federal sources
# total_non_federal_funding_amount: The amount of the total award funded by non-
#       Federal sources.
# action_date : The date the action being reported was issued / signed by the
#       Government or a binding agreement was reached.
# action_date_fiscal_year : Note that the Federal fiscal year begins on
#       October 1 and ends on September 30, thus October 1, 2018 is the
#       first day of the 2019 fiscal year.
# period_of_performance_start_date : The Period of Performance is defined in
#       the 2 CFR 200 as the total estimated time interval between the start of
#       an initial Federal award and the planned end date, which may include
#       one or more funded portions, or budget periods.
# period_of_performance_current_end_date : The contract completion date based
#       on the schedule in the contract. For an initial award, this is the
#       scheduled completion date for the base contract and for any options
#       exercised at time of award.
# assistance_transaction_unique_key : System-generated database key used to
#       uniquely identify each financial assistance transaction record and
#       facilitate record lookup, correction, and deletion. A concatenation
#       of AwardingSubTierAgencyCode, FAIN, URI, AssistanceListingNumber, and
#       AwardModificationAmendmentNumber with a single underscore ('_')
#       character inserted in between each. If a field is blank, it is
#       recorded as "".
# recipient_address_line_1
# recipient_address_line_2
# recipient_city_code
# recipient_city_name
# prime_award_transaction_recipient_county_fips_code
# recipient_county_name
# prime_award_transaction_recipient_state_fips_code
# recipient_state_code
# recipient_state_name
# recipient_zip_code


In [None]:
# Columns of interest to keep from the raw transactions table.
# NOTE(review): `total_non_federal_funding_amount` is described in the column
# notes but is not selected here — confirm whether that is intentional.
cols = [
    # obligation / funding amounts
    "federal_action_obligation",
    "total_obligated_amount",
    "generated_pragmatic_obligations",
    "indirect_cost_federal_share_amount",
    "non_federal_funding_amount",
    # action and period-of-performance dates
    "action_date",
    "action_date_fiscal_year",
    "period_of_performance_start_date",
    "period_of_performance_current_end_date",
    # transaction identifier
    "assistance_transaction_unique_key",
    # recipient location
    "recipient_address_line_1",
    "recipient_address_line_2",
    "recipient_city_code",
    "recipient_city_name",
    "prime_award_transaction_recipient_county_fips_code",
    "recipient_county_name",
    "prime_award_transaction_recipient_state_fips_code",
    "recipient_state_code",
    "recipient_state_name",
    "recipient_zip_code",
]

# Restrict the frame to those columns, in the order listed above.
df = df.loc[:, cols]

In [None]:
# County reference table from the Census Bureau. The file is read with
# header=None, so the column names are supplied explicitly.
from_census = "https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt"
colsname = ["State", "State_fips", "County_fips", "County_name", "FIPS_class"]

# Read the table, discard the FIPS class column, and build GEOID as the
# zero-padded 2-character state code followed by the zero-padded 3-character
# county code.
county_fips = (
    pd.read_csv(from_census, names=colsname, header=None)
    .drop(columns="FIPS_class")
    .assign(
        GEOID=lambda tbl: (
            tbl["State_fips"].astype(str).str.zfill(2)
            + tbl["County_fips"].astype(str).str.zfill(3)
        )
    )
)

In [None]:
# Drop transactions whose recipient city code is the "FORGN" marker.
df = df.loc[df["recipient_city_code"] != "FORGN"]
# Drop transactions with a missing recipient state FIPS code.
df = df.dropna(subset=["prime_award_transaction_recipient_state_fips_code"])