# Working with the FERC Form 1 Extract / Transform
This notebook steps through PUDL's extract and transform steps for FERC Form 1 to make it easier to test and add new years of data, or new tables from the various spreadsheets that haven't been integrated yet.

In [233]:
%load_ext autoreload
%autoreload 3
import pudl
import logging
import sys
from pathlib import Path
import pandas as pd
pd.options.display.max_columns = None

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [234]:
logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stdout)
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
logger.handlers = [handler]

In [235]:
pudl_settings = pudl.workspace.setup.get_defaults()

## Pick the tables you want to load

In [236]:
# tables = [
#     "balance_sheet_assets_ferc1",
#     "balance_sheet_liabilities_ferc1",
#     "depreciation_amortization_summary_ferc1",
#     "electric_energy_dispositions_ferc1",
#     "electric_energy_sources_ferc1",
#     "electric_opex_ferc1",
#     "electric_plant_depreciation_changes_ferc1",
#     "fuel_ferc1",
#     "income_statement_ferc1",
#     "plants_hydro_ferc1",
#     "plants_pumped_storage_ferc1",
#     "plants_small_ferc1",
#     "plants_steam_ferc1",
#     "plant_in_service_ferc1",
#     "purchased_power_ferc1",
#     "retained_earnings_ferc1",
#     "transmission_statistics_ferc1",
#     "utility_plant_summary_ferc1",
#     "electricity_sales_by_rate_schedule_ferc1"
# ]

#tables = ["electricity_sales_by_rate_schedule_ferc1", "income_statement_ferc1", "balance_sheet_assets_ferc1"]
tables = ["plants_steam_ferc1", "fuel_ferc1"]

#table = "electricity_sales_by_rate_schedule_ferc1"

In [237]:
ferc1_settings = pudl.settings.Ferc1Settings(tables=tables)

## Extract DBF and XBRL Data:

In [None]:
# Extract old FERC form 1 data from DBF (2020 -)
ferc1_dbf_raw_dfs = pudl.extract.ferc1.extract_dbf(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)
# Extract new FERC form 1 data from XBRL (2021 + )
ferc1_xbrl_raw_dfs = pudl.extract.ferc1.extract_xbrl(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)
# Extract XBRL metadata
xbrl_metadata_json_dict = pudl.extract.ferc1.extract_xbrl_metadata(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)
#xbrl_metadata_json_dict = {table: pudl.extract.ferc1.extract_xbrl_metadata(ferc1_settings, pudl_settings)[table] for table in tables}
xbrl_metadata_json_dict = pudl.extract.ferc1.extract_xbrl_metadata(
    ferc1_settings=ferc1_settings, pudl_settings=pudl_settings
)

In [None]:
ferc1_xbrl_raw_dfs[table]

## Transform FERC 1 Tables:

In [None]:
from pudl.transform.ferc1 import *
from pudl.transform.params import *

transformers = [
    bsa := BalanceSheetAssetsFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["balance_sheet_assets_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    bsl := BalanceSheetLiabilitiesFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["balance_sheet_liabilities_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    das := DepreciationAmortizationSummaryFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["depreciation_amortization_summary_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    eed := ElectricEnergyDispositionsFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["electric_energy_dispositions_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    ees := ElectricEnergySourcesFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["electric_energy_sources_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    eo := ElectricOpexFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["electric_opex_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    epdc := ElectricPlantDepreciationChangesFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["electric_plant_depreciation_changes_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    ff := FuelFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["fuel_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    ins := IncomeStatementFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["income_statement_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    ph := PlantsHydroFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["plants_hydro_ferc1"], 
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    pps := PlantsPumpedStorageFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["plants_pumped_storage_ferc1"], 
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    psm := PlantsSmallFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["plants_small_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    pst := PlantsSteamFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["plants_steam_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    pis := PlantInServiceFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["plant_in_service_ferc1"], 
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    pp := PurchasedPowerFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["purchased_power_ferc1"], 
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    re := RetainedEarningsFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["retained_earnings_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    ts := TransmissionStatisticsFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["transmission_statistics_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    ups := UtilityPlantSummaryFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["utility_plant_summary_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    ),
    esbrs := ElectricitySalesByRateScheduleFerc1TableTransformer(
    #xbrl_metadata_json=xbrl_metadata_json_dict["electricity_sales_by_rate_schedule_ferc1"],
    cache_dfs=True, 
    clear_cached_dfs=False
    ),
]

In [257]:
from pudl.transform.ferc1 import *
from pudl.transform.params import *

ff = FuelFerc1TableTransformer(
    xbrl_metadata_json=xbrl_metadata_json_dict["fuel_ferc1"],
    cache_dfs=True, 
    clear_cached_dfs=False
)

pst = PlantsSteamFerc1TableTransformer(
        xbrl_metadata_json=xbrl_metadata_json_dict["plants_steam_ferc1"],
        cache_dfs=True, 
        clear_cached_dfs=False
    )



2023-01-31 16:04:53 [    INFO] catalystcoop.pudl.transform.ferc1:896 fuel_ferc1: Processing XBRL metadata.


fuel_ferc1: Processing XBRL metadata.


2023-01-31 16:04:53 [    INFO] catalystcoop.pudl.transform.ferc1:896 plants_steam_ferc1: Processing XBRL metadata.


plants_steam_ferc1: Processing XBRL metadata.


### Transform Individual Tables

In [258]:
# Pick one table to transform
TRANSFORMER = ff
table = TRANSFORMER.table_id.value

In [259]:
#for transformer in transformers:

bb = TRANSFORMER.process_xbrl(
    raw_xbrl_instant=ferc1_xbrl_raw_dfs[table]["instant"],
    raw_xbrl_duration=ferc1_xbrl_raw_dfs[table]["duration"]
)

2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.classes:1194 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.ferc1:1056 fuel_ferc1: Unstacking balances to the report years.


fuel_ferc1: Unstacking balances to the report years.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.classes:1194 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.ferc1:1255 fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.ferc1:1159 fuel_ferc1: No XBRL instant table found.


fuel_ferc1: No XBRL instant table found.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.classes:1194 fuel_ferc1: Attempting to rename 15 columns.


fuel_ferc1: Attempting to rename 15 columns.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.classes:1267 fuel_ferc1: Converting units and renaming columns accordingly.


fuel_ferc1: Converting units and renaming columns accordingly.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.classes:1218 fuel_ferc1: Normalizing freeform string columns.


fuel_ferc1: Normalizing freeform string columns.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.classes:1242 fuel_ferc1: Categorizing string columns using a controlled vocabulary.


fuel_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.ferc1:1663 fuel_ferc1: Aggregating 30 rows with duplicate primary keys out of 1312 total rows.


fuel_ferc1: Aggregating 30 rows with duplicate primary keys out of 1312 total rows.


2023-01-31 16:06:49 [    INFO] catalystcoop.pudl.transform.ferc1:1667 fuel_ferc1: Dropping 98 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


fuel_ferc1: Dropping 98 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


In [260]:
# Process steam
TRANSFORMER = pst
#print(TRANSFORMER)
#table = TRANSFORMER.table_id.value

steam = TRANSFORMER.process_xbrl(
    raw_xbrl_instant=ferc1_xbrl_raw_dfs[table]["instant"],
    raw_xbrl_duration=ferc1_xbrl_raw_dfs[table]["duration"]
)

2023-01-31 16:06:53 [    INFO] catalystcoop.pudl.transform.ferc1:1003 plants_steam_ferc1: Processing XBRL data pre-concatenation.


plants_steam_ferc1: Processing XBRL data pre-concatenation.


2023-01-31 16:06:53 [    INFO] catalystcoop.pudl.transform.classes:1194 plants_steam_ferc1: Attempting to rename 0 columns.


plants_steam_ferc1: Attempting to rename 0 columns.


2023-01-31 16:06:53 [    INFO] catalystcoop.pudl.transform.ferc1:1056 plants_steam_ferc1: Unstacking balances to the report years.


plants_steam_ferc1: Unstacking balances to the report years.


2023-01-31 16:06:53 [    INFO] catalystcoop.pudl.transform.classes:1194 plants_steam_ferc1: Attempting to rename 0 columns.


plants_steam_ferc1: Attempting to rename 0 columns.


2023-01-31 16:06:53 [    INFO] catalystcoop.pudl.transform.ferc1:1255 plants_steam_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


plants_steam_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-31 16:06:53 [    INFO] catalystcoop.pudl.transform.ferc1:1159 plants_steam_ferc1: No XBRL instant table found.


plants_steam_ferc1: No XBRL instant table found.


2023-01-31 16:06:53 [    INFO] catalystcoop.pudl.transform.classes:1194 plants_steam_ferc1: Attempting to rename 43 columns.


plants_steam_ferc1: Attempting to rename 43 columns.


Missing columns: {'net_continuous_plant_capability', 'net_peak_demand_on_plant', 'maintenance_of_boiler_plant_steam_power_generation', 'year_plant_originally_constructed', 'net_generation_excluding_plant_use', 'plant_average_number_of_employees', 'plant_hours_connected_to_load', 'electric_expenses_steam_power_generation', 'miscellaneous_steam_power_expenses', 'net_continuous_plant_capability_not_limited_by_condenser_water', 'maintenance_of_electric_plant_steam_power_generation', 'plant_name', 'asset_retirement_costs_steam_production', 'coolants_and_water', 'cost_per_kilowatt_of_installed_capacity', 'plant_construction_type', 'expenses_per_net_kilowatt_hour', 'maintenance_supervision_and_engineering_steam_power_generation', 'power_production_expenses_steam_power', 'steam_expenses_steam_power_generation', 'allowances', 'maintenance_of_miscellaneous_steam_plant', 'net_continuous_plant_capability_limited_by_condenser_water', 'year_last_unit_of_plant_installed', 'date', 'cost_of_structures_

plants_steam_ferc1: Attempting to rename columns which are not present in the dataframe.
Missing columns: {'net_continuous_plant_capability', 'net_peak_demand_on_plant', 'maintenance_of_boiler_plant_steam_power_generation', 'year_plant_originally_constructed', 'net_generation_excluding_plant_use', 'plant_average_number_of_employees', 'plant_hours_connected_to_load', 'electric_expenses_steam_power_generation', 'miscellaneous_steam_power_expenses', 'net_continuous_plant_capability_not_limited_by_condenser_water', 'maintenance_of_electric_plant_steam_power_generation', 'plant_name', 'asset_retirement_costs_steam_production', 'coolants_and_water', 'cost_per_kilowatt_of_installed_capacity', 'plant_construction_type', 'expenses_per_net_kilowatt_hour', 'maintenance_supervision_and_engineering_steam_power_generation', 'power_production_expenses_steam_power', 'steam_expenses_steam_power_generation', 'allowances', 'maintenance_of_miscellaneous_steam_plant', 'net_continuous_plant_capability_limit

['steam_electric_generating_plant_statistics_large_plants_402_2021_c008999_baxter_wilson'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c008999_gerald_andrus'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c008999_independence'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001330_unit_4'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001330_units_13'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c000906_fort_martin'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c000906_harrison'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001322_glenwood_steam'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001322_port_jefferson_steam'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001322_northport_steam'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001322_ef_barrett_steam'
 'steam_electric_

plants_steam_ferc1: Found 500 duplicate record_ids: 
['steam_electric_generating_plant_statistics_large_plants_402_2021_c008999_baxter_wilson'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c008999_gerald_andrus'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c008999_independence'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001330_unit_4'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001330_units_13'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c000906_fort_martin'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c000906_harrison'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001322_glenwood_steam'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001322_port_jefferson_steam'
 'steam_electric_generating_plant_statistics_large_plants_402_2021_c001322_northport_steam'
 'steam_electric_generating_plant_statistics_large_plants

In [261]:
# View missing plant names
steam[steam['plant_name_ferc1'] == "0"]

Unnamed: 0,utility_id_ferc1_xbrl,start_date,end_date,fuel_kind_axis,plant_name_ferc1,average_cost_of_fuel_burned_per_kilowatt_hour_net_generation,average_cost_of_fuel_per_unit_burned,average_cost_of_fuel_burned_per_million_british_thermal_unit,fuel_burned_average_heat_content,fuel_kind,average_british_thermal_unit_per_kilowatt_hour_net_generation,quantity_of_fuel_burned,fuel_unit,average_cost_of_fuel_per_unit_as_delivered,report_year,record_id,utility_id_ferc1
1180,C011304,2021-01-01,2021-12-31,COAL,0,0.027,53.876,236.732,11517.0,COAL,10800.0,2357771.0,T,59.736,2021,steam_electric_generating_plant_statistics_lar...,444
1181,C011304,2021-01-01,2021-12-31,OIL,0,0.0,2.167,1367.21,136000.0,OIL,0.0,844331.0,bbl,2.276,2021,steam_electric_generating_plant_statistics_lar...,444
1231,C000447,2021-01-01,2021-12-31,Gas,0,0.0,0.0,0.0,,Gas,0.0,,Mcf,0.0,2021,steam_electric_generating_plant_statistics_lar...,185


#### Transform Step-by-Step

In [262]:
start = TRANSFORMER.transform_start(
    raw_dbf=ferc1_dbf_raw_dfs[TRANSFORMER.table_id.value],
    raw_xbrl_instant=ferc1_xbrl_raw_dfs[TRANSFORMER.table_id.value]["instant"],
    raw_xbrl_duration=ferc1_xbrl_raw_dfs[TRANSFORMER.table_id.value]["duration"]
)

2023-01-31 16:07:02 [    INFO] catalystcoop.pudl.transform.ferc1:982 plants_steam_ferc1: Processing DBF data pre-concatenation.


plants_steam_ferc1: Processing DBF data pre-concatenation.


2023-01-31 16:07:02 [    INFO] catalystcoop.pudl.transform.classes:1194 plants_steam_ferc1: Attempting to rename 43 columns.


plants_steam_ferc1: Attempting to rename 43 columns.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.ferc1:1003 plants_steam_ferc1: Processing XBRL data pre-concatenation.


plants_steam_ferc1: Processing XBRL data pre-concatenation.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.classes:1194 plants_steam_ferc1: Attempting to rename 0 columns.


plants_steam_ferc1: Attempting to rename 0 columns.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.ferc1:1056 plants_steam_ferc1: Unstacking balances to the report years.


plants_steam_ferc1: Unstacking balances to the report years.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.classes:1194 plants_steam_ferc1: Attempting to rename 0 columns.


plants_steam_ferc1: Attempting to rename 0 columns.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.ferc1:1255 plants_steam_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


plants_steam_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.ferc1:1165 plants_steam_ferc1: Both XBRL instant & duration tables found.


plants_steam_ferc1: Both XBRL instant & duration tables found.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.ferc1:1176 plants_steam_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


plants_steam_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.classes:1194 plants_steam_ferc1: Attempting to rename 43 columns.


plants_steam_ferc1: Attempting to rename 43 columns.


2023-01-31 16:07:03 [    INFO] catalystcoop.pudl.transform.ferc1:819 plants_steam_ferc1: Concatenating DBF + XBRL dataframes.


plants_steam_ferc1: Concatenating DBF + XBRL dataframes.


In [263]:
start[start['record_id']=='f1_steam_1999_12_72_0_1']

Unnamed: 0,utility_id_ferc1_dbf,report_year,plant_name_ferc1,plant_type,construction_type,construction_year,installation_year,capacity_mw,peak_demand_mw,plant_hours_connected_while_generating,plant_capability_mw,not_water_limited_capacity_mw,water_limited_capacity_mw,avg_num_employees,net_generation_kwh,capex_land,capex_structures,capex_equipment,capex_total,capex_per_kw,opex_operations,opex_fuel,opex_coolants,opex_steam,opex_steam_other,opex_transfer,opex_electric,opex_misc_power,opex_rents,opex_allowances,opex_engineering,opex_structures,opex_boiler,opex_plants,opex_misc_steam,opex_production_total,opex_per_kwh,asset_retirement_cost,record_id,utility_id_ferc1,utility_id_ferc1_xbrl,date,start_date,end_date,plant_name,order_number
9645,72.0,1999,,Steam,Conventional,1955,1956,1303.56,1327.0,8760.0,,1284.0,,390.0,8531868000.0,673317.0,64561620.0,322548092.0,387783029.0,297.48,1154409.0,107221337.0,,4377325.0,,,2030686.0,4309783.0,2400.0,,425959.0,1277712.0,11216023.0,2529112.0,709251.0,135253997.0,0.0159,,f1_steam_1999_12_72_0_1,409,,,NaT,NaT,,


In [264]:
# Transform fuel table for use in 

TRANSFORMERF = ff
tablef = TRANSFORMERF.table_id.value

fuel = TRANSFORMERF.transform(
    raw_dbf=ferc1_dbf_raw_dfs[TRANSFORMERF.table_id.value],
    raw_xbrl_instant=ferc1_xbrl_raw_dfs[TRANSFORMERF.table_id.value]["instant"],
    raw_xbrl_duration=ferc1_xbrl_raw_dfs[TRANSFORMERF.table_id.value]["duration"]
)

2023-01-31 16:07:12 [    INFO] catalystcoop.pudl.transform.ferc1:982 fuel_ferc1: Processing DBF data pre-concatenation.


fuel_ferc1: Processing DBF data pre-concatenation.


2023-01-31 16:07:12 [    INFO] catalystcoop.pudl.transform.classes:1194 fuel_ferc1: Attempting to rename 17 columns.


fuel_ferc1: Attempting to rename 17 columns.


['f1_fuel_2000_12_24_3_1'].


fuel_ferc1: Found 1 duplicate record_ids: 
['f1_fuel_2000_12_24_3_1'].


2023-01-31 16:07:13 [    INFO] catalystcoop.pudl.transform.classes:1267 fuel_ferc1: Converting units and renaming columns accordingly.


fuel_ferc1: Converting units and renaming columns accordingly.


2023-01-31 16:07:13 [    INFO] catalystcoop.pudl.transform.classes:1218 fuel_ferc1: Normalizing freeform string columns.


fuel_ferc1: Normalizing freeform string columns.


2023-01-31 16:07:14 [    INFO] catalystcoop.pudl.transform.classes:1242 fuel_ferc1: Categorizing string columns using a controlled vocabulary.


fuel_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-31 16:07:14 [    INFO] catalystcoop.pudl.transform.classes:1194 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-31 16:07:14 [    INFO] catalystcoop.pudl.transform.ferc1:1056 fuel_ferc1: Unstacking balances to the report years.


fuel_ferc1: Unstacking balances to the report years.


2023-01-31 16:07:14 [    INFO] catalystcoop.pudl.transform.classes:1194 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.ferc1:1255 fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.ferc1:1159 fuel_ferc1: No XBRL instant table found.


fuel_ferc1: No XBRL instant table found.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1194 fuel_ferc1: Attempting to rename 15 columns.


fuel_ferc1: Attempting to rename 15 columns.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1267 fuel_ferc1: Converting units and renaming columns accordingly.


fuel_ferc1: Converting units and renaming columns accordingly.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1218 fuel_ferc1: Normalizing freeform string columns.


fuel_ferc1: Normalizing freeform string columns.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1242 fuel_ferc1: Categorizing string columns using a controlled vocabulary.


fuel_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.ferc1:1663 fuel_ferc1: Aggregating 30 rows with duplicate primary keys out of 1312 total rows.


fuel_ferc1: Aggregating 30 rows with duplicate primary keys out of 1312 total rows.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.ferc1:1667 fuel_ferc1: Dropping 98 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


fuel_ferc1: Dropping 98 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.ferc1:819 fuel_ferc1: Concatenating DBF + XBRL dataframes.


fuel_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1320 fuel_ferc1: Spot fixing missing values.


fuel_ferc1: Spot fixing missing values.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1296 fuel_ferc1: Dropping remaining invalid rows.


fuel_ferc1: Dropping remaining invalid rows.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:823 68.7% of records (107454 rows) contain only {0, '', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


68.7% of records (107454 rows) contain only {0, '', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:823 0.4% of records (217 rows) contain only {'', '-', 'must 123', 'must 456', nan, '0', 'ant1-3', 'elk 1-3', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


0.4% of records (217 rows) contain only {'', '-', 'must 123', 'must 456', nan, '0', 'ant1-3', 'elk 1-3', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.ferc1:1737 fuel_ferc1: Dropping 0/48841rows representing plant-level all-fuel totals.


fuel_ferc1: Dropping 0/48841rows representing plant-level all-fuel totals.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1283 fuel_ferc1: Correcting inferred non-standard column units.


fuel_ferc1: Correcting inferred non-standard column units.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==coal.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==coal.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:719 181/10820 (1.67%) of records could not be corrected and were set to NA.


181/10820 (1.67%) of records could not be corrected and were set to NA.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==gas.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==gas.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:719 860/15769 (5.45%) of records could not be corrected and were set to NA.


860/15769 (5.45%) of records could not be corrected and were set to NA.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==oil.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==oil.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:719 643/17623 (3.65%) of records could not be corrected and were set to NA.


643/17623 (3.65%) of records could not be corrected and were set to NA.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==coal.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==coal.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:719 1367/10820 (12.63%) of records could not be corrected and were set to NA.


1367/10820 (12.63%) of records could not be corrected and were set to NA.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==gas.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==gas.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:719 2071/15769 (13.13%) of records could not be corrected and were set to NA.


2071/15769 (13.13%) of records could not be corrected and were set to NA.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==oil.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==oil.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:719 8083/17623 (45.87%) of records could not be corrected and were set to NA.


8083/17623 (45.87%) of records could not be corrected and were set to NA.


2023-01-31 16:07:15 [    INFO] catalystcoop.pudl.transform.classes:1327 fuel_ferc1: Enforcing database schema on dataframe.


fuel_ferc1: Enforcing database schema on dataframe.


In [266]:
main = TRANSFORMER.transform_main(
    start, transformed_fuel = fuel
)

2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1218 plants_steam_ferc1: Normalizing freeform string columns.


plants_steam_ferc1: Normalizing freeform string columns.


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1242 plants_steam_ferc1: Categorizing string columns using a controlled vocabulary.


plants_steam_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1267 plants_steam_ferc1: Converting units and renaming columns accordingly.


plants_steam_ferc1: Converting units and renaming columns accordingly.


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1229 plants_steam_ferc1: Stripping non-numeric values from [].


plants_steam_ferc1: Stripping non-numeric values from [].


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1256 plants_steam_ferc1: Nullifying outlying values.


plants_steam_ferc1: Nullifying outlying values.


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1309 plants_steam_ferc1: Replacing specified values with NA.


plants_steam_ferc1: Replacing specified values with NA.


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1320 plants_steam_ferc1: Spot fixing missing values.


plants_steam_ferc1: Spot fixing missing values.


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:886 plants_steam_ferc1: Spot fixing some values


plants_steam_ferc1: Spot fixing some values


2023-01-31 16:09:28 [    INFO] catalystcoop.pudl.transform.classes:1296 plants_steam_ferc1: Dropping remaining invalid rows.


plants_steam_ferc1: Dropping remaining invalid rows.


2023-01-31 16:09:29 [    INFO] catalystcoop.pudl.transform.classes:823 41.7% of records (22039 rows) contain only {0, '', nan, 'none', '0', <NA>} values in required columns. Dropped these 💩💩💩 records.


41.7% of records (22039 rows) contain only {0, '', nan, 'none', '0', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-31 16:09:29 [    INFO] catalystcoop.pudl.transform.classes:823 0.2% of records (71 rows) contain only {'', '-', nan, '0', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


0.2% of records (71 rows) contain only {'', '-', nan, '0', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-31 16:09:29 [    INFO] catalystcoop.pudl.analysis.classify_plants_ferc1:399 Identifying distinct large FERC plants for ID assignment.


Identifying distinct large FERC plants for ID assignment.


2023-01-31 16:28:02 [    INFO] catalystcoop.pudl.analysis.classify_plants_ferc1:432 Successfully associated 22900 of 30710 (74.57%) FERC Form 1 plant records with multi-year plant entities.


Successfully associated 22900 of 30710 (74.57%) FERC Form 1 plant records with multi-year plant entities.


2023-01-31 16:28:02 [    INFO] catalystcoop.pudl.analysis.classify_plants_ferc1:445 Assigning IDs to multi-year FERC plant entities.


Assigning IDs to multi-year FERC plant entities.


2023-01-31 16:28:09 [    INFO] catalystcoop.pudl.analysis.classify_plants_ferc1:462 Identified 4949 orphaned FERC plant records. Adding orphans to list of plant entities.


Identified 4949 orphaned FERC plant records. Adding orphans to list of plant entities.


2023-01-31 16:28:14 [    INFO] catalystcoop.pudl.analysis.classify_plants_ferc1:484 Successfully Identified 2079 multi-year plant entities.


Successfully Identified 2079 multi-year plant entities.


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1994 2 times in plant_id_ferc1=345


Found report_year=1994 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1995 2 times in plant_id_ferc1=345


Found report_year=1995 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1996 2 times in plant_id_ferc1=345


Found report_year=1996 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1998 2 times in plant_id_ferc1=345


Found report_year=1998 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1999 2 times in plant_id_ferc1=345


Found report_year=1999 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2000 2 times in plant_id_ferc1=345


Found report_year=2000 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2001 2 times in plant_id_ferc1=345


Found report_year=2001 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2002 2 times in plant_id_ferc1=345


Found report_year=2002 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2003 2 times in plant_id_ferc1=345


Found report_year=2003 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2004 2 times in plant_id_ferc1=345


Found report_year=2004 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2005 2 times in plant_id_ferc1=345


Found report_year=2005 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2006 2 times in plant_id_ferc1=345


Found report_year=2006 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2007 2 times in plant_id_ferc1=345


Found report_year=2007 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2008 2 times in plant_id_ferc1=345


Found report_year=2008 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2009 2 times in plant_id_ferc1=345


Found report_year=2009 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2010 2 times in plant_id_ferc1=345


Found report_year=2010 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2011 2 times in plant_id_ferc1=345


Found report_year=2011 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2012 2 times in plant_id_ferc1=345


Found report_year=2012 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2013 2 times in plant_id_ferc1=345


Found report_year=2013 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2014 2 times in plant_id_ferc1=345


Found report_year=2014 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2015 2 times in plant_id_ferc1=345


Found report_year=2015 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2016 2 times in plant_id_ferc1=345


Found report_year=2016 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2017 2 times in plant_id_ferc1=345


Found report_year=2017 2 times in plant_id_ferc1=345


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1995 2 times in plant_id_ferc1=368


Found report_year=1995 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1996 2 times in plant_id_ferc1=368


Found report_year=1996 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1997 2 times in plant_id_ferc1=368


Found report_year=1997 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1998 2 times in plant_id_ferc1=368


Found report_year=1998 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1999 2 times in plant_id_ferc1=368


Found report_year=1999 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2000 2 times in plant_id_ferc1=368


Found report_year=2000 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2001 2 times in plant_id_ferc1=368


Found report_year=2001 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2002 2 times in plant_id_ferc1=368


Found report_year=2002 2 times in plant_id_ferc1=368


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2008 2 times in plant_id_ferc1=680


Found report_year=2008 2 times in plant_id_ferc1=680


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2005 2 times in plant_id_ferc1=888


Found report_year=2005 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2006 2 times in plant_id_ferc1=888


Found report_year=2006 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2007 2 times in plant_id_ferc1=888


Found report_year=2007 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2008 2 times in plant_id_ferc1=888


Found report_year=2008 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2009 2 times in plant_id_ferc1=888


Found report_year=2009 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2010 2 times in plant_id_ferc1=888


Found report_year=2010 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2011 2 times in plant_id_ferc1=888


Found report_year=2011 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2012 2 times in plant_id_ferc1=888


Found report_year=2012 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2013 2 times in plant_id_ferc1=888


Found report_year=2013 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2014 2 times in plant_id_ferc1=888


Found report_year=2014 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2015 2 times in plant_id_ferc1=888


Found report_year=2015 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2016 2 times in plant_id_ferc1=888


Found report_year=2016 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2017 2 times in plant_id_ferc1=888


Found report_year=2017 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2018 2 times in plant_id_ferc1=888


Found report_year=2018 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2019 2 times in plant_id_ferc1=888


Found report_year=2019 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2020 2 times in plant_id_ferc1=888


Found report_year=2020 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2021 2 times in plant_id_ferc1=888


Found report_year=2021 2 times in plant_id_ferc1=888


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=1995 2 times in plant_id_ferc1=1094


Found report_year=1995 2 times in plant_id_ferc1=1094


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2000 2 times in plant_id_ferc1=1186


Found report_year=2000 2 times in plant_id_ferc1=1186


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2019 2 times in plant_id_ferc1=1281


Found report_year=2019 2 times in plant_id_ferc1=1281


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2020 2 times in plant_id_ferc1=1281


Found report_year=2020 2 times in plant_id_ferc1=1281


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2021 2 times in plant_id_ferc1=1281


Found report_year=2021 2 times in plant_id_ferc1=1281


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2004 2 times in plant_id_ferc1=1567


Found report_year=2004 2 times in plant_id_ferc1=1567


2023-01-31 16:28:26 [   ERROR] catalystcoop.pudl.analysis.classify_plants_ferc1:594 Found report_year=2005 2 times in plant_id_ferc1=1567


Found report_year=2005 2 times in plant_id_ferc1=1567


In [269]:
# Test it works!
main[main['record_id'].str.contains('f1_steam_1999_12_72_0_1')] #clifty creek
main[main['record_id'].str.contains('f1_steam_1998_12_64_0_1')] #hardee power station


Unnamed: 0,utility_id_ferc1_dbf,report_year,capacity_mw,peak_demand_mw,plant_hours_connected_while_generating,plant_capability_mw,not_water_limited_capacity_mw,water_limited_capacity_mw,avg_num_employees,capex_land,capex_structures,capex_equipment,capex_total,opex_operations,opex_fuel,opex_coolants,opex_steam,opex_steam_other,opex_transfer,opex_electric,opex_misc_power,opex_rents,opex_allowances,opex_engineering,opex_structures,opex_boiler,opex_plants,opex_misc_steam,opex_production_total,asset_retirement_cost,record_id,utility_id_ferc1,utility_id_ferc1_xbrl,date,start_date,end_date,plant_name,order_number,plant_name_ferc1,construction_type,plant_type,capex_per_mw,opex_per_mwh,net_generation_mwh,construction_year,installation_year,plant_id_ferc1
6370,64.0,1998,349.0,313.0,4649.0,295.0,295.0,,,,30484465.0,176340418.0,206824883.0,145058.0,21212847.0,,,,,1429184.0,,212211.0,,141832.0,149465.0,,699439.0,,23990036.0,,f1_steam_1998_12_64_0_1,253,,,NaT,NaT,,,hardee power station,outdoor,combustion_turbine,592621.4,26.2,916579.0,1992.0,1992.0,249


In [203]:
start.loc[start['record_id']=="f1_steam_2001_12_204_0_1", 'plant_name_ferc1'] = "TEST"
start.loc[start['record_id']=="f1_steam_2001_12_204_0_1", 'plant_name_ferc1']

14375    TEST
Name: plant_name_ferc1, dtype: object

In [213]:
df = start
for i in SpotFixValues['spot_fix']:
    idcol = list(i.keys())[0] # Get name of identifying column
    
    if idcol in list(df.columns): # For each row to be spot-fixed
        for key in i['fixes']: # For each fix
            if key in list(df.columns):
                df.loc[df[idcol]==i[idcol], key] = i['fixes'][key] # Manually update value
    else:
        # Could log error here
        pass

print(df.loc[df['plant_name_ferc1']=='seabrook'])


       utility_id_ferc1_dbf  report_year plant_name_ferc1 plant_type construction_type construction_year installation_year  capacity_mw  peak_demand_mw  plant_hours_connected_while_generating  plant_capability_mw  not_water_limited_capacity_mw  water_limited_capacity_mw  avg_num_employees  net_generation_kwh  capex_land  capex_structures  capex_equipment  capex_total  capex_per_kw  opex_operations  opex_fuel  opex_coolants  opex_steam  opex_steam_other  opex_transfer  opex_electric  opex_misc_power  opex_rents  opex_allowances  opex_engineering  opex_structures  opex_boiler  opex_plants  opex_misc_steam  opex_production_total  opex_per_kwh  asset_retirement_cost                 record_id  utility_id_ferc1 utility_id_ferc1_xbrl date start_date end_date plant_name  order_number
11185                 204.0         2000         seabrook    Nuclear   Fully contained              1990              1990         26.0            25.0                                   150.0                  NaN 

In [None]:
def spot_fix_values(self, df: pd.DataFrame, params: SpotFixValues) -> pd.DataFrame:
        """Manually fix one-off singular missing values.

        With our new ferc1 transform process we are less intense about dropping records.
        Because of that, a fair amount of records have "" or 0 as a plant name. 
        Most of these records have no other data points in them and thus can be dropped.
        But some of them actually have some information. Manual investigation of some
        of these records led to some pretty easy identification of plant names. This
        function takes a dictionary of these fixes and applies them to the dataframe.

        There are probably plenty of other spot fixes one could add here.

        Params:
            df: Pre-processed, concatenated XBRL and DBF data.
            params: an instance of :class:`SpotFixValues`

        Returns:
            The same input DataFrame but with some spot fixes corrected.
        """
        logger.info(f"{self.table_id.value}: Spot fixing some values")
        
        
        # Define rows and columns to change
        for i in params:
            idcol = list(i.keys())[0] # Get name of identifying column
    
            if idcol in list(df.columns): # For each row to be spot-fixed
                for key in i['fixes']: # For each fix
                    if key in list(df.columns):
                        df.loc[df[idcol]==i[idcol], key] = i['fixes'][key] # Manually update value
            else:
                # Could log error here
                pass

        return df

In [97]:
end = TRANSFORMER.transform_end(
    main
)
end[end['record_id']=='f1_steam_2014_12_276_0_1']

2023-01-31 10:20:15 [    INFO] catalystcoop.pudl.transform.classes:1264 plants_steam_ferc1: Enforcing database schema on dataframe.


plants_steam_ferc1: Enforcing database schema on dataframe.


KeyError: 'record_id_ferc1'

In [104]:
end[end['plant_name_ferc1']=='0']

Unnamed: 0,record_id,utility_id_ferc1,report_year,plant_id_ferc1,plant_name_ferc1,plant_type,construction_type,construction_year,installation_year,capacity_mw,peak_demand_mw,plant_hours_connected_while_generating,plant_capability_mw,water_limited_capacity_mw,not_water_limited_capacity_mw,avg_num_employees,net_generation_mwh,capex_land,capex_structures,capex_equipment,capex_total,capex_per_mw,opex_operations,opex_fuel,opex_coolants,opex_steam,opex_steam_other,opex_transfer,opex_electric,opex_misc_power,opex_rents,opex_allowances,opex_engineering,opex_structures,opex_boiler,opex_plants,opex_misc_steam,opex_production_total,opex_per_mwh,asset_retirement_cost


#### Transform All Steps Together

In [35]:
full = TRANSFORMER.transform(
    raw_dbf=ferc1_dbf_raw_dfs[TRANSFORMER.table_id.value],
    raw_xbrl_instant=ferc1_xbrl_raw_dfs[TRANSFORMER.table_id.value]["instant"],
    raw_xbrl_duration=ferc1_xbrl_raw_dfs[TRANSFORMER.table_id.value]["duration"]
)

2023-01-27 11:07:05 [    INFO] catalystcoop.pudl.transform.ferc1:981 fuel_ferc1: Processing DBF data pre-concatenation.


fuel_ferc1: Processing DBF data pre-concatenation.


2023-01-27 11:07:06 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 17 columns.


fuel_ferc1: Attempting to rename 17 columns.


['f1_fuel_2000_12_24_3_1'].


fuel_ferc1: Found 1 duplicate record_ids: 
['f1_fuel_2000_12_24_3_1'].


2023-01-27 11:07:06 [    INFO] catalystcoop.pudl.transform.classes:1217 fuel_ferc1: Converting units and renaming columns accordingly.


fuel_ferc1: Converting units and renaming columns accordingly.


2023-01-27 11:07:06 [    INFO] catalystcoop.pudl.transform.classes:1168 fuel_ferc1: Normalizing freeform string columns.


fuel_ferc1: Normalizing freeform string columns.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1192 fuel_ferc1: Categorizing string columns using a controlled vocabulary.


fuel_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.ferc1:1055 fuel_ferc1: Unstacking balances to the report years.


fuel_ferc1: Unstacking balances to the report years.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.ferc1:1254 fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.ferc1:1158 fuel_ferc1: No XBRL instant table found.


fuel_ferc1: No XBRL instant table found.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 15 columns.


fuel_ferc1: Attempting to rename 15 columns.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1217 fuel_ferc1: Converting units and renaming columns accordingly.


fuel_ferc1: Converting units and renaming columns accordingly.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1168 fuel_ferc1: Normalizing freeform string columns.


fuel_ferc1: Normalizing freeform string columns.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1192 fuel_ferc1: Categorizing string columns using a controlled vocabulary.


fuel_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.ferc1:1658 fuel_ferc1: Aggregating 30 rows with duplicate primary keys out of 1312 total rows.


fuel_ferc1: Aggregating 30 rows with duplicate primary keys out of 1312 total rows.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.ferc1:1662 fuel_ferc1: Dropping 98 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


fuel_ferc1: Dropping 98 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.ferc1:819 fuel_ferc1: Concatenating DBF + XBRL dataframes.


fuel_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1246 fuel_ferc1: Dropping remaining invalid rows.


fuel_ferc1: Dropping remaining invalid rows.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:823 68.7% of records (107454 rows) contain only {0, nan, '', <NA>} values in required columns. Dropped these 💩💩💩 records.


68.7% of records (107454 rows) contain only {0, nan, '', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:823 0.4% of records (217 rows) contain only {'', nan, 'elk 1-3', '-', 'must 123', 'ant1-3', 'not applicable', 'must 456', '0', <NA>} values in required columns. Dropped these 💩💩💩 records.


0.4% of records (217 rows) contain only {'', nan, 'elk 1-3', '-', 'must 123', 'ant1-3', 'not applicable', 'must 456', '0', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.ferc1:1732 fuel_ferc1: Dropping 0/48841rows representing plant-level all-fuel totals.


fuel_ferc1: Dropping 0/48841rows representing plant-level all-fuel totals.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1233 fuel_ferc1: Correcting inferred non-standard column units.


fuel_ferc1: Correcting inferred non-standard column units.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==coal.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==coal.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:719 181/10820 (1.67%) of records could not be corrected and were set to NA.


181/10820 (1.67%) of records could not be corrected and were set to NA.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==gas.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==gas.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:719 860/15769 (5.45%) of records could not be corrected and were set to NA.


860/15769 (5.45%) of records could not be corrected and were set to NA.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==oil.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==oil.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:719 643/17623 (3.65%) of records could not be corrected and were set to NA.


643/17623 (3.65%) of records could not be corrected and were set to NA.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==coal.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==coal.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:719 1367/10820 (12.63%) of records could not be corrected and were set to NA.


1367/10820 (12.63%) of records could not be corrected and were set to NA.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==gas.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==gas.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:719 2071/15769 (13.13%) of records could not be corrected and were set to NA.


2071/15769 (13.13%) of records could not be corrected and were set to NA.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==oil.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==oil.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:719 8083/17623 (45.87%) of records could not be corrected and were set to NA.


8083/17623 (45.87%) of records could not be corrected and were set to NA.


2023-01-27 11:07:07 [    INFO] catalystcoop.pudl.transform.classes:1264 fuel_ferc1: Enforcing database schema on dataframe.


fuel_ferc1: Enforcing database schema on dataframe.


In [65]:
len(full)

48841

### Transform All Tables

In [8]:
transformed_tables = {}

for transformer in transformers:
    transformed_tables[transformer.table_id.value] = transformer.transform(
        raw_dbf=ferc1_dbf_raw_dfs[transformer.table_id.value],
        raw_xbrl_instant=ferc1_xbrl_raw_dfs[transformer.table_id.value]["instant"],
        raw_xbrl_duration=ferc1_xbrl_raw_dfs[transformer.table_id.value]["duration"]
    )

2023-01-27 16:22:15 [    INFO] catalystcoop.pudl.transform.ferc1:1072 balance_sheet_assets_ferc1: Processing DBF data pre-concatenation.


balance_sheet_assets_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:15 [    INFO] catalystcoop.pudl.transform.ferc1:1135 balance_sheet_assets_ferc1: After selection only annual records, we have 40.6% of the original table.


balance_sheet_assets_ferc1: After selection only annual records, we have 40.6% of the original table.


2023-01-27 16:22:15 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_comp_balance_db']


Aligning row numbers from DBF row to XBRL map for ['f1_comp_balance_db']


2023-01-27 16:22:15 [    INFO] catalystcoop.pudl.transform.classes:1144 balance_sheet_assets_ferc1: Attempting to rename 10 columns.


balance_sheet_assets_ferc1: Attempting to rename 10 columns.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1062 balance_sheet_assets_ferc1: Dropping rows where primary key and data columns are duplicated.


balance_sheet_assets_ferc1: Dropping rows where primary key and data columns are duplicated.


  dupes_w_possible_unique_data.loc[
2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:328 Dropped 2761 duplicate records: 1.2% of total rows.


Dropped 2761 duplicate records: 1.2% of total rows.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1093 balance_sheet_assets_ferc1: Processing XBRL data pre-concatenation.


balance_sheet_assets_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1144 balance_sheet_assets_ferc1: Attempting to rename 0 columns.


balance_sheet_assets_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1147 balance_sheet_assets_ferc1: Unstacking balances to the report years.


balance_sheet_assets_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1269 balance_sheet_assets_ferc1: No XBRL duration table found.


balance_sheet_assets_ferc1: No XBRL duration table found.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1185 balance_sheet_assets_ferc1: applying wide_to_tidy for xbrl


balance_sheet_assets_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1144 balance_sheet_assets_ferc1: Attempting to rename 3 columns.


balance_sheet_assets_ferc1: Attempting to rename 3 columns.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:910 balance_sheet_assets_ferc1: Concatenating DBF + XBRL dataframes.


balance_sheet_assets_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1168 balance_sheet_assets_ferc1: Normalizing freeform string columns.


balance_sheet_assets_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1192 balance_sheet_assets_ferc1: Categorizing string columns using a controlled vocabulary.


balance_sheet_assets_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1217 balance_sheet_assets_ferc1: Converting units and renaming columns accordingly.


balance_sheet_assets_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1179 balance_sheet_assets_ferc1: Stripping non-numeric values from [].


balance_sheet_assets_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1206 balance_sheet_assets_ferc1: Nullifying outlying values.


balance_sheet_assets_ferc1: Nullifying outlying values.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1259 balance_sheet_assets_ferc1: Replacing specified values with NA.


balance_sheet_assets_ferc1: Replacing specified values with NA.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1246 balance_sheet_assets_ferc1: Dropping remaining invalid rows.


balance_sheet_assets_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1032 balance_sheet_assets_ferc1: Merging metadata


balance_sheet_assets_ferc1: Merging metadata


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1264 balance_sheet_assets_ferc1: Enforcing database schema on dataframe.


balance_sheet_assets_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1072 balance_sheet_liabilities_ferc1: Processing DBF data pre-concatenation.


balance_sheet_liabilities_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:1135 balance_sheet_liabilities_ferc1: After selection only annual records, we have 39.1% of the original table.


balance_sheet_liabilities_ferc1: After selection only annual records, we have 39.1% of the original table.


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_bal_sheet_cr']


Aligning row numbers from DBF row to XBRL map for ['f1_bal_sheet_cr']


2023-01-27 16:22:16 [    INFO] catalystcoop.pudl.transform.classes:1144 balance_sheet_liabilities_ferc1: Attempting to rename 12 columns.


balance_sheet_liabilities_ferc1: Attempting to rename 12 columns.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:1062 balance_sheet_liabilities_ferc1: Dropping rows where primary key and data columns are duplicated.


balance_sheet_liabilities_ferc1: Dropping rows where primary key and data columns are duplicated.


  dupes_w_possible_unique_data.loc[
2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:328 Dropped 1311 duplicate records: 0.7% of total rows.


Dropped 1311 duplicate records: 0.7% of total rows.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:1093 balance_sheet_liabilities_ferc1: Processing XBRL data pre-concatenation.


balance_sheet_liabilities_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1144 balance_sheet_liabilities_ferc1: Attempting to rename 0 columns.


balance_sheet_liabilities_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:1147 balance_sheet_liabilities_ferc1: Unstacking balances to the report years.


balance_sheet_liabilities_ferc1: Unstacking balances to the report years.




Dropping unexpected years: [2019]


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:1269 balance_sheet_liabilities_ferc1: No XBRL duration table found.


balance_sheet_liabilities_ferc1: No XBRL duration table found.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:1185 balance_sheet_liabilities_ferc1: applying wide_to_tidy for xbrl


balance_sheet_liabilities_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1144 balance_sheet_liabilities_ferc1: Attempting to rename 3 columns.


balance_sheet_liabilities_ferc1: Attempting to rename 3 columns.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:910 balance_sheet_liabilities_ferc1: Concatenating DBF + XBRL dataframes.


balance_sheet_liabilities_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1168 balance_sheet_liabilities_ferc1: Normalizing freeform string columns.


balance_sheet_liabilities_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1192 balance_sheet_liabilities_ferc1: Categorizing string columns using a controlled vocabulary.


balance_sheet_liabilities_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1217 balance_sheet_liabilities_ferc1: Converting units and renaming columns accordingly.


balance_sheet_liabilities_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1179 balance_sheet_liabilities_ferc1: Stripping non-numeric values from [].


balance_sheet_liabilities_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1206 balance_sheet_liabilities_ferc1: Nullifying outlying values.


balance_sheet_liabilities_ferc1: Nullifying outlying values.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1259 balance_sheet_liabilities_ferc1: Replacing specified values with NA.


balance_sheet_liabilities_ferc1: Replacing specified values with NA.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1246 balance_sheet_liabilities_ferc1: Dropping remaining invalid rows.


balance_sheet_liabilities_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:1032 balance_sheet_liabilities_ferc1: Merging metadata


balance_sheet_liabilities_ferc1: Merging metadata


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1264 balance_sheet_liabilities_ferc1: Enforcing database schema on dataframe.


balance_sheet_liabilities_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:1072 depreciation_amortization_summary_ferc1: Processing DBF data pre-concatenation.


depreciation_amortization_summary_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_dacs_epda']


Aligning row numbers from DBF row to XBRL map for ['f1_dacs_epda']


2023-01-27 16:22:17 [    INFO] catalystcoop.pudl.transform.classes:1144 depreciation_amortization_summary_ferc1: Attempting to rename 13 columns.


depreciation_amortization_summary_ferc1: Attempting to rename 13 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1185 depreciation_amortization_summary_ferc1: applying wide_to_tidy for dbf


depreciation_amortization_summary_ferc1: applying wide_to_tidy for dbf


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1093 depreciation_amortization_summary_ferc1: Processing XBRL data pre-concatenation.


depreciation_amortization_summary_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 depreciation_amortization_summary_ferc1: Attempting to rename 0 columns.


depreciation_amortization_summary_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1147 depreciation_amortization_summary_ferc1: Unstacking balances to the report years.


depreciation_amortization_summary_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 depreciation_amortization_summary_ferc1: Attempting to rename 6 columns.


depreciation_amortization_summary_ferc1: Attempting to rename 6 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1374 depreciation_amortization_summary_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


depreciation_amortization_summary_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1266 depreciation_amortization_summary_ferc1: No XBRL instant table found.


depreciation_amortization_summary_ferc1: No XBRL instant table found.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1185 depreciation_amortization_summary_ferc1: applying wide_to_tidy for xbrl


depreciation_amortization_summary_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 depreciation_amortization_summary_ferc1: Attempting to rename 3 columns.


depreciation_amortization_summary_ferc1: Attempting to rename 3 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:910 depreciation_amortization_summary_ferc1: Concatenating DBF + XBRL dataframes.


depreciation_amortization_summary_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1168 depreciation_amortization_summary_ferc1: Normalizing freeform string columns.


depreciation_amortization_summary_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1192 depreciation_amortization_summary_ferc1: Categorizing string columns using a controlled vocabulary.


depreciation_amortization_summary_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1217 depreciation_amortization_summary_ferc1: Converting units and renaming columns accordingly.


depreciation_amortization_summary_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1179 depreciation_amortization_summary_ferc1: Stripping non-numeric values from [].


depreciation_amortization_summary_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1206 depreciation_amortization_summary_ferc1: Nullifying outlying values.


depreciation_amortization_summary_ferc1: Nullifying outlying values.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1259 depreciation_amortization_summary_ferc1: Replacing specified values with NA.


depreciation_amortization_summary_ferc1: Replacing specified values with NA.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1246 depreciation_amortization_summary_ferc1: Dropping remaining invalid rows.


depreciation_amortization_summary_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:3348 depreciation_amortization_summary_ferc1: merging metadata


depreciation_amortization_summary_ferc1: merging metadata


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1264 depreciation_amortization_summary_ferc1: Enforcing database schema on dataframe.


depreciation_amortization_summary_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1072 electric_energy_dispositions_ferc1: Processing DBF data pre-concatenation.


electric_energy_dispositions_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_elctrc_erg_acct']


Aligning row numbers from DBF row to XBRL map for ['f1_elctrc_erg_acct']


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_dispositions_ferc1: Attempting to rename 10 columns.


electric_energy_dispositions_ferc1: Attempting to rename 10 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1093 electric_energy_dispositions_ferc1: Processing XBRL data pre-concatenation.


electric_energy_dispositions_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_dispositions_ferc1: Attempting to rename 0 columns.


electric_energy_dispositions_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1147 electric_energy_dispositions_ferc1: Unstacking balances to the report years.


electric_energy_dispositions_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_dispositions_ferc1: Attempting to rename 8 columns.


electric_energy_dispositions_ferc1: Attempting to rename 8 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1374 electric_energy_dispositions_ferc1: After selection of dates based on the report year, we have 58.7% of the original table.


electric_energy_dispositions_ferc1: After selection of dates based on the report year, we have 58.7% of the original table.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1266 electric_energy_dispositions_ferc1: No XBRL instant table found.


electric_energy_dispositions_ferc1: No XBRL instant table found.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1185 electric_energy_dispositions_ferc1: applying wide_to_tidy for xbrl


electric_energy_dispositions_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_dispositions_ferc1: Attempting to rename 4 columns.


electric_energy_dispositions_ferc1: Attempting to rename 4 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:910 electric_energy_dispositions_ferc1: Concatenating DBF + XBRL dataframes.


electric_energy_dispositions_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1168 electric_energy_dispositions_ferc1: Normalizing freeform string columns.


electric_energy_dispositions_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1192 electric_energy_dispositions_ferc1: Categorizing string columns using a controlled vocabulary.


electric_energy_dispositions_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1217 electric_energy_dispositions_ferc1: Converting units and renaming columns accordingly.


electric_energy_dispositions_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1179 electric_energy_dispositions_ferc1: Stripping non-numeric values from [].


electric_energy_dispositions_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1206 electric_energy_dispositions_ferc1: Nullifying outlying values.


electric_energy_dispositions_ferc1: Nullifying outlying values.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1259 electric_energy_dispositions_ferc1: Replacing specified values with NA.


electric_energy_dispositions_ferc1: Replacing specified values with NA.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1246 electric_energy_dispositions_ferc1: Dropping remaining invalid rows.


electric_energy_dispositions_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:823 65.7% of records (46625 rows) contain only {'', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


65.7% of records (46625 rows) contain only {'', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1032 electric_energy_dispositions_ferc1: Merging metadata


electric_energy_dispositions_ferc1: Merging metadata


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1264 electric_energy_dispositions_ferc1: Enforcing database schema on dataframe.


electric_energy_dispositions_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1072 electric_energy_sources_ferc1: Processing DBF data pre-concatenation.


electric_energy_sources_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_elctrc_erg_acct']


Aligning row numbers from DBF row to XBRL map for ['f1_elctrc_erg_acct']


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_sources_ferc1: Attempting to rename 10 columns.


electric_energy_sources_ferc1: Attempting to rename 10 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1093 electric_energy_sources_ferc1: Processing XBRL data pre-concatenation.


electric_energy_sources_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_sources_ferc1: Attempting to rename 0 columns.


electric_energy_sources_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1147 electric_energy_sources_ferc1: Unstacking balances to the report years.


electric_energy_sources_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_sources_ferc1: Attempting to rename 17 columns.


electric_energy_sources_ferc1: Attempting to rename 17 columns.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1374 electric_energy_sources_ferc1: After selection of dates based on the report year, we have 58.7% of the original table.


electric_energy_sources_ferc1: After selection of dates based on the report year, we have 58.7% of the original table.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1266 electric_energy_sources_ferc1: No XBRL instant table found.


electric_energy_sources_ferc1: No XBRL instant table found.


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.ferc1:1185 electric_energy_sources_ferc1: applying wide_to_tidy for xbrl


electric_energy_sources_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:18 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_energy_sources_ferc1: Attempting to rename 4 columns.


electric_energy_sources_ferc1: Attempting to rename 4 columns.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.ferc1:910 electric_energy_sources_ferc1: Concatenating DBF + XBRL dataframes.


electric_energy_sources_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1168 electric_energy_sources_ferc1: Normalizing freeform string columns.


electric_energy_sources_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1192 electric_energy_sources_ferc1: Categorizing string columns using a controlled vocabulary.


electric_energy_sources_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1217 electric_energy_sources_ferc1: Converting units and renaming columns accordingly.


electric_energy_sources_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1179 electric_energy_sources_ferc1: Stripping non-numeric values from [].


electric_energy_sources_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1206 electric_energy_sources_ferc1: Nullifying outlying values.


electric_energy_sources_ferc1: Nullifying outlying values.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1259 electric_energy_sources_ferc1: Replacing specified values with NA.


electric_energy_sources_ferc1: Replacing specified values with NA.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1246 electric_energy_sources_ferc1: Dropping remaining invalid rows.


electric_energy_sources_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:823 52.2% of records (37927 rows) contain only {'', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


52.2% of records (37927 rows) contain only {'', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.ferc1:1032 electric_energy_sources_ferc1: Merging metadata


electric_energy_sources_ferc1: Merging metadata


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1264 electric_energy_sources_ferc1: Enforcing database schema on dataframe.


electric_energy_sources_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.ferc1:3495 Heyyyy dropping that one row


Heyyyy dropping that one row


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.ferc1:1072 electric_opex_ferc1: Processing DBF data pre-concatenation.


electric_opex_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_elc_op_mnt_expn']


Aligning row numbers from DBF row to XBRL map for ['f1_elc_op_mnt_expn']


2023-01-27 16:22:19 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_opex_ferc1: Attempting to rename 9 columns.


electric_opex_ferc1: Attempting to rename 9 columns.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1062 electric_opex_ferc1: Dropping rows where primary key and data columns are duplicated.


electric_opex_ferc1: Dropping rows where primary key and data columns are duplicated.


  dupes_w_possible_unique_data.loc[
2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:328 Dropped 103 duplicate records: 0.0% of total rows.


Dropped 103 duplicate records: 0.0% of total rows.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1093 electric_opex_ferc1: Processing XBRL data pre-concatenation.


electric_opex_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_opex_ferc1: Attempting to rename 0 columns.


electric_opex_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1147 electric_opex_ferc1: Unstacking balances to the report years.


electric_opex_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_opex_ferc1: Attempting to rename 171 columns.


electric_opex_ferc1: Attempting to rename 171 columns.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1374 electric_opex_ferc1: After selection of dates based on the report year, we have 50.5% of the original table.


electric_opex_ferc1: After selection of dates based on the report year, we have 50.5% of the original table.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1266 electric_opex_ferc1: No XBRL instant table found.


electric_opex_ferc1: No XBRL instant table found.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1185 electric_opex_ferc1: applying wide_to_tidy for xbrl


electric_opex_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_opex_ferc1: Attempting to rename 3 columns.


electric_opex_ferc1: Attempting to rename 3 columns.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:910 electric_opex_ferc1: Concatenating DBF + XBRL dataframes.


electric_opex_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1168 electric_opex_ferc1: Normalizing freeform string columns.


electric_opex_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1192 electric_opex_ferc1: Categorizing string columns using a controlled vocabulary.


electric_opex_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1217 electric_opex_ferc1: Converting units and renaming columns accordingly.


electric_opex_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1179 electric_opex_ferc1: Stripping non-numeric values from [].


electric_opex_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1206 electric_opex_ferc1: Nullifying outlying values.


electric_opex_ferc1: Nullifying outlying values.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1259 electric_opex_ferc1: Replacing specified values with NA.


electric_opex_ferc1: Replacing specified values with NA.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1246 electric_opex_ferc1: Dropping remaining invalid rows.


electric_opex_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1032 electric_opex_ferc1: Merging metadata


electric_opex_ferc1: Merging metadata


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.classes:1264 electric_opex_ferc1: Enforcing database schema on dataframe.


electric_opex_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:21 [    INFO] catalystcoop.pudl.transform.ferc1:1072 electric_plant_depreciation_changes_ferc1: Processing DBF data pre-concatenation.


electric_plant_depreciation_changes_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_accumdepr_prvsn']


Aligning row numbers from DBF row to XBRL map for ['f1_accumdepr_prvsn']


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_plant_depreciation_changes_ferc1: Attempting to rename 12 columns.


electric_plant_depreciation_changes_ferc1: Attempting to rename 12 columns.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1185 electric_plant_depreciation_changes_ferc1: applying wide_to_tidy for dbf


electric_plant_depreciation_changes_ferc1: applying wide_to_tidy for dbf


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1093 electric_plant_depreciation_changes_ferc1: Processing XBRL data pre-concatenation.


electric_plant_depreciation_changes_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1147 electric_plant_depreciation_changes_ferc1: Unstacking balances to the report years.


electric_plant_depreciation_changes_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_plant_depreciation_changes_ferc1: Attempting to rename 2 columns.


electric_plant_depreciation_changes_ferc1: Attempting to rename 2 columns.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_plant_depreciation_changes_ferc1: Attempting to rename 13 columns.


electric_plant_depreciation_changes_ferc1: Attempting to rename 13 columns.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1374 electric_plant_depreciation_changes_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


electric_plant_depreciation_changes_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1272 electric_plant_depreciation_changes_ferc1: Both XBRL instant & duration tables found.


electric_plant_depreciation_changes_ferc1: Both XBRL instant & duration tables found.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1309 electric_plant_depreciation_changes_ferc1: Combining XBRL instant & duration tables using CONCATENATION.


electric_plant_depreciation_changes_ferc1: Combining XBRL instant & duration tables using CONCATENATION.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1185 electric_plant_depreciation_changes_ferc1: applying wide_to_tidy for xbrl


electric_plant_depreciation_changes_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1144 electric_plant_depreciation_changes_ferc1: Attempting to rename 4 columns.


electric_plant_depreciation_changes_ferc1: Attempting to rename 4 columns.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:967 electric_plant_depreciation_changes_ferc1: Selecting DBF rows with desired values in depreciation_type.


electric_plant_depreciation_changes_ferc1: Selecting DBF rows with desired values in depreciation_type.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:910 electric_plant_depreciation_changes_ferc1: Concatenating DBF + XBRL dataframes.


electric_plant_depreciation_changes_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1168 electric_plant_depreciation_changes_ferc1: Normalizing freeform string columns.


electric_plant_depreciation_changes_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1192 electric_plant_depreciation_changes_ferc1: Categorizing string columns using a controlled vocabulary.


electric_plant_depreciation_changes_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1217 electric_plant_depreciation_changes_ferc1: Converting units and renaming columns accordingly.


electric_plant_depreciation_changes_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1179 electric_plant_depreciation_changes_ferc1: Stripping non-numeric values from [].


electric_plant_depreciation_changes_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1206 electric_plant_depreciation_changes_ferc1: Nullifying outlying values.


electric_plant_depreciation_changes_ferc1: Nullifying outlying values.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1259 electric_plant_depreciation_changes_ferc1: Replacing specified values with NA.


electric_plant_depreciation_changes_ferc1: Replacing specified values with NA.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1246 electric_plant_depreciation_changes_ferc1: Dropping remaining invalid rows.


electric_plant_depreciation_changes_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.ferc1:1032 electric_plant_depreciation_changes_ferc1: Merging metadata


electric_plant_depreciation_changes_ferc1: Merging metadata


2023-01-27 16:22:22 [    INFO] catalystcoop.pudl.transform.classes:1264 electric_plant_depreciation_changes_ferc1: Enforcing database schema on dataframe.


electric_plant_depreciation_changes_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:23 [    INFO] catalystcoop.pudl.transform.ferc1:1072 fuel_ferc1: Processing DBF data pre-concatenation.


fuel_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:23 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 17 columns.


fuel_ferc1: Attempting to rename 17 columns.


['f1_fuel_2000_12_24_3_1'].


fuel_ferc1: Found 1 duplicate record_ids: 
['f1_fuel_2000_12_24_3_1'].


2023-01-27 16:22:23 [    INFO] catalystcoop.pudl.transform.classes:1217 fuel_ferc1: Converting units and renaming columns accordingly.


fuel_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:23 [    INFO] catalystcoop.pudl.transform.classes:1168 fuel_ferc1: Normalizing freeform string columns.


fuel_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1192 fuel_ferc1: Categorizing string columns using a controlled vocabulary.


fuel_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.ferc1:1147 fuel_ferc1: Unstacking balances to the report years.


fuel_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 0 columns.


fuel_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.ferc1:1374 fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


fuel_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.ferc1:1266 fuel_ferc1: No XBRL instant table found.


fuel_ferc1: No XBRL instant table found.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1144 fuel_ferc1: Attempting to rename 15 columns.


fuel_ferc1: Attempting to rename 15 columns.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1217 fuel_ferc1: Converting units and renaming columns accordingly.


fuel_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1168 fuel_ferc1: Normalizing freeform string columns.


fuel_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1192 fuel_ferc1: Categorizing string columns using a controlled vocabulary.


fuel_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.ferc1:1772 fuel_ferc1: Aggregating 36 rows with duplicate primary keys out of 1312 total rows.


fuel_ferc1: Aggregating 36 rows with duplicate primary keys out of 1312 total rows.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.ferc1:1776 fuel_ferc1: Dropping 92 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


fuel_ferc1: Dropping 92 records with inconsistent fuel units preventing aggregation out of 1312 total rows.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.ferc1:910 fuel_ferc1: Concatenating DBF + XBRL dataframes.


fuel_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1246 fuel_ferc1: Dropping remaining invalid rows.


fuel_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:823 68.7% of records (107471 rows) contain only {0, '', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


68.7% of records (107471 rows) contain only {0, '', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:823 0.4% of records (217 rows) contain only {'', 'ant1-3', '0', 'must 456', nan, '-', 'must 123', 'not applicable', 'elk 1-3', <NA>} values in required columns. Dropped these 💩💩💩 records.


0.4% of records (217 rows) contain only {'', 'ant1-3', '0', 'must 456', nan, '-', 'must 123', 'not applicable', 'elk 1-3', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.ferc1:1846 fuel_ferc1: Dropping 0/48818rows representing plant-level all-fuel totals.


fuel_ferc1: Dropping 0/48818rows representing plant-level all-fuel totals.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:1233 fuel_ferc1: Correcting inferred non-standard column units.


fuel_ferc1: Correcting inferred non-standard column units.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==coal.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==coal.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:719 181/10820 (1.67%) of records could not be corrected and were set to NA.


181/10820 (1.67%) of records could not be corrected and were set to NA.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==gas.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==gas.


2023-01-27 16:22:24 [    INFO] catalystcoop.pudl.transform.classes:719 860/15769 (5.45%) of records could not be corrected and were set to NA.


860/15769 (5.45%) of records could not be corrected and were set to NA.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==oil.


Correcting units of fuel_mmbtu_per_unit where fuel_type_code_pudl==oil.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:719 643/17623 (3.65%) of records could not be corrected and were set to NA.


643/17623 (3.65%) of records could not be corrected and were set to NA.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==coal.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==coal.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:719 1367/10820 (12.63%) of records could not be corrected and were set to NA.


1367/10820 (12.63%) of records could not be corrected and were set to NA.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==gas.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==gas.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:719 2071/15769 (13.13%) of records could not be corrected and were set to NA.


2071/15769 (13.13%) of records could not be corrected and were set to NA.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:698 Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==oil.


Correcting units of fuel_cost_per_mmbtu where fuel_type_code_pudl==oil.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:719 8083/17623 (45.87%) of records could not be corrected and were set to NA.


8083/17623 (45.87%) of records could not be corrected and were set to NA.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:1264 fuel_ferc1: Enforcing database schema on dataframe.


fuel_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.ferc1:3135 Dropped 150 records (0.0% oftotal) records from 2003 from the f1_incm_stmnt_2 DBF table that have known incorrect row numbers.


Dropped 150 records (0.0% oftotal) records from 2003 from the f1_incm_stmnt_2 DBF table that have known incorrect row numbers.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.ferc1:1072 income_statement_ferc1: Processing DBF data pre-concatenation.


income_statement_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.ferc1:1135 income_statement_ferc1: After selection only annual records, we have 38.9% of the original table.


income_statement_ferc1: After selection only annual records, we have 38.9% of the original table.


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.ferc1:348 Aligning row numbers from DBF row to XBRL map for ['f1_income_stmnt', 'f1_incm_stmnt_2']


Aligning row numbers from DBF row to XBRL map for ['f1_income_stmnt', 'f1_incm_stmnt_2']


2023-01-27 16:22:25 [    INFO] catalystcoop.pudl.transform.classes:1144 income_statement_ferc1: Attempting to rename 16 columns.


income_statement_ferc1: Attempting to rename 16 columns.


2023-01-27 16:22:26 [    INFO] catalystcoop.pudl.transform.ferc1:1185 income_statement_ferc1: applying wide_to_tidy for dbf


income_statement_ferc1: applying wide_to_tidy for dbf


2023-01-27 16:22:27 [    INFO] catalystcoop.pudl.transform.ferc1:1062 income_statement_ferc1: Dropping rows where primary key and data columns are duplicated.


income_statement_ferc1: Dropping rows where primary key and data columns are duplicated.


  dupes_w_possible_unique_data.loc[
2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.ferc1:328 Dropped 45624 duplicate records: 2.2% of total rows.


Dropped 45624 duplicate records: 2.2% of total rows.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.ferc1:1093 income_statement_ferc1: Processing XBRL data pre-concatenation.


income_statement_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.classes:1144 income_statement_ferc1: Attempting to rename 0 columns.


income_statement_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.ferc1:1147 income_statement_ferc1: Unstacking balances to the report years.


income_statement_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.classes:1144 income_statement_ferc1: Attempting to rename 68 columns.


income_statement_ferc1: Attempting to rename 68 columns.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.ferc1:1374 income_statement_ferc1: After selection of dates based on the report year, we have 51.0% of the original table.


income_statement_ferc1: After selection of dates based on the report year, we have 51.0% of the original table.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.ferc1:1266 income_statement_ferc1: No XBRL instant table found.


income_statement_ferc1: No XBRL instant table found.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.ferc1:1185 income_statement_ferc1: applying wide_to_tidy for xbrl


income_statement_ferc1: applying wide_to_tidy for xbrl


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.classes:1144 income_statement_ferc1: Attempting to rename 4 columns.


income_statement_ferc1: Attempting to rename 4 columns.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.ferc1:910 income_statement_ferc1: Concatenating DBF + XBRL dataframes.


income_statement_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.classes:1168 income_statement_ferc1: Normalizing freeform string columns.


income_statement_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:28 [    INFO] catalystcoop.pudl.transform.classes:1192 income_statement_ferc1: Categorizing string columns using a controlled vocabulary.


income_statement_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1217 income_statement_ferc1: Converting units and renaming columns accordingly.


income_statement_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1179 income_statement_ferc1: Stripping non-numeric values from [].


income_statement_ferc1: Stripping non-numeric values from [].


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1206 income_statement_ferc1: Nullifying outlying values.


income_statement_ferc1: Nullifying outlying values.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1259 income_statement_ferc1: Replacing specified values with NA.


income_statement_ferc1: Replacing specified values with NA.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1246 income_statement_ferc1: Dropping remaining invalid rows.


income_statement_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:823 84.2% of records (1697454 rows) contain only {'', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


84.2% of records (1697454 rows) contain only {'', <NA>, nan} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:1032 income_statement_ferc1: Merging metadata


income_statement_ferc1: Merging metadata


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1264 income_statement_ferc1: Enforcing database schema on dataframe.


income_statement_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:1072 plants_hydro_ferc1: Processing DBF data pre-concatenation.


plants_hydro_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_hydro_ferc1: Attempting to rename 41 columns.


plants_hydro_ferc1: Attempting to rename 41 columns.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:1093 plants_hydro_ferc1: Processing XBRL data pre-concatenation.


plants_hydro_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_hydro_ferc1: Attempting to rename 0 columns.


plants_hydro_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:1147 plants_hydro_ferc1: Unstacking balances to the report years.


plants_hydro_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_hydro_ferc1: Attempting to rename 0 columns.


plants_hydro_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:1374 plants_hydro_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


plants_hydro_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:1272 plants_hydro_ferc1: Both XBRL instant & duration tables found.


plants_hydro_ferc1: Both XBRL instant & duration tables found.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:1291 plants_hydro_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


plants_hydro_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_hydro_ferc1: Attempting to rename 41 columns.


plants_hydro_ferc1: Attempting to rename 41 columns.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.ferc1:910 plants_hydro_ferc1: Concatenating DBF + XBRL dataframes.


plants_hydro_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1168 plants_hydro_ferc1: Normalizing freeform string columns.


plants_hydro_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1192 plants_hydro_ferc1: Categorizing string columns using a controlled vocabulary.


plants_hydro_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1217 plants_hydro_ferc1: Converting units and renaming columns accordingly.


plants_hydro_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1179 plants_hydro_ferc1: Stripping non-numeric values from ['project_num'].


plants_hydro_ferc1: Stripping non-numeric values from ['project_num'].


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1206 plants_hydro_ferc1: Nullifying outlying values.


plants_hydro_ferc1: Nullifying outlying values.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1259 plants_hydro_ferc1: Replacing specified values with NA.


plants_hydro_ferc1: Replacing specified values with NA.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:1246 plants_hydro_ferc1: Dropping remaining invalid rows.


plants_hydro_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:29 [    INFO] catalystcoop.pudl.transform.classes:823 60.5% of records (10440 rows) contain only {0, '', 'none', '—', '0', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


60.5% of records (10440 rows) contain only {0, '', 'none', '—', '0', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:823 0.4% of records (30 rows) contain only {'', '0', nan, '-', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


0.4% of records (30 rows) contain only {'', '0', nan, '-', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1264 plants_hydro_ferc1: Enforcing database schema on dataframe.


plants_hydro_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1072 plants_pumped_storage_ferc1: Processing DBF data pre-concatenation.


plants_pumped_storage_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_pumped_storage_ferc1: Attempting to rename 45 columns.


plants_pumped_storage_ferc1: Attempting to rename 45 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1093 plants_pumped_storage_ferc1: Processing XBRL data pre-concatenation.


plants_pumped_storage_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_pumped_storage_ferc1: Attempting to rename 0 columns.


plants_pumped_storage_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1147 plants_pumped_storage_ferc1: Unstacking balances to the report years.


plants_pumped_storage_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_pumped_storage_ferc1: Attempting to rename 0 columns.


plants_pumped_storage_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1374 plants_pumped_storage_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


plants_pumped_storage_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1272 plants_pumped_storage_ferc1: Both XBRL instant & duration tables found.


plants_pumped_storage_ferc1: Both XBRL instant & duration tables found.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1291 plants_pumped_storage_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


plants_pumped_storage_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_pumped_storage_ferc1: Attempting to rename 46 columns.


plants_pumped_storage_ferc1: Attempting to rename 46 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:910 plants_pumped_storage_ferc1: Concatenating DBF + XBRL dataframes.


plants_pumped_storage_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1168 plants_pumped_storage_ferc1: Normalizing freeform string columns.


plants_pumped_storage_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1192 plants_pumped_storage_ferc1: Categorizing string columns using a controlled vocabulary.


plants_pumped_storage_ferc1: Categorizing string columns using a controlled vocabulary.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1217 plants_pumped_storage_ferc1: Converting units and renaming columns accordingly.


plants_pumped_storage_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1179 plants_pumped_storage_ferc1: Stripping non-numeric values from ['project_num'].


plants_pumped_storage_ferc1: Stripping non-numeric values from ['project_num'].


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1206 plants_pumped_storage_ferc1: Nullifying outlying values.


plants_pumped_storage_ferc1: Nullifying outlying values.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1259 plants_pumped_storage_ferc1: Replacing specified values with NA.


plants_pumped_storage_ferc1: Replacing specified values with NA.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1246 plants_pumped_storage_ferc1: Dropping remaining invalid rows.


plants_pumped_storage_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:823 89.1% of records (5636 rows) contain only {0, '', 'none', '—', '0', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


89.1% of records (5636 rows) contain only {0, '', 'none', '—', '0', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:823 21.0% of records (145 rows) contain only {'', '0', nan, '-', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


21.0% of records (145 rows) contain only {'', '0', nan, '-', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1264 plants_pumped_storage_ferc1: Enforcing database schema on dataframe.


plants_pumped_storage_ferc1: Enforcing database schema on dataframe.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1072 plants_small_ferc1: Processing DBF data pre-concatenation.


plants_small_ferc1: Processing DBF data pre-concatenation.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_small_ferc1: Attempting to rename 19 columns.


plants_small_ferc1: Attempting to rename 19 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1093 plants_small_ferc1: Processing XBRL data pre-concatenation.


plants_small_ferc1: Processing XBRL data pre-concatenation.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_small_ferc1: Attempting to rename 0 columns.


plants_small_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1147 plants_small_ferc1: Unstacking balances to the report years.


plants_small_ferc1: Unstacking balances to the report years.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_small_ferc1: Attempting to rename 0 columns.


plants_small_ferc1: Attempting to rename 0 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1374 plants_small_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


plants_small_ferc1: After selection of dates based on the report year, we have 100.0% of the original table.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1272 plants_small_ferc1: Both XBRL instant & duration tables found.


plants_small_ferc1: Both XBRL instant & duration tables found.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:1291 plants_small_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


plants_small_ferc1: Combining XBRL instant & duration tables using RIGHT-MERGE.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1144 plants_small_ferc1: Attempting to rename 20 columns.


plants_small_ferc1: Attempting to rename 20 columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:910 plants_small_ferc1: Concatenating DBF + XBRL dataframes.


plants_small_ferc1: Concatenating DBF + XBRL dataframes.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1168 plants_small_ferc1: Normalizing freeform string columns.


plants_small_ferc1: Normalizing freeform string columns.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1206 plants_small_ferc1: Nullifying outlying values.


plants_small_ferc1: Nullifying outlying values.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.classes:1217 plants_small_ferc1: Converting units and renaming columns accordingly.


plants_small_ferc1: Converting units and renaming columns accordingly.


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:2199 plants_small_ferc1: Extracting FERC license from plant name


plants_small_ferc1: Extracting FERC license from plant name


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:2439 plants_small_ferc1: Labeling header rows


plants_small_ferc1: Labeling header rows


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:2585 plants_small_ferc1: Labeling total rows


plants_small_ferc1: Labeling total rows


2023-01-27 16:22:30 [    INFO] catalystcoop.pudl.transform.ferc1:2555 plants_small_ferc1: Labeling notes rows


plants_small_ferc1: Labeling notes rows


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  return util_groups.apply(lambda x: self._label_note_rows_group(x))
2023-01-27 16:22:36 [    INFO] catalystcoop.pudl.transform.ferc1:2754 plants_small_ferc1: Forward filling header fuel and plant types


plants_small_ferc1: Forward filling header fuel and plant types


2023-01-27 16:22:36 [    INFO] catalystcoop.pudl.transform.ferc1:2872 plants_small_ferc1: Getting fuel type (hydro) from plant name


plants_small_ferc1: Getting fuel type (hydro) from plant name


2023-01-27 16:22:36 [    INFO] catalystcoop.pudl.transform.classes:1192 plants_small_ferc1: Categorizing string columns using a controlled vocabulary.


plants_small_ferc1: Categorizing string columns using a controlled vocabulary.




fuel_type_from_header: Found 31 uncategorized values: {'miscellaneous other power generation expense', 'steam plant', 'internal combustion: (emergency standby', 'auke bay internal combustion', 'neal shoals-hydro license', 'auke bay internal combustion:', 'other-leased:', 'renewables:', 'lemon creek internal combustion', 'lemon creek internal combustion:', 'internal combustion auxiliary', 'renewables', 'international combustion', 'other production:', 'lewiston canal facilities:', 'combined cycle plant:', 'combustion turbine', 'interal combustion:', 'steam:', 'other general ops supervision & engineering', 'internal conbustion:', 'steam heating plant', 'internal combustion peaking units', 'other:', 'manufactured gas plant remediation project', 'other / internal combustion :', 'wind - solar', 'waste water removal cost', 'internal combustion :', 'internal combustion-diesel', nan}




plant_type_from_header: Found 14 uncategorized values: {'landfill:', 'miscellaneous other power generation expense', 'renewables:', 'dinner lake gas', 'manufactured gas plant remediation project', 'wind - solar', 'other-leased:', 'lewiston canal facilities:', 'other:', 'waste water removal cost', 'renewables', 'other general ops supervision & engineering', 'other production:', nan}


2023-01-27 16:22:36 [    INFO] catalystcoop.pudl.transform.ferc1:2807 plants_small_ferc1: Filling NA and 'other' fuel and plant types with header info


plants_small_ferc1: Filling NA and 'other' fuel and plant types with header info


2023-01-27 16:22:36 [    INFO] catalystcoop.pudl.transform.ferc1:2846 Added fuel types to 6590 plant rows (40%). Added plant types to 11389 plant rows (69%).


Added fuel types to 6590 plant rows (40%). Added plant types to 11389 plant rows (69%).


2023-01-27 16:22:36 [    INFO] catalystcoop.pudl.transform.ferc1:2955 plants_small_ferc1: Mapping notes and ferc license from notes rows


plants_small_ferc1: Mapping notes and ferc license from notes rows


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  sg_notes = groups.apply(lambda x: associate_notes_with_values_group(x))
2023-01-27 16:22:38 [    INFO] catalystcoop.pudl.transform.ferc1:3009 Mapped 734 notes to plant rows.


Mapped 734 notes to plant rows.


2023-01-27 16:22:38 [    INFO] catalystcoop.pudl.transform.ferc1:3031 plants_small_ferc1: Spot fixing some rows


plants_small_ferc1: Spot fixing some rows


2023-01-27 16:22:38 [    INFO] catalystcoop.pudl.transform.classes:1246 plants_small_ferc1: Dropping remaining invalid rows.


plants_small_ferc1: Dropping remaining invalid rows.


2023-01-27 16:22:38 [    INFO] catalystcoop.pudl.transform.classes:823 20.0% of records (4257 rows) contain only {'note', 'header'} values in required columns. Dropped these 💩💩💩 records.


20.0% of records (4257 rows) contain only {'note', 'header'} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:38 [    INFO] catalystcoop.pudl.transform.classes:823 4.2% of records (715 rows) contain only {0, '', 'none', '0', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


4.2% of records (715 rows) contain only {0, '', 'none', '0', nan, <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:38 [    INFO] catalystcoop.pudl.transform.classes:823 0.4% of records (67 rows) contain only {'', '0', nan, '-', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


0.4% of records (67 rows) contain only {'', '0', nan, '-', 'not applicable', <NA>} values in required columns. Dropped these 💩💩💩 records.


2023-01-27 16:22:38 [    INFO] catalystcoop.pudl.transform.classes:1264 plants_small_ferc1: Enforcing database schema on dataframe.


plants_small_ferc1: Enforcing database schema on dataframe.


TypeError: PlantsSteamFerc1TableTransformer.transform() missing 1 required positional argument: 'transformed_fuel'

In [None]:
transformed_table