In [1]:
%use dataframe
%use kandy


In [8]:
/**
 * Maps a fuel code to a coarse, human-readable fuel category.
 *
 * The receiver is compared exactly as given (case-sensitive, no trimming) against
 * the known codes below; several codes may share one category.
 *
 * @param defaultValue label returned when the receiver is not a known code.
 * @return the category label for the code, or [defaultValue] when the code is unknown.
 */
fun String.toFuelTypeSimplified(defaultValue: String = "N/A"): String = when (this) {
    // Multi-code groups
    "BIT", "LIG", "RC", "SUB", "WC" -> "Coal"
    "BFG", "COG", "LFG", "NG", "OG", "PG", "PRG", "SGC" -> "Gas"
    "DFO", "JF", "KER", "PC", "RFO", "WO" -> "Oil/Petroleum & Byproducts"
    "OBG", "OBL", "OBS", "WDL", "WDS" -> "Wood/Biomass"

    // Single-code categories
    "AB" -> "Agricultural byproduct"
    "BLQ" -> "Black liquor"
    "GEO" -> "Geo-Thermal"
    "MSW" -> "Municipal Solid Waste"
    "MWH" -> "Energy Storage"
    "NUC" -> "Nuclear"
    "OTH" -> "Other"
    "PUR" -> "Purchased steam"
    "SUN" -> "Solar"
    "TDF" -> "Tire-derived fuel"
    "WAT" -> "Hydro"
    "WH" -> "Waste heat"
    "WND" -> "Wind"

    // Anything unrecognised falls back to the caller-supplied label.
    else -> defaultValue
}

In [18]:
import java.nio.file.Paths
import kotlin.io.path.inputStream

// Accessor for the raw fuel-code column selected from the "PLNT22" sheet.
val primaryFuel by column<String>("PLPRMFL")
// Accessor for the derived column holding the simplified fuel category.
val fuel by column<String>("Fuel")

// Categories left out of the export ("skip the smallest categories").
// Built once here instead of once per row inside the filter lambda.
val skippedFuelCategories = setOf("Waste heat", "Agricultural byproduct", "Purchased steam", "Other")

// Reads the "PLNT22" sheet of the eGRID workbook, derives a simplified "Fuel"
// column from "PLPRMFL", and writes Place/Lat/Lon/Fuel to refined-data/epa/power-plants.csv.
// `use` closes the workbook stream as soon as the sheet has been parsed;
// the third readExcel argument (1) is passed through unchanged from the original call.
Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream()
    .use { workbook -> DataFrame.readExcel(workbook, "PLNT22", 1) }
    .select("PNAME", "LAT", "LON", "PLPRMFL")
    .add(fuel) {
        primaryFuel.getValueOrNull(it)?.toFuelTypeSimplified()
    }
    // Only rows without a PLPRMFL value produce null here; unknown codes map to
    // the "N/A" default of toFuelTypeSimplified() and are therefore kept.
    .filter {
        fuel.getValueOrNull(it) != null
    }
    .select("PNAME", "LAT", "LON", "Fuel")
    .rename("PNAME").into("Place")
    .rename("LAT").into("Lat")
    .rename("LON").into("Lon")
    // skip the smallest categories
    .filter {
        fuel.getValue(it) !in skippedFuelCategories
    }
    .writeCSV(Paths.get("refined-data").resolve("epa").resolve("power-plants.csv").toFile())
    


In [17]:
// Counts the rows of the refined CSV per "Fuel" category, largest category first.
// `use` closes the file stream once the CSV has been parsed.
// The column is addressed by name so the cell does not depend on which
// notebook-level `fuel` accessor happens to be bound when it is (re-)run.
Paths.get("refined-data").resolve("epa").resolve("power-plants.csv").inputStream()
    .use { csv -> DataFrame.readCSV(csv) }
    .groupBy("Fuel")
    .count()
    .sortByDesc("count")
   

    

Fuel,count
Solar,5051
Gas,2331
Hydro,1462
Wind,1303
Oil/Petroleum & Byproducts,881
Coal,253
Wood/Biomass,190
Energy Storage,184
Black liquor,70
Geo-Thermal,66


In [35]:
// Column accessors for the aggregated/derived columns built below.
// NOTE(review): `nameplaceCap` looks like a typo for "nameplateCap"; the name is
// kept because other cells may reference it.
val nameplaceCap by column<Double>("Nameplate Capacity")
val annualProduction by column<Double>("Annual Production")
// Raw fuel-code column of the "GEN22" sheet. This rebinds the notebook-level
// name `fuel`, which an earlier cell binds to the "Fuel" column.
val fuel by column<String>("FUELG1")
val fuelDesc by column<String>("Fuel")
val theoretical by column<Double>("Theoretical")

// Fuel codes removed before aggregation. Built once instead of per row.
val excludedFuelCodes = setOf(
    "WAT",
    "WND",
    "SUN",
    "OTH",
    "MWH",
    "WH",
    "PUR",
    "TDF",
    "OBG",
    "OBL",
    "OBS",
    "WDS",
    "WDL",
    "BLQ",
    "MSW",
    "AB",
)

// Simplified fuel categories whose "Diff" values are summed into the final result.
val reportedFuelCategories = setOf("Gas", "Geo-Thermal", "Nuclear")

// Sums, over the reported categories, the gap between theoretical full-year
// output (summed NAMEPCAP x 365 x 24 hours) and summed GENNTAN.
// NOTE(review): presumably NAMEPCAP is in MW and GENNTAN in MWh, which would make
// the result MWh — confirm against the eGRID documentation.
// `use` closes the workbook stream as soon as the sheet has been parsed.
val diff = Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream()
    .use { workbook -> DataFrame.readExcel(workbook, "GEN22", 1) }
    .filter {
        fuel.getValue(it) !in excludedFuelCodes
    }
    .add(fuelDesc) {
        fuel.getValueOrNull(it)?.toFuelTypeSimplified()
    }
    // Drops rows that have no FUELG1 value (the only case that yields null above).
    .filter {
        fuelDesc.getValueOrNull(it) != null
    }
    .groupBy(fuelDesc)
    .aggregate {
        sum("NAMEPCAP") into nameplaceCap
        sum("GENNTAN") into annualProduction
    }
    // Output if the summed capacity ran for every hour of a 365-day year.
    .add(theoretical) {
        nameplaceCap.getValue(it) * 365.0 * 24.0
    }
    .add("Diff") {
        theoretical.getValue(it) - annualProduction.getValue(it)
    }
    // 100 minus production expressed as a percentage of the theoretical output.
    .add("% Spare capacity") {
        100.0 - (annualProduction.getValue(it) / (theoretical.getValue(it) / 100))
    }
    .filter {
        fuelDesc.getValue(it) in reportedFuelCategories
    }
    .sum("Diff")
    .toDouble()

diff
// NOTE(review): an earlier note here recorded "Total diff 4.880892886945178E9"; that
// figure does not match what this cell currently prints — presumably it predates
// one of the filters above. Confirm which figure is intended.
    

3.45561326199518E9

In [34]:
// Rounds a hard-coded total to the nearest Long.
// NOTE(review): this literal is not the value the `diff` cell currently prints —
// presumably it was copied from an earlier run; confirm it is still needed.
4.880892886945178E9.roundToLong()

4880892887

In [36]:
// Rounds a hard-coded copy of the `diff` cell's printed output to the nearest Long.
// NOTE(review): the literal is pasted from the output rather than read from `diff`,
// so it will go stale if the pipeline changes.
3.45561326199518E9.roundToLong()

3455613262

4880892887
3455613262

