In [1]:
%use dataframe
%use kandy

Source : https://www.epa.gov/egrid/download-data

In [2]:
import java.nio.file.Paths
import kotlin.io.path.inputStream

DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "UNT22",
    1
)
    .take(10)

SEQUNT22,YEAR,PSTATABB,PNAME,ORISPL,UNITID,PRMVR,UNTOPST,CAMDFLAG,PRGCODE,BOTFIRTY,NUMGEN,FUELU1,HRSOP,HTIAN,HTIOZ,NOXAN,NOXOZ,SO2AN,CO2AN,HGAN,HTIANSRC,HTIOZSRC,NOXANSRC,NOXOZSRC,SO2SRC,CO2SRC,HGSRC,SO2CTLDV,NOXCTLDV,HGCTLDV,UNTYRONL
1.0,2022.0,AK,Agrium Kenai Nitrogen Operations,54452.0,744A,GT,OS,,,,,NG,,,,,,,,,,,,,,,,,,,
2.0,2022.0,AK,Agrium Kenai Nitrogen Operations,54452.0,744B,GT,OS,,,,,NG,,,,,,,,,,,,,,,,,,,
3.0,2022.0,AK,Agrium Kenai Nitrogen Operations,54452.0,744C,GT,OS,,,,,NG,,,,,,,,,,,,,,,,,,,
4.0,2022.0,AK,Agrium Kenai Nitrogen Operations,54452.0,744D,GT,OS,,,,,NG,,,,,,,,,,,,,,,,,,,
5.0,2022.0,AK,Agrium Kenai Nitrogen Operations,54452.0,744E,GT,OS,,,,,NG,,,,,,,,,,,,,,,,,,,
6.0,2022.0,AK,Alakanuk,57053.0,UNIT4,IC,SB,,,,,DFO,,,,,,,,,,,,,,,,,,,
7.0,2022.0,AK,Allison Creek Hydro,58982.0,GEN1,HY,OP,,,,,WAT,,78535.0,34849.0,,,,,,EIA Prime Mover-level Data,EIA Prime Mover-level Data,,,,,,,,,
8.0,2022.0,AK,Ambler,60243.0,1A,IC,OP,,,,,DFO,,4814.182,1878.545,7.998,3.121,0.6981,393.14,,EIA Prime Mover-level Data,EIA Prime Mover-level Data,Estimated using emissions factor,Estimated using emissions factor,Estimated using emissions factor,Estimated using emissions factor,,,,,
9.0,2022.0,AK,Ambler,60243.0,2,IC,OP,,,,,DFO,,3610.636,1408.909,5.999,2.341,0.5235,294.855,,EIA Prime Mover-level Data,EIA Prime Mover-level Data,Estimated using emissions factor,Estimated using emissions factor,Estimated using emissions factor,Estimated using emissions factor,,,,,
10.0,2022.0,AK,Ambler,60243.0,3,IC,OP,,,,,DFO,,4814.182,1878.545,7.998,3.121,0.6981,393.14,,EIA Prime Mover-level Data,EIA Prime Mover-level Data,Estimated using emissions factor,Estimated using emissions factor,Estimated using emissions factor,Estimated using emissions factor,,,,,


In [3]:
val plantName by column<String>("PNAME")

val powerPlants = DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "UNT22",
    1
)
    .distinctBy(plantName)
    .count()
println("Total power plants $powerPlants")

Total power plants 11942


In [4]:
val unitId by column<String>("UNITID")

val powerPlants = DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "UNT22",
    1
)
    .distinctBy(plantName, unitId)
    .count()
println("Total power generating units $powerPlants")

Total power generating units 25403


In [5]:
val fuel by column<String>("FUELU1")
val total by column<Int>()

val outPath = Paths.get("refined-data").resolve("epa")
outPath.toFile().mkdirs()
DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "UNT22",
    1
)
    .distinctBy(plantName)
    .groupBy(fuel)
    .aggregate { 
        count() into total
    }
    .writeCSV(outPath.resolve("power-plants-by-fuel.csv").toFile())
    

In [6]:
fun String.toFuelType(defaultValue: String = "N/A"): String {
    return when (this) {
        "SUN" -> "Solar"
        "NG" -> "Natural Gas"
        "WAT" -> "Hydro"
        "WND" -> "Wind"
        "DFO" -> "Destilate Fuel Oil"
        "GEO" -> "Geo-Thermal"
        "NUC" -> "Nuclear"
        "MWH" -> "Energy Storage"
        "MSW" -> "Municipal Solid Waste"
        "BIT" -> "Bituminous coal"
        "SUB" -> "Subbituminous coal"
        "BFG" -> "Blast furnace gas"
        "AB" -> "Agricultural byproduct"
        "KER" -> "Kerosene"
        "LFG" -> "Landfill gas"
        "OBG" -> "Other biomass gas"
        "OBL" -> "Other biomass liquids"
        "OBS" -> "Other biomass solids"
        "OG" -> "Other gas"
        "WDS" -> "Wood, wood waste solid"
        "WDL" -> "Wood, wood waste liquid"
        "WH" -> "Waste heat"
        "RFO" -> "Residual fuel oil, heavy fuel oil, petroleum"
        "BLQ" -> "Black liquor"
        "COG" -> "Coke oven gas"
        "WC" -> "Waste coal"
        "PC" -> "Petroleum coke"
        "LIG" -> "Lignite coal"
        "RC" -> "Refined coal"
        "WO" -> "Waste oil"
        "JF" -> "Jet fuel"
        "OTH" -> "Other"
        "PUR" -> "Purchased steam"
        "SGC" -> "Coal-derived synthetic gas"
        "PRG" -> "Process gas"
        "PG" -> "Gaseous propane"
        "TDF" -> "Tire-derived fuel"
        else -> defaultValue
    }
}

In [7]:
fun String.toFuelTypeSimplified(defaultValue: String = "N/A"): String {
    return when (this) {
        "SUN" -> "Solar"
        "NG", "BFG", "LFG", "OG", "COG", "SGC", "PRG", "PG" -> "Gas"
        "WAT" -> "Hydro"
        "WND" -> "Wind"
        "GEO" -> "Geo-Thermal"
        "NUC" -> "Nuclear"
        "MWH" -> "Energy Storage"
        "MSW" -> "Municipal Solid Waste"
        "BIT", "SUB", "LIG", "RC", "WC" -> "Coal"
        "AB" -> "Agricultural byproduct"
        "OBG", "OBL", "OBS", "WDS", "WDL" -> "Wood/Biomass"
        "WH" -> "Waste heat"
        "RFO", "WO", "JF", "KER", "DFO", "PC" -> "Oil/Petroleum & Byproducts"
        "BLQ" -> "Black liquor"
        "OTH" -> "Other"
        "PUR" -> "Purchased steam"
        "TDF" -> "Tire-derived fuel"
        else -> defaultValue
    }
}

In [8]:
val explode by column<Double>()
val fuelDesc by column<String>("Fuel")

val otherLimit = 50

DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("power-plants-by-fuel.csv").inputStream())
    .add(fuelDesc) {
        
        if(total.getValue(it) / otherLimit == 0){
            "Other"
        }else{
            fuel.getValue(it).toFuelType()
        }
    }
    .sortByDesc(total)
    .groupBy(fuelDesc)
    .aggregate { 
        sum(total) into total
    }
    .add(explode) {
        if (total.getValue(it) / otherLimit == 0) {
            0.5
        } else {
            0.0
        }
    }
    .plot {
        pie {
            slice(total)
            fillColor(fuelDesc)
            explode(explode)
            size = 40.0
            stroke = 0.5
            strokeColor = Color.WHITE
            //hole = 0.8
        }
        layout {
            style(Style.Void)
            size = Pair(650, 450)
            title = "Total power plants by fuel type"   
        }
    }

In [22]:
DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "UNT22",
    1
)
    .distinctBy(plantName)
    .select(plantName, fuel)
    .writeCSV(Paths.get("refined-data").resolve("epa").resolve("power-plants-and-their-fuel.csv").toFile())

In [23]:
val energyGenerated by column<Double>()

DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "GEN22",
    1
)
    .add(energyGenerated){
        val value = it.getValue<String>("GENNTAN")
        when {
            "null".equals(value) -> 0
            value.contains("(") -> value.replace("(","").replace(")","").trim().toInt()
            else -> value.toInt()
        }
    }
    .take(10)

Parameter specified as non-null is null: method kotlin.text.StringsKt__StringsKt.contains, parameter <this>
java.lang.NullPointerException: Parameter specified as non-null is null: method kotlin.text.StringsKt__StringsKt.contains, parameter <this>
	at kotlin.text.StringsKt__StringsKt.contains(Strings.kt)
	at kotlin.text.StringsKt__StringsKt.contains$default(Strings.kt:1156)
	at Line_33_jupyter$res33$1.invoke(Line_33.jupyter.kts:12)
	at Line_33_jupyter$res33$1.invoke(Line_33.jupyter.kts:8)
	at org.jetbrains.kotlinx.dataframe.impl.columns.ConstructorsKt.computeValues(constructors.kt:98)
	at org.jetbrains.kotlinx.dataframe.impl.columns.ConstructorsKt.newColumn(constructors.kt:60)
	at org.jetbrains.kotlinx.dataframe.api.MapKt.mapToColumn(map.kt:94)
	at Line_33_jupyter.<init>(Line_33.jupyter.kts:31)
	at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62)
	at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Construc

In [24]:
val unitId by column<String>("UNITID")
val genId by column<String>("GENID")

val frameOutput = DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "UNT22",
    1
)

val frameEmissions = DataFrame.readExcel(
    Paths.get("raw-data").resolve("epa").resolve("egrid2022_data.xlsx").inputStream(),
    "GEN22",
    1
)

frameOutput.join(frameEmissions){
    plantName and (unitId match right.getColumn(genId))
}.writeCSV(Paths.get("refined-data").resolve("epa").resolve("power-plant-output-and-emissions.csv").toFile())
    

In [25]:
val co2 by column<Double>("CO2AN")
val output by column<Double>("GENNTAN")
val co2PerMwh by column<Double>()

DataFrame.readCSV(
    Paths.get("refined-data").resolve("epa").resolve("power-plant-output-and-emissions.csv").inputStream()
)
    .groupBy(fuel)
    .aggregate {
        sum(co2) into co2
        sum(output) into output
    }
    .add(co2PerMwh) {
        co2.getValue(it) / output.getValue(it)
    }
    .sortByDesc(co2PerMwh)
    .add(fuelDesc) {
        fuel.getValue(it).toFuelType()
    }
    .writeCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").toFile())

In [26]:
DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").inputStream())
    .sortByDesc(output)
    .take(10)
    .plot {
        x.axis.name = "Type of Fuel"
        y.axis.name = "Total annual MWh"
        bars { 
            y(output)
            x(fuelDesc)
            //fillColor(output)
        }
        layout.size = (Pair(1000,450))
    }

In [27]:
val simplifiedDesc by column<String>("Fuel Type")

DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").inputStream())
    .sortByDesc(output)
    .add(simplifiedDesc){
        if(output.getValue(it) < 15_000_000){
            "Other"
        }else{
            fuelDesc.getValue(it)
        }
    }
    .plot {
        pie {
            slice(output)
            fillColor(simplifiedDesc)
            size = 40.0
            stroke = 0.5
            strokeColor = Color.WHITE
        }
        layout {
            style(Style.Void)
            size = Pair(650, 450)
            title = "Top 10 Total annual MWh per Fuel type"
        }
    }

In [28]:
val producesCo2 by column<String>("Produces CO2")
val totalPower by column<Double>()

DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").inputStream())
    .add(producesCo2){
        when(fuelDesc.getValue(it)){
            "Nuclear","Wind","Hydro","Solar","Geo-Thermal" -> "No"
            else -> "Yes"
        }
    }.groupBy(producesCo2)
    .aggregate{
        sum(output) into totalPower
    }
        
        .plot{
        pie {
            slice(totalPower)
            fillColor(producesCo2)
            size = 40.0
            stroke = 0.5
            strokeColor = Color.WHITE
        }
        layout {
            style(Style.Void)
            size = Pair(650, 450)
            title = "Clean vs Dirty power production"
        }
    }

In [29]:
val totalOutput = DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").inputStream()).sum(output)
val percentage by column<String>()

DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").inputStream())
    .add(producesCo2){
        when(fuelDesc.getValue(it)){
            "Nuclear","Wind","Hydro","Solar","Geo-Thermal" -> "No"
            else -> "Yes"
        }
    }.groupBy(producesCo2)
    .aggregate{
        sum(output) into totalPower
    }
    .add(percentage){
        "${totalPower.getValue(it) / totalOutput * 100} %"
    }


Produces CO2,totalPower,percentage
Yes,633215579.511,28.4568980867332 %
No,1591958708.92,71.5431019132668 %


In [30]:
val producesCo2 by column<String>("Produces CO2")
val totalPower by column<Double>()

DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").inputStream())
    
    .add(producesCo2){
        when(fuelDesc.getValue(it)){
            "Hydro","Solar" -> "Hydro/Solar"
            "Nuclear","Wind","Geo-Thermal" -> "No"
            else -> "Yes"
        }
    }.groupBy(producesCo2)
    .aggregate{
        sum(output) into totalPower
    }

    .plot{
        pie {
            slice(totalPower)
            fillColor(producesCo2)
            size = 40.0
            stroke = 0.5
            strokeColor = Color.WHITE
        }
        layout {
            style(Style.Void)
            size = Pair(650, 450)
            title = "Clean vs Dirty power production(Not counting wind and solar)"
        }
    }

How much CO2 is produced by manufacturing EV - https://climate.mit.edu/ask-mit/how-much-co2-emitted-manufacturing-batteries
Number of registered cars in USA - https://www.instituteforenergyresearch.org/fossil-fuels/gas-and-oil/new-registrations-of-gasoline-vehicles-are-still-growing-despite-the-ev-push/ 

In [31]:
val totalCO2 =
    DataFrame.readCSV(Paths.get("refined-data").resolve("epa").resolve("co2-per-1-mwh-per-fuel-type.csv").inputStream())
        .sum(co2)

println("Total CO2 for 2022 produced by enegergy production : ${totalCO2 / 1_000_000.0} million tons")
 

Total CO2 for 2022 produced by enegergy production : 621.9154703239997 million tons


https://evmagazine.com/top10/top-10-best-selling-electric-vehicles-in-the-us
Top 10 EVs sold in US:
1. Tesla Model Y : 60 - 81 KWh
2. Tesla Model 3 : 57.5 - 75 KWh
3. Ford Mustang Mach-E : 70 - 91 KWh 
4. Tesla Model S : 103 KWh
5. Chevrolet Bolt EV: 65KWh
6. Tesla Model X : 100 KWh
7. Hyundai IONIQ 5 : 58 KWh
8. Kia EV6: 58 - 84 KWh
9. VW ID.4: 62 - 82 KWh
10. Rivian R1T - 106 - 149 KWh

For illustration, the Tesla Model 3 holds an 80 kWh lithium-ion battery. 
CO2 emissions for manufacturing that battery would range between 2400 kg 
(almost two and a half metric tons) and 16,000 kg (16 metric tons). 

In [32]:
val totalGasPoweredCarsInUs: Double = 288_500_000.0
val totalDieselPoweredCarsInUs: Double = 9_900_000.0
val evBatteryConservativeCO2: Double = 2.4
val evBatteryPesimisticCO2: Double = 16.0
val evBatteryConservativeCO2perMVH = evBatteryConservativeCO2 / 80.0
val evBatteryPesimisticCO2perMVH = evBatteryPesimisticCO2 / 80.0

val optimistic = ((totalGasPoweredCarsInUs + totalDieselPoweredCarsInUs) * evBatteryConservativeCO2) / 1_000_000
val optimisticYearlyProductions = optimistic / (totalCO2 / 1_000_000)
val pesimistic = ((totalGasPoweredCarsInUs + totalDieselPoweredCarsInUs) * evBatteryPesimisticCO2) / 1_000_000
val pesimisticYearlyProductions = pesimistic / (totalCO2 / 1_000_000)

println("Total CO2(conservative) to replace all cars in US : $optimistic million tons or ${optimisticYearlyProductions} years worth of our yearly energy production CO2")
println("Total CO2(pesimistic) to replace all cars in US : $pesimistic million tons or ${pesimisticYearlyProductions} years worth  % of our yearly energy production CO2")



Total CO2(conservative) to replace all cars in US : 716.16 million tons or 1.1515391305942295 years worth of our yearly energy production CO2
Total CO2(pesimistic) to replace all cars in US : 4774.4 million tons or 7.676927537294864 years worth  % of our yearly energy production CO2


In [33]:
listOf(
    Pair(60.0,81.0),
    Pair(57.5,75.0)
).toDataFrame()

first,second
60.0,81.0
57.5,75.0
