In [None]:
import polars as pl
from benchmark_utils import mock_snakemake

# When the notebook runs outside a Snakemake job there is no injected
# `snakemake` object; an object carrying a `mock` attribute is presumably a
# stand-in left by an earlier run of this cell. Either way, (re)build the mock.
needs_mock = "snakemake" not in globals() or hasattr(snakemake, "mock")  # noqa: F821
if needs_mock:
    snakemake = mock_snakemake("compute_capacity_factors")

In [26]:
# Generator technology label (as found in the `type` column of the generator
# table) -> coarse resource class used for the capacity-factor calculation.
MAPPING = {
    # solar technologies
    "Solar PV": "solar",
    "CSP": "solar",
    # wind technologies
    "Wind": "wind",
}

# Resource classes for which capacity factors are computed in this notebook.
NON_DISPATCHABLE_TYPES = ["solar", "wind"]

In [15]:
# Generator table (one row per generator, including its `type` and `Pmax`).
gen_capacity_path = snakemake.input.gen_capacity
gen = pl.read_parquet(gen_capacity_path)
gen

bus,type,cost_per_MWh_linear,hourly_overhead_per_MW_capacity,gen_id,Pmax,PlantAndGenID,Lat,Lon
i64,str,f64,f64,u32,f64,list[str],f64,f64
1595,"""Natural Gas""",18.81994,3.876284,1001,213.3,"[""56476/C""]",38.0175,-121.7587
8357,"""Solar PV""",0.0,0.0,1053,12.929217,"[""58506/937"", ""58513/937"", … ""57007/1""]",33.566482,-114.91443
426,"""Biopower""",22.419113,5546.85123,1212,1.1,"[""57460/CL03"", ""57460/CL10"", … ""57460/CL00""]",34.1358,-118.1267
481,"""Natural Gas""",17.190283,33.302608,519,12.4,"[""10623/GEN2""]",34.0564,-118.2436
1699,"""Natural Gas""",12.647507,18.222295,472,84.8,"[""10213/GEN2"", ""10213/GEN1""]",33.9058,-118.4031
…,…,…,…,…,…,…,…,…
1239,"""Natural Gas""",18.702164,11.13917,915,54.0,"[""56026/STG""]",37.3767,-121.9508
1824,"""CSP""",0.0,0.0,496,45.252261,"[""10443/GEN1""]",35.014836,-117.565903
105,"""Hydropower""",15.622293,3.6540858,327,10.0,"[""534/1""]",38.850283,-120.381883
1606,"""Hydropower""",17.352919,1.120993,230,37.5,"[""401/1""]",37.51,-118.569167


In [None]:
# Load the dispatch time series and normalise it in one pass:
#   1. "NA" and "#VALUE!" cells are read as nulls,
#   2. the day-first "Date" strings are parsed into datetimes,
#   3. every column name is lower-cased and "date" becomes "datetime".
df = (
    pl.read_csv(snakemake.input.gen_dispatch, null_values=["NA", "#VALUE!"])
    .with_columns(pl.col("Date").str.to_datetime("%d-%m-%Y %H:%M"))
    .rename(str.lower)
    .rename({"date": "datetime"})
)
df

datetime,hour,interval,load,solar,wind,net load,renewables,nuclear,large hydro,imports,generation,thermal,load less (generation+imports)
datetime[μs],i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2019-01-01 00:00:00,1,1,22320.49429,0.0,2862.309099,19458.18519,4662.44597,2272.38118,2366.01883,6275.014973,16047.98977,6747.14379,-2.510453
2019-01-01 01:00:00,2,1,21527.97042,0.0,2515.56231,19012.40811,4282.111911,2273.184635,2384.25768,6232.057172,15298.45224,6358.898014,-2.538992
2019-01-01 02:00:00,3,1,20836.4152,0.0,2351.416739,18484.99846,4130.582703,2274.356083,2160.2123,6007.834256,14833.19079,6268.039704,-4.609846
2019-01-01 03:00:00,4,1,20422.81794,0.0,2445.068525,17977.74942,4215.448265,2274.105854,1963.69373,5886.635995,14539.40932,6086.161471,-3.227375
2019-01-01 04:00:00,5,1,20263.46628,0.0,2313.330378,17950.1359,4088.050291,2273.530573,1965.07719,5883.068751,14382.30385,6055.645796,-1.906321
…,…,…,…,…,…,…,…,…,…,…,…,…,…
2019-12-31 19:00:00,20,1,26376.32956,0.0,363.493413,26012.83615,1852.591697,2271.264335,3327.33283,10898.02667,15471.02736,8019.838498,7.27553
2019-12-31 20:00:00,21,1,25430.99307,0.0,353.855947,25077.13712,1838.574723,2271.717485,3232.241044,10287.77333,15137.381,7794.847748,5.83874
2019-12-31 21:00:00,22,1,24520.57146,0.0,273.717488,24246.85397,1759.807844,2271.308664,3051.20003,9798.533333,14718.52334,7636.206803,3.514787
2019-12-31 22:00:00,23,1,23638.43047,0.0,224.392316,23414.03815,1660.73012,2273.928147,2556.704548,9517.6,14111.96041,7620.597595,8.87006


In [17]:
# Upper limit on hydro energy for the year: the total of the "large hydro"
# dispatch column over every row of `df` (polars' sum skips nulls).
# NOTE(review): this is an energy total only if each row represents one hour
# of average power — confirm against the dispatch file's resolution and units.
max_energy_generation = pl.DataFrame(
    {"type": ["hydro"], "limit": [df.get_column("large hydro").sum()]}
)
max_energy_generation.write_parquet(snakemake.output.yearly_limit)
max_energy_generation

type,limit
str,f64
"""hydro""",26350000.0


In [27]:
# Total installed capacity (sum of Pmax) per resource class, restricted to the
# non-dispatchable classes. Technology labels are first collapsed via MAPPING
# so that e.g. "Solar PV" and "CSP" both count towards "solar".
max_capacity = (
    gen.select(pl.col("type").replace(MAPPING), pl.col("Pmax"))
    .group_by("type")
    .agg(pl.col("Pmax").sum())
    .filter(pl.col("type").is_in(NON_DISPATCHABLE_TYPES))
)
max_capacity

type,Pmax
str,f64
"""solar""",12795.400002
"""wind""",5303.0


In [29]:
# Keep only the timestamp and one generation column per non-dispatchable type.
df2 = df.select("datetime", *NON_DISPATCHABLE_TYPES)
df2

datetime,solar,wind
datetime[μs],f64,f64
2019-01-01 00:00:00,0.0,2862.309099
2019-01-01 01:00:00,0.0,2515.56231
2019-01-01 02:00:00,0.0,2351.416739
2019-01-01 03:00:00,0.0,2445.068525
2019-01-01 04:00:00,0.0,2313.330378
…,…,…
2019-12-31 19:00:00,0.0,363.493413
2019-12-31 20:00:00,0.0,353.855947
2019-12-31 21:00:00,0.0,273.717488
2019-12-31 22:00:00,0.0,224.392316


In [30]:
# Turn generation into capacity factors: divide each resource's generation
# column by that resource's total installed capacity, then reshape to long
# format (one row per datetime and type).
#
# `.item()` extracts the capacity as a plain scalar and raises unless exactly
# one row matches the type, so a missing or duplicated type in `max_capacity`
# fails loudly here instead of depending on how a filtered Series happens to
# broadcast inside an expression.
capacity_factor_exprs = [
    pl.col(gen_type)
    / max_capacity.filter(pl.col("type") == gen_type).get_column("Pmax").item()
    for gen_type in NON_DISPATCHABLE_TYPES
]
df3 = df2.with_columns(capacity_factor_exprs).unpivot(
    index=["datetime"], variable_name="type", value_name="capacity_factor"
)
df3

datetime,type,capacity_factor
datetime[μs],str,f64
2019-01-01 00:00:00,"""solar""",0.0
2019-01-01 01:00:00,"""solar""",0.0
2019-01-01 02:00:00,"""solar""",0.0
2019-01-01 03:00:00,"""solar""",0.0
2019-01-01 04:00:00,"""solar""",0.0
…,…,…
2019-12-31 19:00:00,"""wind""",0.068545
2019-12-31 20:00:00,"""wind""",0.066728
2019-12-31 21:00:00,"""wind""",0.051616
2019-12-31 22:00:00,"""wind""",0.042314


In [31]:
# Mean capacity factor for each hour of the day, per resource type.
cf_by_hour = (
    df3.group_by(pl.col("datetime").dt.hour().alias("hour"), "type")
    .agg(pl.col("capacity_factor").mean())
    .sort("hour")
)
hourly_chart = cf_by_hour.plot.line(x="hour", y="capacity_factor", color="type")
hourly_chart.properties(title="Capacity Factors by Hour")

In [32]:
# Mean capacity factor for each calendar month, per resource type.
# Stored under its own name so it does not overwrite the hourly aggregation.
cf_by_month = (
    df3.group_by(pl.col("datetime").dt.month().alias("month"), "type")
    .agg(pl.col("capacity_factor").mean())
    .sort("month")
)
cf_by_month.plot.line(x="month", y="capacity_factor", color="type").properties(
    title="Capacity Factors by Month"
)

In [33]:
# Histogram bin edges covering capacity factors from 0 to 1 in steps of 0.1.
dist_bucket_edges = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

# NOTE(review): many solar values are exactly 0.0, which sits on the lowest bin
# edge — confirm how the installed polars version buckets edge values.
solar_cf = df3.filter(pl.col("type") == "solar").get_column("capacity_factor")
solar_hist = solar_cf.hist(bins=dist_bucket_edges, include_category=True)
solar_hist.plot.bar(x="category", y="count").properties(
    title="Solar Capacity Factor Distribution"
)

In [34]:
# Distribution of wind capacity factors over the same bins as the solar plot.
wind_cf = df3.filter(pl.col("type") == "wind").get_column("capacity_factor")
wind_hist = wind_cf.hist(bins=dist_bucket_edges, include_category=True)
wind_hist.plot.bar(x="category", y="count").properties(
    title="Wind Capacity Factor Distribution"
)

In [35]:
# Persist the long-format capacity factors (datetime, type, capacity_factor).
vcf_path = snakemake.output.vcf
df3.write_parquet(vcf_path)