Raw data and code from https://drive.google.com/drive/folders/1Jsv34JjNo22NOCd26iBHMP80EPG0xmQE, linked from Casey Handmer's blog post [Solar and batteries for generic use cases](https://caseyhandmer.wordpress.com/2024/11/09/solar-and-batteries-for-generic-use-cases/)

In [1]:
from datetime import datetime
from pathlib import Path
from random import uniform

import altair as alt
from numba import njit
import pandas as pd
from vega_datasets import data

# Switch Altair to the "vegafusion" data transformer.
# NOTE(review): presumably needed because the 5-minute series are larger than
# Altair's default row limit -- confirm.
alt.data_transformers.enable("vegafusion")

# Folder holding the per-site CSV files; the bare expression below displays
# whether that folder exists.
folder = Path('./texas')
folder.exists()

True

In [2]:
# Every entry of the data folder whose stem begins with 'Actual'.
actual = [
    entry
    for entry in folder.iterdir()
    if entry.stem.startswith('Actual')
]
actual

[PosixPath('texas/Actual_33.55_-101.85_2006_DPV_37MW_5_Min.csv'),
 PosixPath('texas/Actual_36.25_-102.95_2006_UPV_84MW_5_Min.csv'),
 PosixPath('texas/Actual_33.55_-94.65_2006_UPV_136MW_5_Min.csv'),
 PosixPath('texas/Actual_32.05_-94.15_2006_UPV_95MW_5_Min.csv'),
 PosixPath('texas/Actual_35.85_-102.85_2006_UPV_17MW_5_Min.csv'),
 PosixPath('texas/Actual_34.55_-102.55_2006_UPV_168MW_5_Min.csv'),
 PosixPath('texas/Actual_34.95_-102.95_2006_UPV_67MW_5_Min.csv'),
 PosixPath('texas/Actual_33.05_-102.95_2006_UPV_67MW_5_Min.csv'),
 PosixPath('texas/Actual_35.05_-101.85_2006_DPV_27MW_5_Min.csv'),
 PosixPath('texas/Actual_34.65_-102.85_2006_UPV_34MW_5_Min.csv'),
 PosixPath('texas/Actual_32.65_-94.45_2006_UPV_54MW_5_Min.csv'),
 PosixPath('texas/Actual_33.65_-102.45_2006_UPV_168MW_5_Min.csv'),
 PosixPath('texas/Actual_32.45_-94.75_2006_DPV_35MW_5_Min.csv'),
 PosixPath('texas/Actual_32.05_-94.35_2006_UPV_27MW_5_Min.csv'),
 PosixPath('texas/Actual_33.25_-102.65_2006_UPV_17MW_5_Min.csv'),
 PosixPath('

In [3]:
# State outlines taken from the `us_10m` TopoJSON shipped with vega_datasets.
states = alt.topo_feature(data.us_10m.url, feature='states')

# Light-gray base map in the Albers USA projection; reused as a background layer.
usa_states = (
    alt.Chart(states)
    .mark_geoshape(fill='lightgray', stroke='white')
    .project('albersUsa')
    .properties(width=500, height=300)
)
usa_states

In [4]:
def stem_to_lat_lon(s: str) -> tuple[float, float]:
    """Return the second and third ``_``-separated fields of *s* as floats.

    For the data file stems used here (``Actual_<lat>_<lon>_...``) those two
    fields are the site's latitude and longitude.
    """
    coordinate_fields = s.split('_')[1:3]
    return tuple(map(float, coordinate_fields))

# One (lat, lon) row per data file, parsed from the file stem.
df = pd.DataFrame(
    [stem_to_lat_lon(fp.stem) for fp in actual],
    columns=['lat', 'lon'],
)

# Overlay the sites as small circles on top of the state outlines.
(
    usa_states
    + alt.Chart(df)
    .mark_circle()
    .encode(
        longitude='lon:Q',
        latitude='lat:Q',
        size=alt.value(10),
    )
    .project('albersUsa')
    .properties(width=500, height=300)
)

In [5]:
import re


def read_solar_array_data(fp: Path) -> pd.DataFrame:
    """Load one solar-array CSV and scale its output to a 1 MW array.

    The file must contain a ``LocalTime`` column formatted as
    ``%m/%d/%y %H:%M`` and a ``Power(MW)`` column. The array size is read
    from the ``<digits>MW`` token in the file stem.

    Args:
        fp: Path to the CSV file.

    Returns:
        The CSV contents with ``LocalTime`` and ``Power(MW)`` replaced by
        ``local_time`` (parsed timestamps) and ``power`` (``Power(MW)``
        divided by the array size).

    Raises:
        ValueError: If the file stem contains no ``<digits>MW`` token.
    """
    # Fail early with a readable message instead of an opaque
    # "'NoneType' object is not subscriptable" when the name is unexpected.
    size_match = re.search(r'(\d+)MW', fp.stem)
    if size_match is None:
        raise ValueError(f"cannot determine array size (e.g. '84MW') from file name {fp.name!r}")
    size = int(size_match[1])

    raw = pd.read_csv(fp)
    # Vectorized parse: same format string, no per-row Python call.
    raw['local_time'] = pd.to_datetime(raw['LocalTime'], format='%m/%d/%y %H:%M')
    raw['power'] = raw['Power(MW)'] / size  # standardize on 1MW
    return raw.drop(columns=['LocalTime', 'Power(MW)'])

# Load a single site to explore; per its file name this is the 84MW array at
# 36.25, -102.95, scaled by read_solar_array_data to a 1 MW array.
sol = read_solar_array_data(folder / 'Actual_36.25_-102.95_2006_UPV_84MW_5_Min.csv')

In [6]:
# Summary statistics of the loaded site (timestamps and normalized power).
sol.describe()

Unnamed: 0,local_time,power
count,105120,105120.0
mean,2006-07-02 11:57:29.999999744,0.190614
min,2006-01-01 00:00:00,0.0
25%,2006-04-02 05:58:45,0.0
50%,2006-07-02 11:57:30,0.0
75%,2006-10-01 17:56:15,0.388095
max,2006-12-31 23:55:00,0.992857
std,,0.270216


In [7]:
# Single-day profile: keep only the samples taken on January 1st.
jan_1st = sol.loc[
    (sol['local_time'].dt.month == 1) & (sol['local_time'].dt.day == 1)
]
(
    alt.Chart(jan_1st)
    .mark_line()
    .encode(x='local_time', y='power', tooltip='local_time')
    .properties(width=500)
)

In [8]:
from datetime import date
# Mean of every column per time-of-day slot, across the whole data set.
day_avg = (
    sol.groupby(sol['local_time'].dt.time)
    .mean()
    .drop(columns=['local_time'])
    .reset_index()
)
# Turn each bare time-of-day into a full datetime on today's date
# (presumably so Altair can plot it on a temporal axis).
day_avg['local_time'] = day_avg['local_time'].apply(
    lambda slot: datetime.combine(date.today(), slot)
)
(
    alt.Chart(day_avg)
    .mark_line()
    .encode(x='local_time', y='power')
    .properties(title="Average power generation over a day")
)

In [9]:
from datetime import time

# Mean of every column per calendar day.
year_avg = (
    sol.groupby(sol['local_time'].dt.date)
    .mean()
    .drop(columns=['local_time'])
    .reset_index()
)
# Represent each day by a datetime at noon of that day.
year_avg['local_time'] = year_avg['local_time'].apply(
    lambda day: datetime.combine(day, time(12, 0, 0))
)
(
    alt.Chart(year_avg)
    .mark_line()
    .encode(x='local_time', y='power')
    .properties(height=500, width=800)
)

In [10]:
# Every 5-minute sample of the loaded site as one line chart.
(
    alt.Chart(sol)
    .mark_line()
    .encode(x='local_time', y='power')
    .properties(width=800, height=600)
)

In [11]:
# Every 10th distinct calendar day present in the data.
days = sol['local_time'].dt.date.unique()[::10]
df = sol.loc[sol['local_time'].dt.date.isin(days)].assign(
    # Noon of the sample's calendar day; used as the colour category.
    day=lambda frame: frame['local_time'].dt.date.apply(
        lambda d: datetime.combine(d, time(12, 0, 0))
    ),
    # Time of day placed on today's date, so all days share one x axis.
    time=lambda frame: frame['local_time'].dt.time.apply(
        lambda t: datetime.combine(date.today(), t)
    ),
)
# Plot a random sample of 5000 rows, one coloured line per day.
(
    alt.Chart(df.sample(5000))
    .mark_line()
    .encode(
        x="time:T",
        y="power:Q",
        color="day:N",
    )
)

Time to implement the `uptime` function. We baseline a 1MW array, then set up a numerical array with loads of different sizes and batteries of different sizes. If the battery is empty, the load is off. If the battery is full, no charging can occur. We measure everything in 5-minute intervals (matching the data), and assume the battery starts full.
Battery state is measured in MWh stored, so in each 5-minute interval we have to divide the power (in MW) by 12 to get the energy (in MWh).

Let's start with a naive variant (no vectorization)

In [12]:
# Number of hours in a 365-day year.
24*365

8760

Casey's number here is `1.0523e7`, so I'm going to call that close enough.
There appears to be a bug in Casey's implementation. The check of whether the battery can be charged happens at the start of the interval. If the battery is almost full, the "battery full" check comes back negative, so charging goes ahead, but then there might be too much solar excess, filling the battery beyond its maximum. However, if we leave the bug in the program, the number doesn't match quite as closely... So either this bug is not in Casey's program, or my program has another discrepancy.

I'm running the gradient descent for more steps with a much smaller amplitude since this code appears to be ~1000 times faster than the equivalent Mathematica. This takes care of some numerical infelicities.

That's really not that far from Casey's outcomes. I'm going to chalk the difference up to order of operations and floating point shenanigans.

In [13]:
from solar import read_solar_array_data, find_minimum_system_cost
from tqdm import tqdm
from pathlib import Path

# Reload the same site through the `solar` module's reader imported above.
folder = Path('./texas')
sol = read_solar_array_data(folder / 'Actual_36.25_-102.95_2006_UPV_84MW_5_Min.csv')

from tqdm import tqdm
# Sweep 40 values of the third argument, log-spaced at 10 steps per decade
# starting from 1e4 (10e3 * 10**(0.1*i) for i = 0..39); the first two
# arguments stay fixed at 200e3.
# NOTE(review): the argument meanings (solar cost, battery cost, load cost)
# are inferred from the column labels applied to these results later in the
# notebook -- confirm against solar.find_minimum_system_cost.
# NOTE(review): sol['power'].tolist() is rebuilt on every iteration; it could
# be hoisted if find_minimum_system_cost does not mutate its input.
results_raw = [
    find_minimum_system_cost(200e3, 200e3, 10e3*10**(0.1*i), sol['power'].tolist())
    for i in tqdm(range(40))
]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:19<00:00,  2.08it/s]


I'm not sure what's taking Casey's implementation so long? Let's call this a minute, then it's a factor 100? Factor 1000 if I enable compilation with `numba`.


In [14]:
# Display the raw results: one tuple of five tuples per sweep step.
results_raw

[((200000.0, 200000.0, 10000.0),
  (1.2269376949670299, 0.0, 1),
  (245387.53899340596, 0.0, 10000.0),
  (245387.53899340596, 255387.53899340596, 1093765.0719039543),
  (0.0, 0.8150373112685823, 0.0, 0.23349396095529376)),
 ((200000.0, 200000.0, 12589.254117941673),
  (1.2408388433862985, 0.0, 1),
  (248167.76867725968, 0.0, 12589.254117941673),
  (248167.76867725968, 260757.02279520134, 1104789.4684015538),
  (0.0, 0.8059064280023346, 0.0, 0.23602417497016268)),
 ((200000.0, 200000.0, 15848.931924611135),
  (1.2398868592076282, 0.0, 1),
  (247977.37184152563, 0.0, 15848.931924611135),
  (247977.37184152563, 263826.30376613676, 1118611.0778477148),
  (0.0, 0.806525202339081, 0.0, 0.2358516815994321)),
 ((200000.0, 200000.0, 19952.623149688796),
  (1.2726179576119685, 0.0, 1),
  (254523.5915223937, 0.0, 19952.623149688796),
  (254523.5915223937, 274476.2146720825, 1135587.406912799),
  (0.0, 0.7857817768629256, 0.0, 0.24170417266097718)),
 ((200000.0, 200000.0, 25118.8643150958),
  (1.2

In [15]:
# Column names for the flattened result tuples, in the order the values appear.
labels = [
    # inputs
    'solar cost ($/MW)',
    'battery cost ($/MW)',
    'load cost ($/MW)',
    # sizes
    'array size (MW)',
    'battery size (MWh)',
    'load size (1 MW by definition)',
    # component costs
    'array cost ($)',
    'battery cost ($)',
    'load cost ($, normalized to 1 MW)',
    # totals
    'total power system cost ($)',
    'total system cost ($)',
    'total cost per utilization ($)',
    # relative sizes and utilization
    'battery size relative to 1 MW array',
    'load size relative to 1 MW array',
    'annual battery utilization',
    'annual load utilization',
]

# Flatten each result (a tuple of tuples) and pair the values with the labels.
results = pd.DataFrame(
    [
        dict(zip(labels, (value for group in result for value in group)))
        for result in results_raw
    ]
)
results.head()

Unnamed: 0,solar cost ($/MW),battery cost ($/MW),load cost ($/MW),array size (MW),battery size (MWh),load size (1 MW by definition),array cost ($),battery cost ($),"load cost ($, normalized to 1 MW)",total power system cost ($),total system cost ($),total cost per utilization ($),battery size relative to 1 MW array,load size relative to 1 MW array,annual battery utilization,annual load utilization
0,200000.0,200000.0,10000.0,1.226938,0.0,1,245387.538993,0.0,10000.0,245387.538993,255387.538993,1093765.0,0.0,0.815037,0.0,0.233494
1,200000.0,200000.0,12589.254118,1.240839,0.0,1,248167.768677,0.0,12589.254118,248167.768677,260757.022795,1104789.0,0.0,0.805906,0.0,0.236024
2,200000.0,200000.0,15848.931925,1.239887,0.0,1,247977.371842,0.0,15848.931925,247977.371842,263826.303766,1118611.0,0.0,0.806525,0.0,0.235852
3,200000.0,200000.0,19952.62315,1.272618,0.0,1,254523.591522,0.0,19952.62315,254523.591522,274476.214672,1135587.0,0.0,0.785782,0.0,0.241704
4,200000.0,200000.0,25118.864315,1.275928,0.0,1,255185.502346,0.0,25118.864315,255185.502346,280304.366661,1156915.0,0.0,0.783744,0.0,0.242286


In [16]:
# Long format: one row per (utilization, cost component) pair.
subsystems = results.melt(
    id_vars='annual load utilization',
    value_vars=[
        'array cost ($)',
        'battery cost ($)',
        'load cost ($/MW)',
        'total power system cost ($)',
        'total system cost ($)',
    ],
)
# Keep only the points at or below the 2e7 upper bound of the y axis.
subsystems = subsystems.loc[subsystems['value'] <= 2e7]
(
    alt.Chart(subsystems)
    .mark_line()
    .encode(
        x='annual load utilization:Q',
        y=alt.Y('value:Q', scale=alt.Scale(domain=[0, 2e7], clamp=True)),
        color='variable:N',
        tooltip='variable:N',
    )
    .properties(width=800, height=600)
)

In [17]:
# Long format keyed by load cost: one row per (load cost, cost component) pair.
subsystems = results.melt(
    id_vars="load cost ($/MW)",
    value_vars=[
        'array cost ($)',
        'battery cost ($)',
        'load cost ($/MW)',
        'total power system cost ($)',
        'total system cost ($)',
        'total cost per utilization ($)',
    ],
)
# Keep strictly positive values only (presumably because the y axis is logarithmic).
subsystems = subsystems.loc[subsystems['value'] > 0]

(
    alt.Chart(subsystems)
    .mark_line()
    .encode(
        x=alt.X('load cost ($/MW):Q', scale=alt.Scale(type='log', domain=[0.5e4, 1e8])),
        y=alt.Y('value:Q', scale=alt.Scale(type='log', domain=[0.5e3, 1e8])),
        color='variable:N',
        tooltip=('variable:N', 'value:Q'),
    )
    .properties(width=800, height=600)
)

In [18]:
# Annual load utilization as a function of load cost (log-scaled x axis).
(
    alt.Chart(results)
    .mark_line()
    .encode(
        x=alt.X('load cost ($/MW)', scale=alt.Scale(type='log')),
        y='annual load utilization',
    )
    .properties(width=800, height=600)
)

In [19]:
# Summary statistics over all 40 sweep results.
results.describe()

Unnamed: 0,solar cost ($/MW),battery cost ($/MW),load cost ($/MW),array size (MW),battery size (MWh),load size (1 MW by definition),array cost ($),battery cost ($),"load cost ($, normalized to 1 MW)",total power system cost ($),total system cost ($),total cost per utilization ($),battery size relative to 1 MW array,load size relative to 1 MW array,annual battery utilization,annual load utilization
count,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0
mean,200000.0,200000.0,9654325.0,4.509199,5.878513,1.0,901839.7,1175703.0,9654325.0,2077542.0,11731870.0,13115380.0,0.759865,0.433217,0.42072,0.546435
std,0.0,0.0,18514650.0,3.917644,6.991034,0.0,783528.9,1398207.0,18514650.0,2151393.0,20230700.0,20053390.0,0.85943,0.283398,0.430788,0.321936
min,200000.0,200000.0,10000.0,1.226938,0.0,1.0,245387.5,0.0,10000.0,245387.5,255387.5,1093765.0,0.0,0.071457,0.0,0.233494
25%,200000.0,200000.0,94858.21,1.389814,0.0,1.0,277962.8,0.0,94858.21,277962.8,372821.0,1430908.0,0.0,0.139916,0.0,0.260509
50%,200000.0,200000.0,897164.1,2.178177,0.100642,1.0,435635.3,20128.43,897164.1,452913.5,1350078.0,4105373.0,0.046375,0.459877,0.24316,0.328339
75%,200000.0,200000.0,8457462.0,7.149443,14.367325,1.0,1429889.0,2873465.0,8457462.0,4303354.0,12760820.0,13650610.0,1.604379,0.71957,0.927048,0.934546
max,200000.0,200000.0,79432820.0,13.994393,15.884591,1.0,2798879.0,3176918.0,79432820.0,5975797.0,85408620.0,86390620.0,2.169885,0.815037,0.988118,0.988633


In [20]:
# Add a "... ($/MWh)" column for each cost: the cost divided by
# 10 * 24 * 365 (the hours in ten 365-day years) times the load utilization.
# The load is 1 MW by definition, so that product is in MWh.
# NOTE(review): the factor 10 presumably encodes an assumed 10-year system
# lifetime -- confirm.
for col in ['array cost ($)', 'battery cost ($)', 'total power system cost ($)']:
    results[col.replace('$', '$/MWh')] = results[col] / (10 * 24 * 365 * results['annual load utilization'])

import numpy as np
# Result row with the lowest annual load utilization.
min_util = results.loc[results['annual load utilization'].idxmin()]
# Utilization values from 0.01 up to (excluding) that minimum, in steps of 0.001.
uvals = np.arange(0.01, min_util['annual load utilization'], 0.001)
# Synthetic "underutilized solar" curve: the array cost of that row spread
# over the smaller utilization values, in the same long format as the melt below.
underutilized_solar = pd.Series(uvals, name='annual load utilization').to_frame()
underutilized_solar['variable'] = 'underutilized solar'
underutilized_solar['value'] = min_util['array cost ($)'] / (10 * 24 * 365 * uvals)

# Long format of the per-MWh columns, extended with the synthetic curve.
subsystems = results.melt(id_vars='annual load utilization', value_vars=[
    'array cost ($/MWh)', 'battery cost ($/MWh)', 'total power system cost ($/MWh)', 
])
subsystems = pd.concat([subsystems, underutilized_solar])
# Interactive line chart; the y axis is limited to the range 0-80.
alt.Chart(subsystems).mark_line().encode(
    x='annual load utilization:Q',
    y=alt.Y('value:Q', scale=alt.Scale(domain=[0, 80])),
    color='variable:N',
).properties(width=800, height=600).interactive()