In [None]:
# Invoke the project's run.py script with the `all` argument from the shell;
# the text captured under this cell is that script's console log.
!python run.py all

-------Running run.py project-------
Merging raw datasets...
GIS Data loaded with 223 rows and 27 columns.
Station Summary Data loaded with 216 rows and 7 columns.
Windspeed Data loaded with 38664 rows and 4 columns.
Data processing completed. Merged weather data saved.
Preview of merged weather data:
Calculating PSPS probabilities...
PSPS probabilities calculated for 223 stations.
Preview of PSPS probabilities:
   objectid weatherstationcode  ... above_threshold_count  PSPS_probability
0         1                CBD  ...                   2.0          0.011696
1         2                CCR  ...                  25.0          0.139665
2         3                CES  ...                  10.0          0.057803
3       208                ROV  ...                   4.0          0.081633
4         5                CHO  ...                   0.0          0.000000

[5 rows x 30 columns]
Filtering top PSPS stations...
Filtered 1 high-risk PSPS stations.
Preview of top PSPS stations:
    obje

In [3]:
import numpy as np
import os
import json
import pandas as pd
from etl import load_data, save_data, merge_weather_data
from psps import calculate_psps_probability, calculate_combined_count
from top_psps import filter_top_psps_stations
from data_vri_conductor import merge_psps_conductor_vri, process_conductor_data
from span_analysis import (
    formSpanNet,
    unique_upstream_weather_stations_to_span,
    calculate_span_PSPS_probability,
    upstream_weather_stations_to_span,
    getUpstream,
    calculate_annual_customer_count
)

# Read the pipeline configuration (input locations and parameters) from disk.
with open('data-params.json', 'r') as config_file:
    config = json.load(config_file)

# Look up the three raw inputs that feed the merged weather table.
gis_path, station_summary_path, windspeed_path = (
    config['data_sources'][source_name]
    for source_name in ('gis_weatherstation', 'station_summary_snapshot', 'windspeed_snapshot')
)

# Combine the GIS, station-summary and windspeed inputs into one frame.
merged_data = merge_weather_data(
    gis_path,
    station_summary_path,
    windspeed_path,
)

print(
    "Data processing completed. Merged weather data saved.",
    "Preview of merged weather data:",
    sep="\n",
)
merged_data.head()

GIS Data loaded with 223 rows and 27 columns.
Station Summary Data loaded with 216 rows and 7 columns.
Windspeed Data loaded with 38664 rows and 4 columns.
Data processing completed. Merged weather data saved.
Preview of merged weather data:


Unnamed: 0,objectid,weatherstationcode,weatherstationname,scadartuid,structureid,nwszone,district,thomasbrospagegrid,constructionstatus,creationuser,...,vri,alert,max_gust,99th,95th,snapshot_date_y,date,wind_speed,snapshot_date,exceed_threshold
0,1,CBD,Carlsbad,5158.0,P124785,Coastal-243,6.0,1126-G1,A,seu_gis_elec,...,M,35,39,33,25,2023-08-02,12/10/2012,11.0,8/2/2023,0
1,1,CBD,Carlsbad,5158.0,P124785,Coastal-243,6.0,1126-G1,A,seu_gis_elec,...,M,35,39,33,25,2023-08-02,12/19/2012,16.0,8/2/2023,0
2,1,CBD,Carlsbad,5158.0,P124785,Coastal-243,6.0,1126-G1,A,seu_gis_elec,...,M,35,39,33,25,2023-08-02,12/20/2012,17.0,8/2/2023,0
3,1,CBD,Carlsbad,5158.0,P124785,Coastal-243,6.0,1126-G1,A,seu_gis_elec,...,M,35,39,33,25,2023-08-02,12/25/2012,13.0,8/2/2023,0
4,1,CBD,Carlsbad,5158.0,P124785,Coastal-243,6.0,1126-G1,A,seu_gis_elec,...,M,35,39,33,25,2023-08-02,4/18/2013,28.0,8/2/2023,0


In [None]:

# The GIS station table is loaded on its own because the probability
# calculation takes it as a separate argument next to the merged weather data.
gis_weather_station = load_data(config['data_sources']['gis_weatherstation'])
psps_condition = config['parameters']['psps_condition']

weather_station_psps = calculate_psps_probability(merged_data, gis_weather_station, psps_condition)

print(f"PSPS probabilities calculated for {len(weather_station_psps)} stations.")
print("Preview of PSPS probabilities:")
print(weather_station_psps.head())

In [None]:

# Keep only the stations selected by the configured minimum alert threshold.
min_alert_threshold = config['parameters']['min_alert_threshold']
top_stations = filter_top_psps_stations(weather_station_psps, min_alert_threshold)

print(f"Filtered {len(top_stations)} high-risk PSPS stations.")
print("Preview of top PSPS stations:")
print(top_stations.head())

In [None]:

# Resolve the VRI and conductor-span inputs, then join them with the
# per-station PSPS results.
vri_path, conductor_path = (
    config['data_sources'][source_name]
    for source_name in ('src_vri_snapshot', 'dev_wings_agg_span')
)
conductor_vri_psps = merge_psps_conductor_vri(
    conductor_path,
    vri_path,
    weather_station_psps,
)

print(f"Merged VRI and conductor data saved. Total rows: {len(conductor_vri_psps)}.")
print("Preview of merged VRI-conductor data:")
print(conductor_vri_psps.head())

In [None]:

# Load and pre-process the conductor span table named in the configuration.
conductor_span_path = config['data_sources']['dev_wings_agg_span']
dev_wings_agg_span = process_conductor_data(conductor_span_path)

# Disabled: export of the span network graph to JSON (kept for reference).
# G, _ = formSpanNet(merged_data, dev_wings_agg_span)
# network_data = {"nodes": list(G.nodes), "edges": list(G.edges)}
# output_path = config['output_paths']['processed_data']
# with open(f"{output_path}/network_graph.json", 'w') as json_file:
#     json.dump(network_data, json_file)
# print(f"Network graph saved with {len(G.nodes)} nodes and {len(G.edges)} edges.")

In [None]:


# Build the span network and collect the unique upstream stations per span.
G, merged_station_psps_spans = formSpanNet(merged_data, dev_wings_agg_span)
uniqueUpsteamWStoSpan = unique_upstream_weather_stations_to_span(
    dev_wings_agg_span, G, merged_station_psps_spans
)

# Order spans by their number of upstream stations, largest first.
sorted_unique_upstream_weather_station_to_span = dict(
    sorted(
        uniqueUpsteamWStoSpan.items(),
        key=lambda span_and_stations: len(span_and_stations[1]),
        reverse=True,
    )
)

# After the descending sort, the leading entry carries the maximum count.
first_key = next(iter(sorted_unique_upstream_weather_station_to_span))
first_value = sorted_unique_upstream_weather_station_to_span[first_key]
highest_weather_station_count = len(first_value)

# Every span that ties for that maximum, in sorted order.
greatest_weather_station_impact = [
    span_id
    for span_id in sorted_unique_upstream_weather_station_to_span
    if len(sorted_unique_upstream_weather_station_to_span[span_id]) == highest_weather_station_count
]

print("Highest Num of Stations Per Span: ", highest_weather_station_count)
print("Example Span: ", greatest_weather_station_impact[0])
print("Num of Spans with max Stations: ", len(greatest_weather_station_impact))

In [None]:
# Show the first five rows of the merged weather table again.
merged_data.head(5)

In [None]:

def calculate_span_PSPS_probability(associated_stations, weather_data=None):
    """Estimate a span's PSPS probability from its associated weather stations.

    Only dates on which every associated station has a row are considered
    (the per-station tables are inner-joined on date). A date counts towards
    the probability when at least one of the stations has a truthy
    ``exceed_threshold`` value on that date.

    Note: this notebook-local definition shadows the function of the same
    name imported from ``span_analysis``.

    Parameters
    ----------
    associated_stations : iterable of sequence
        Station records; only element 0 (the weather station code) is used.
    weather_data : pandas.DataFrame, optional
        Table with ``weatherstationcode``, ``date`` and ``exceed_threshold``
        columns. When omitted, the notebook-level ``merged_data`` frame is
        used, which preserves the original one-argument call.

    Returns
    -------
    float or int
        Share of the shared dates on which any station exceeded its
        threshold; ``0`` when there are no stations or no shared dates.
    """
    if weather_data is None:
        # Backward-compatible fallback to the notebook's global frame.
        weather_data = merged_data

    overlap = None
    for index, station in enumerate(associated_stations):
        station_rows = weather_data.loc[
            weather_data['weatherstationcode'] == station[0],
            ['date', 'exceed_threshold'],
        ].rename(
            columns={'date': f"date_{index}", 'exceed_threshold': f"exceed_threshold_{index}"}
        )
        if overlap is None:
            overlap = station_rows
            continue

        # Inner join keeps only dates present for every station seen so far.
        overlap = overlap.merge(station_rows, left_on=f"date_{index-1}", right_on=f"date_{index}")

    if overlap is None or len(overlap) == 0:
        return 0

    exceed_columns = [col for col in overlap.columns if col.startswith('exceed_threshold')]
    # True where at least one station exceeded its threshold on that date.
    any_station_exceeds = overlap[exceed_columns].any(axis=1)

    return any_station_exceeds.sum() / len(overlap)


In [None]:
# windspeed_snapshot = load_data(windspeed_path)
# windspeed_snapshot = windspeed_snapshot[(windspeed_snapshot['wind_speed'] < max(windspeed_snapshot['wind_speed']))]
# windspeed_snapshot.sort_values(by='wind_speed', ascending=False)

# merged_station_wind_speed = merged_weather_station.merge(windspeed_snapshot, left_on='weatherstationcode', right_on='station', 
#                                                          how='inner').drop(columns=['station'])

# merged_station_wind_speed['exceed_threshold'] = merged_station_wind_speed.apply(lambda row: int(row['wind_speed'] > row['alert']), axis=1)
# merged_station_wind_speed.head()



In [None]:
# Evaluate each span's PSPS probability from its unique upstream stations.
new_span_probabilities = {
    globalid: calculate_span_PSPS_probability(upstream_stations)
    for globalid, upstream_stations in uniqueUpsteamWStoSpan.items()
}

In [None]:
# Rank spans from highest to lowest probability and tabulate the result.
ranked_span_probabilities = sorted(
    new_span_probabilities.items(),
    key=lambda span_and_probability: span_and_probability[1],
    reverse=True,
)
span_with_new_prob = dict(ranked_span_probabilities)
span_with_new_prob_df = pd.DataFrame(ranked_span_probabilities, columns=['span', 'probability'])
span_with_new_prob_df

In [None]:
#Segment
# Segment level: attach each span's probability, then roll up per parent feeder.
dev_wings_agg_span_with_probabilities = pd.merge(
    dev_wings_agg_span,
    span_with_new_prob_df,
    left_on='globalid',
    right_on='span',
)

segment_aggregations = {
    'span_count': ('globalid', 'count'),
    'segment_psps_value': ('probability', 'mean'),
    'sum_of_customers': ('cust_total', 'sum'),
}
segment_data = (
    dev_wings_agg_span_with_probabilities
    .groupby('parent_feederid')
    .agg(**segment_aggregations)
)

# Inspect a single feeder as a spot check.
segment_data.loc[segment_data.index == '222']

In [None]:
#Circuit
# Circuit level: the same roll-up, keyed by the upstream facility id.
circuit_aggregations = {
    'span_count': ('globalid', 'count'),
    'circuit_psps_value': ('probability', 'mean'),
    'sum_of_customers': ('cust_total', 'sum'),
}
circuit_data = (
    dev_wings_agg_span_with_probabilities
    .groupby('upstreamardfacilityid')
    .agg(**circuit_aggregations)
)

# Inspect a single circuit as a spot check.
circuit_data.loc[circuit_data.index == '100-1122R']

In [None]:
def upstream_weather_stations_to_span(dev_wings_agg_span, G, merged_station_psps_spans):
    """Map every span to the station records found on it and on its upstream spans.

    Parameters
    ----------
    dev_wings_agg_span : pandas.DataFrame
        Span table; only its ``globalid`` column is read here.
    G : object
        Span network handed to ``getUpstream`` unchanged.
    merged_station_psps_spans : pandas.DataFrame
        One row per span that hosts a station, with ``globalid``, ``station``,
        ``PSPS_probability``, ``above_threshold_count`` and
        ``wind_speed_count`` columns.

    Returns
    -------
    dict
        ``globalid -> list`` of ``(station, PSPS_probability,
        above_threshold_count, wind_speed_count)`` tuples, one per span in
        ``getUpstream(G, globalid, 'dfs')`` followed by the span itself that
        hosts a station. Unlike a "unique" variant, entries are not
        de-duplicated here.
    """
    # Station record keyed by the span it sits on (a later row wins on duplicates).
    station_record_by_span = {
        record.globalid: (
            record.station,
            record.PSPS_probability,
            record.above_threshold_count,
            record.wind_speed_count,
        )
        for record in merged_station_psps_spans.itertuples()
    }

    stations_by_span = {}
    for record in dev_wings_agg_span.itertuples():
        # Presumably the spans upstream of this one (depth-first), then the span itself.
        candidate_spans = getUpstream(G, record.globalid, 'dfs') + [record.globalid]
        stations_by_span[record.globalid] = [
            station_record_by_span[span_id]
            for span_id in candidate_spans
            if span_id in station_record_by_span
        ]

    return stations_by_span

In [None]:
# Non-deduplicated station records for every span, including its upstream spans.
upstream_weather_station_to_span = upstream_weather_stations_to_span(
    dev_wings_agg_span,
    G,
    merged_station_psps_spans,
)

In [None]:
# Reload the windspeed records and derive the calendar year of each one.
windspeed_snapshot_copy = load_data(windspeed_path)
observation_dates = pd.to_datetime(windspeed_snapshot_copy["date"])
windspeed_snapshot_copy["date"] = observation_dates
windspeed_snapshot_copy["year"] = observation_dates.dt.year


def _year_span(years):
    """Difference between the latest and earliest year in the group."""
    return years.max() - years.min()


def _records_per_year(stats_row):
    """Record count divided by the year span; 0 when the span is not positive."""
    if stats_row["duration"] > 0:
        return stats_row["count"] / stats_row["duration"]
    return 0


# Per station: the year span covered and the number of windspeed records.
station_stats = windspeed_snapshot_copy.groupby("station")["year"].agg(
    duration=_year_span,
    count="count",
)
station_stats["expected_fire_per_year"] = station_stats.apply(_records_per_year, axis=1)

expected_fire_per_year_per_station = station_stats["expected_fire_per_year"].to_dict()

# Per span: average the yearly rate over its station records (0 if it has none).
# NOTE(review): a station code absent from the windspeed data raises KeyError
# here - confirm every station in the upstream map has windspeed records.
expected_fire_per_year_per_span = {}
for globalid, stations_per_span in upstream_weather_station_to_span.items():
    station_rates = [
        expected_fire_per_year_per_station[station_record[0]]
        for station_record in stations_per_span
    ]
    expected_fire_per_year_per_span[globalid] = np.mean(station_rates) if len(station_rates) > 0 else 0

expected_fire_per_year_per_span_df = pd.DataFrame(
    list(expected_fire_per_year_per_span.items()),
    columns=['span', 'expected_fire'],
)
dev_wings_agg_span_with_probabilities_expected_fire = dev_wings_agg_span_with_probabilities.merge(
    expected_fire_per_year_per_span_df,
    left_on='globalid',
    right_on='span',
)

In [None]:
def calculate_annual_customer_count(row):
    """Return the customers expected to be affected in a year for one span row.

    The annual probability is ``1 - (1 - probability) ** expected_fire``; it
    is multiplied by the row's ``cust_total``.

    Parameters
    ----------
    row : mapping
        Must provide ``probability``, ``expected_fire`` and ``cust_total``.
    """
    # Chance that none of the expected yearly events triggers, then its complement.
    no_event_probability = (1 - row['probability']) ** row['expected_fire']
    return (1 - no_event_probability) * row['cust_total']

# Expected customers affected per year for every span.
annual_customers_per_span = dev_wings_agg_span_with_probabilities_expected_fire.apply(
    calculate_annual_customer_count,
    axis=1,
)
dev_wings_agg_span_with_probabilities_expected_fire['annual_cust_total'] = annual_customers_per_span

# Sum the span-level figures per parent feeder.
segment_annual_customer = (
    dev_wings_agg_span_with_probabilities_expected_fire
    .groupby('parent_feederid')[['annual_cust_total']]
    .sum()
)

# Spot check on a single feeder.
feederid_222 = segment_annual_customer.loc[segment_annual_customer.index == '222']
feederid_222

In [None]:
# Project the selected feeder's annual figure over ten years.
annual_customer_affected = feederid_222['annual_cust_total'].iloc[0]
next_10_years = 10 * annual_customer_affected

print(f"Annual customers affected: {annual_customer_affected}")
print(f"Expected Customers Affected in the next 10 years: {np.floor(next_10_years)}")