# Risk Model Y

## Load and Combine Data

In [None]:
import pandas as pd

# Load the 2022 and 2023 outage extracts, then stack them into one frame
csv_paths = [
    'eaglei_outage/eaglei_outages_with_events_2022.csv',
    'eaglei_outage/eaglei_outages_with_events_2023.csv',
]
outage_2022, outage_2023 = map(pd.read_csv, csv_paths)

# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# repeating each file's own row numbers
outage = pd.concat((outage_2022, outage_2023), ignore_index=True)

# Parse start_time into datetimes so it supports date comparisons and .dt access
parsed_start_time = pd.to_datetime(outage['start_time'])
outage['start_time'] = parsed_start_time

print(f"Combined data shape: {outage.shape}")
outage.head()

## Filter and Aggregate

In [None]:
# Filter for California severe weather (Dec 2022 - Mar 2023).
# start_time holds timestamps, so a bare date string is compared as midnight of
# that day. The upper bound is therefore exclusive on 2023-04-01: writing
# "<= '2023-03-31'" would silently drop every outage that began after 00:00
# on March 31.
outage_filtered = outage[
    (outage['state_event'] == 'California') &
    (outage['Event Type'] == 'Severe Weather') &
    (outage['start_time'] >= '2022-12-01') &
    (outage['start_time'] < '2023-04-01')
]

# Aggregate by calendar date and county: total the durations and keep the
# largest max_customers value seen in each (date, county) group.
# The date key is derived from start_time, so after reset_index() it appears
# as a column still named 'start_time'.
outage_agg = outage_filtered.groupby(
    [outage_filtered['start_time'].dt.date, 'county']
).agg({
    'duration': 'sum',
    'max_customers': 'max'
}).reset_index()

# Every aggregated (date, county) row is assigned the same constant score of 1
outage_agg['risk_score'] = 1
outage_agg.head()

## Export Results

In [None]:
# Write the aggregated table to CSV (row index omitted), report how many rows
# were saved, and leave the frame as the cell's displayed output
output_path = 'risk_model_y.csv'
outage_agg.to_csv(output_path, index=False)
print(f"Saved {len(outage_agg)} records")
outage_agg