In [118]:
import pandas as pd

import numpy as np


# Seed NumPy's global RNG so the synthetic dataset is identical on every run.
np.random.seed(42)

regions = ['North', 'South', 'East', 'West']
substations = [f"S{i:02}" for i in range(1, 51)]  # 'S01' .. 'S50'
n_substations = len(substations)

# Synthetic dataset: one row per substation, each with its own month-end
# billing date. The np.random calls must stay in this order -- reordering
# them changes the values drawn for every column.
df = pd.DataFrame({
    'Substation': substations,
    'Region': np.random.choice(regions, size=n_substations),
    # An explicit MonthEnd offset replaces the 'M' alias, which pandas >= 2.2
    # deprecates (FutureWarning); the offset object works on older and newer
    # pandas and yields the same month-end dates.
    'Billing_Cycle': pd.date_range(
        start="2023-01-01", periods=n_substations, freq=pd.offsets.MonthEnd()
    ),
    'Energy_Consumed_MWh': np.random.randint(800, 2000, size=n_substations),
    'Energy_Supplied_MWh': np.random.randint(900, 2100, size=n_substations),
    'Outage_Hours': np.random.poisson(2.5, size=n_substations),
    'Peak_Load_MW': np.random.uniform(60, 150, size=n_substations).round(2),
    'Voltage_Fluctuation_%': np.random.uniform(0.5, 2.5, size=n_substations).round(2),
    'Maintenance_Visits': np.random.randint(0, 4, size=n_substations),
    'Power_Factor': np.random.uniform(0.85, 1.0, size=n_substations).round(3),
})

  'Billing_Cycle': pd.date_range(start="2023-01-01", periods=50, freq='M'),


In [119]:
# Preview the first rows instead of dumping the whole 50-row frame.
df.head(10)

Unnamed: 0,Substation,Region,Billing_Cycle,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor
0,S01,East,2023-01-31,1486,2080,3,73.95,2.46,3,0.952
1,S02,West,2023-02-28,1757,914,4,148.37,0.85,1,0.922
2,S03,North,2023-03-31,1362,964,2,135.5,0.53,0,0.924
3,S04,East,2023-04-30,1631,1420,2,137.44,2.03,0,0.862
4,S05,East,2023-05-31,1954,2052,3,82.52,2.11,3,0.864
5,S06,West,2023-06-30,1446,1547,0,63.5,1.19,3,0.94
6,S07,North,2023-07-31,820,1986,2,87.29,1.43,2,0.933
7,S08,North,2023-08-31,1640,2062,3,108.34,1.8,0,0.882
8,S09,East,2023-09-30,966,1492,1,89.4,0.6,2,0.992
9,S10,South,2023-10-31,1187,1291,2,134.51,2.4,3,0.967


# Section A: groupby() — Understanding Patterns by Region or Time

## A1. (Basic) Total energy consumption per region

In [120]:
# Sum the consumed energy (MWh) within each region.
df['Energy_Consumed_MWh'].groupby(df['Region']).sum()

Region
East     17227
North    13036
South    16778
West     22967
Name: Energy_Consumed_MWh, dtype: int32

## A2. (Basic) Average outage hours by region

In [121]:
# Mean outage hours across the rows of each region.
df.groupby('Region')['Outage_Hours'].agg('mean')

Region
East     2.000000
North    2.900000
South    2.909091
West     2.687500
Name: Outage_Hours, dtype: float64

## A3. (Intermediate) Count how many substations had > 2 maintenance visits, grouped by region

In [122]:
# Keep only rows with more than 2 maintenance visits, then count the
# distinct substations per region. Regions with no such rows do not appear.
frequently_serviced = df.loc[df['Maintenance_Visits'].gt(2)]
frequently_serviced.groupby('Region')['Substation'].nunique()

Region
East     5
North    1
South    4
West     5
Name: Substation, dtype: int64

## A4. (Intermediate) Average Power Factor by Region & Year

In [123]:
# Derive the calendar year of each billing date; the column is added to df
# in place because a later cell groups on 'year' again.
df['year'] = df['Billing_Cycle'].dt.year

# Mean power factor for every (Region, year) combination present in the data.
power_factor_groups = df.groupby(['Region', 'year'])['Power_Factor']
average_power_factor = power_factor_groups.agg('mean')

In [124]:
# Show the mean Power_Factor Series indexed by (Region, year).
average_power_factor

Region  year
East    2023    0.921167
        2024    0.982667
        2026    0.927750
North   2023    0.913000
        2024    0.970500
        2025    0.951667
        2026    0.939000
South   2023    0.967000
        2024    0.976000
        2025    0.927400
        2026    0.900000
        2027    0.969000
West    2023    0.931000
        2024    0.931000
        2025    0.919250
        2026    0.910600
Name: Power_Factor, dtype: float64

## A5. (Complex) For each region, get substations that had the highest fluctuation in voltage

In [125]:
# Highest voltage fluctuation recorded for each (Region, Substation) pair.
fluctuation = (
    df.groupby(['Region', 'Substation'])['Voltage_Fluctuation_%']
    .max()
    .reset_index()
)

# Broadcast each region's maximum back onto its rows and keep every substation
# that reaches it. Unlike idxmax, which returns only the first match, this
# retains all substations when several tie for the regional maximum.
region_peak = fluctuation.groupby('Region')['Voltage_Fluctuation_%'].transform('max')
result = fluctuation[fluctuation['Voltage_Fluctuation_%'] == region_peak]

result

   Region Substation  Voltage_Fluctuation_%
0    East        S01                   2.46
22  North        S39                   1.81
33  South        S50                   2.44
39   West        S19                   2.39


# Section B: agg() — Multiple Metrics in One Shot

## B1. (Basic) Min & Max Peak Load per region

In [126]:
# Smallest and largest peak load per region; the double brackets keep the
# column name as the top level of the resulting column MultiIndex.
df.groupby('Region')[['Peak_Load_MW']].agg(['min', 'max'])

Unnamed: 0_level_0,Peak_Load_MW,Peak_Load_MW
Unnamed: 0_level_1,min,max
Region,Unnamed: 1_level_2,Unnamed: 2_level_2
East,68.26,147.26
North,63.7,142.37
South,64.29,134.51
West,63.5,148.37


## B2. (Basic) Total visits & average outage per region

In [127]:
# Per region: total maintenance visits and mean outage hours, rounded to 2 dp.
region_summary = df.groupby('Region').agg(
    Maintenance_Visits=('Maintenance_Visits', 'sum'),
    Outage_Hours=('Outage_Hours', 'mean'),
)
region_summary.round(2)

Unnamed: 0_level_0,Maintenance_Visits,Outage_Hours
Region,Unnamed: 1_level_1,Unnamed: 2_level_1
East,22,2.0
North,11,2.9
South,17,2.91
West,24,2.69


## B3. (Intermediate) Aggregate voltage fluctuation & power factor

In [128]:
# Whole-frame sum / min / max for the two quality metrics (no grouping).
quality_columns = ['Voltage_Fluctuation_%', 'Power_Factor']
df[quality_columns].agg(['sum', 'min', 'max'])

Unnamed: 0,Voltage_Fluctuation_%,Power_Factor
sum,79.44,46.74
min,0.53,0.858
max,2.46,0.999


## B4. (Intermediate) Create custom metric: loss percent = (Supplied - Consumed) / Supplied × 100

In [129]:
def compute_loss_percent(group):
    """Return the energy loss of ``group`` as a percentage of energy supplied.

    Parameters
    ----------
    group : DataFrame-like
        Must provide 'Energy_Supplied_MWh' and 'Energy_Consumed_MWh' columns.

    Returns
    -------
    The value (total supplied - total consumed) / total supplied * 100,
    computed from the column sums.
    """
    total_supplied = group['Energy_Supplied_MWh'].sum()
    total_consumed = group['Energy_Consumed_MWh'].sum()
    shortfall = total_supplied - total_consumed
    return (shortfall / total_supplied) * 100

In [130]:
# Apply the custom loss metric per region. Selecting only the two energy
# columns keeps the grouping column out of the frames handed to the function,
# which avoids the pandas >= 2.2 DeprecationWarning about apply operating on
# grouping columns; the resulting values are unchanged.
energy_columns = ['Energy_Supplied_MWh', 'Energy_Consumed_MWh']
(
    df.groupby('Region')[energy_columns]
    .apply(compute_loss_percent)
    .round(2)
    .rename("Loss_Percent (%)")
)

  df.groupby('Region').apply(compute_loss_percent).round(2).rename("Loss_Percent (%)")


Region
East     13.24
North    14.49
South     1.22
West      5.77
Name: Loss_Percent (%), dtype: float64

## B5. (Complex) Aggregated stats across multiple dimensions with renaming

In [131]:
# Named aggregations: output column name -> (source column, reducer).
# Relies on the 'year' column added to df in the A4 cell.
summary_spec = {
    'Average_voltage_fluctuation': pd.NamedAgg(column='Voltage_Fluctuation_%', aggfunc='mean'),
    'Max_energy_supplied': pd.NamedAgg(column='Energy_Supplied_MWh', aggfunc='max'),
    'Number_of_maintenance_visits': pd.NamedAgg(column='Maintenance_Visits', aggfunc='sum'),
}
result = df.groupby(['Region', 'year']).agg(**summary_spec)
print(result)

             Average_voltage_fluctuation  Max_energy_supplied  \
Region year                                                     
East   2023                     1.748333                 2080   
       2024                     1.763333                 1679   
       2026                     1.222500                 1938   
North  2023                     1.253333                 2062   
       2024                     1.675000                 1956   
       2025                     1.096667                 1542   
       2026                     1.490000                 1694   
South  2023                     2.400000                 1291   
       2024                     2.165000                 1971   
       2025                     1.440000                 1704   
       2026                     1.020000                 1763   
       2027                     1.890000                 1648   
West   2023                     1.020000                 1547   
       2024              

# Section C: rolling() — Moving Window Analysis

## C1. (Basic) 3-period rolling average of Energy Consumed

In [132]:
# Trailing 3-row mean of consumption; min_periods=1 lets the first two rows
# average over the rows available so far instead of producing NaN.
consumption = df['Energy_Consumed_MWh']
df['Energy_consumed_3-period_rolling'] = consumption.rolling(3, min_periods=1).mean()

In [133]:
# Preview the first rows to inspect the new 3-period rolling mean column.
df.head(10)

Unnamed: 0,Substation,Region,Billing_Cycle,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling
0,S01,East,2023-01-31,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0
1,S02,West,2023-02-28,1757,914,4,148.37,0.85,1,0.922,2023,1621.5
2,S03,North,2023-03-31,1362,964,2,135.5,0.53,0,0.924,2023,1535.0
3,S04,East,2023-04-30,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333
4,S05,East,2023-05-31,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0
5,S06,West,2023-06-30,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0
6,S07,North,2023-07-31,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667
7,S08,North,2023-08-31,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0
8,S09,East,2023-09-30,966,1492,1,89.4,0.6,2,0.992,2023,1142.0
9,S10,South,2023-10-31,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333


## C2. (Basic) 2-period rolling max of Outage Hours

In [134]:
# Larger of the current and previous row's outage hours; the first row has
# no predecessor, so it is NaN.
outage_window = df['Outage_Hours'].rolling(2)
df['rolling_max'] = outage_window.max()

In [135]:
# Preview the first rows to inspect the new rolling_max column.
df.head(10)

Unnamed: 0,Substation,Region,Billing_Cycle,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max
0,S01,East,2023-01-31,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,
1,S02,West,2023-02-28,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0
2,S03,North,2023-03-31,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0
3,S04,East,2023-04-30,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0
4,S05,East,2023-05-31,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0
5,S06,West,2023-06-30,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0
6,S07,North,2023-07-31,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0
7,S08,North,2023-08-31,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0
8,S09,East,2023-09-30,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0
9,S10,South,2023-10-31,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0


## C3. (Intermediate) 5-month rolling mean of Peak Load

In [136]:
# Index the frame by billing date. Guarded and non-inplace so the cell can be
# re-run: an unconditional inplace set_index raises KeyError on a second run
# because 'Billing_Cycle' is then no longer a column.
if 'Billing_Cycle' in df.columns:
    df = df.set_index('Billing_Cycle')

# The window is 5 rows; each row is one monthly billing cycle, so it spans
# 5 months. The first four rows are NaN (incomplete window).
df['rolling_mean_of_peak_load'] = df['Peak_Load_MW'].rolling(5).mean()

In [137]:
# Preview the first rows to inspect the new rolling_mean_of_peak_load column.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max,rolling_mean_of_peak_load
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,,
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0,
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0,
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0,
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0,115.556
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0,113.466
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0,101.25
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0,95.818
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0,86.21
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0,96.608


## C4. (Intermediate) 3-month centered rolling std of Voltage Fluctuation

In [138]:
# Centered 3-row window: each value is the std of the previous, current and
# next rows, so the first and last rows are NaN.
centered_window = df['Voltage_Fluctuation_%'].rolling(3, center=True)
df['Std_voltage_fluctuation'] = centered_window.std()

In [139]:
# Preview the first rows to inspect the new Std_voltage_fluctuation column.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max,rolling_mean_of_peak_load,Std_voltage_fluctuation
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,,,
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0,,1.03436
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0,,0.790021
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0,,0.890019
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0,115.556,0.50964
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0,113.466,0.477214
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0,101.25,0.3073
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0,95.818,0.614519
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0,86.21,0.916515
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0,96.608,1.003809


## C5. (Complex) Rolling average consumption & delta from current

In [140]:
rolling_consumption = df['Energy_consumed_3-period_rolling']

# 'delta': next row's rolling mean minus this row's (forward-looking, so the
# last row is NaN).
df['delta'] = rolling_consumption.shift(-1) - rolling_consumption

# 'delta1': how far the current consumption sits above (+) or below (-) its
# own 3-period rolling mean.
df['delta1'] = df['Energy_Consumed_MWh'].sub(rolling_consumption)

In [141]:
# Preview the first rows to inspect the new delta and delta1 columns.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max,rolling_mean_of_peak_load,Std_voltage_fluctuation,delta,delta1
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,,,,135.5,0.0
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0,,1.03436,-86.5,135.5
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0,,0.790021,48.333333,-173.0
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0,,0.890019,65.666667,47.666667
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0,115.556,0.50964,28.0,305.0
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0,113.466,0.477214,-270.333333,-231.0
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0,101.25,0.3073,-104.666667,-586.666667
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0,95.818,0.614519,-160.0,338.0
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0,86.21,0.916515,122.333333,-176.0
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0,96.608,1.003809,-80.0,-77.333333


# Section D: expanding() — Cumulative Trend Analysis

## D1. (Basic) Cumulative mean of Energy Supplied

In [142]:
# Running mean of supplied energy over all rows from the first up to the
# current one.
supplied_so_far = df['Energy_Supplied_MWh'].expanding(min_periods=1)
df['Cumulative_mean'] = supplied_so_far.mean()

In [143]:
# Preview the first rows to inspect the new Cumulative_mean column.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max,rolling_mean_of_peak_load,Std_voltage_fluctuation,delta,delta1,Cumulative_mean
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,,,,135.5,0.0,2080.0
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0,,1.03436,-86.5,135.5,1497.0
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0,,0.790021,48.333333,-173.0,1319.333333
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0,,0.890019,65.666667,47.666667,1344.5
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0,115.556,0.50964,28.0,305.0,1486.0
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0,113.466,0.477214,-270.333333,-231.0,1496.166667
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0,101.25,0.3073,-104.666667,-586.666667,1566.142857
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0,95.818,0.614519,-160.0,338.0,1628.125
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0,86.21,0.916515,122.333333,-176.0,1613.0
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0,96.608,1.003809,-80.0,-77.333333,1580.8


## D2. (Basic) Expanding sum of maintenance visits

In [144]:
# Running total of maintenance visits from the first row to the current one
# (expanding().sum() returns floats, as the displayed output shows).
visits_so_far = df['Maintenance_Visits'].expanding(min_periods=1)
df['Sum_of_maintenance_visits'] = visits_so_far.sum()

In [145]:
# Preview the first rows to inspect the new Sum_of_maintenance_visits column.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max,rolling_mean_of_peak_load,Std_voltage_fluctuation,delta,delta1,Cumulative_mean,Sum_of_maintenance_visits
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,,,,135.5,0.0,2080.0,3.0
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0,,1.03436,-86.5,135.5,1497.0,4.0
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0,,0.790021,48.333333,-173.0,1319.333333,4.0
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0,,0.890019,65.666667,47.666667,1344.5,4.0
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0,115.556,0.50964,28.0,305.0,1486.0,7.0
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0,113.466,0.477214,-270.333333,-231.0,1496.166667,10.0
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0,101.25,0.3073,-104.666667,-586.666667,1566.142857,12.0
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0,95.818,0.614519,-160.0,338.0,1628.125,12.0
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0,86.21,0.916515,122.333333,-176.0,1613.0,14.0
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0,96.608,1.003809,-80.0,-77.333333,1580.8,17.0


## D3. (Intermediate) Expanding std of power factor

In [146]:
# Running standard deviation of power factor; the first row is NaN because
# a standard deviation needs at least two observations.
power_factor_so_far = df['Power_Factor'].expanding(min_periods=1)
df['std_power_factor'] = power_factor_so_far.std()

In [147]:
# Preview the first rows to inspect the new std_power_factor column.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max,rolling_mean_of_peak_load,Std_voltage_fluctuation,delta,delta1,Cumulative_mean,Sum_of_maintenance_visits,std_power_factor
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,,,,135.5,0.0,2080.0,3.0,
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0,,1.03436,-86.5,135.5,1497.0,4.0,0.021213
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0,,0.790021,48.333333,-173.0,1319.333333,4.0,0.016773
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0,,0.890019,65.666667,47.666667,1344.5,4.0,0.037895
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0,115.556,0.50964,28.0,305.0,1486.0,7.0,0.039965
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0,113.466,0.477214,-270.333333,-231.0,1496.166667,10.0,0.038526
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0,101.25,0.3073,-104.666667,-586.666667,1566.142857,12.0,0.036168
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0,95.818,0.614519,-160.0,338.0,1628.125,12.0,0.035329
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0,86.21,0.916515,122.333333,-176.0,1613.0,14.0,0.042913
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0,96.608,1.003809,-80.0,-77.333333,1580.8,17.0,0.043212


## D4. (Intermediate) Expanding max of peak load

In [148]:
# Highest peak load seen from the first row up to the current one.
peak_load_so_far = df['Peak_Load_MW'].expanding(min_periods=1)
df['expanding_max_of_peak_load'] = peak_load_so_far.max()

In [149]:
# Preview the first rows to inspect the new expanding_max_of_peak_load column.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,Energy_consumed_3-period_rolling,rolling_max,rolling_mean_of_peak_load,Std_voltage_fluctuation,delta,delta1,Cumulative_mean,Sum_of_maintenance_visits,std_power_factor,expanding_max_of_peak_load
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,1486.0,,,,135.5,0.0,2080.0,3.0,,73.95
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,1621.5,4.0,,1.03436,-86.5,135.5,1497.0,4.0,0.021213,148.37
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,1535.0,4.0,,0.790021,48.333333,-173.0,1319.333333,4.0,0.016773,148.37
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,1583.333333,2.0,,0.890019,65.666667,47.666667,1344.5,4.0,0.037895,148.37
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,1649.0,3.0,115.556,0.50964,28.0,305.0,1486.0,7.0,0.039965,148.37
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,1677.0,3.0,113.466,0.477214,-270.333333,-231.0,1496.166667,10.0,0.038526,148.37
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,1406.666667,2.0,101.25,0.3073,-104.666667,-586.666667,1566.142857,12.0,0.036168,148.37
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,1302.0,3.0,95.818,0.614519,-160.0,338.0,1628.125,12.0,0.035329,148.37
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,1142.0,3.0,86.21,0.916515,122.333333,-176.0,1613.0,14.0,0.042913,148.37
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,1264.333333,2.0,96.608,1.003809,-80.0,-77.333333,1580.8,17.0,0.043212,148.37


## D5. (Complex) Flag if current outage is greater than expanding avg

In [153]:
outage_hours = df['Outage_Hours']

# Running mean of outage hours; note that it includes the current row.
df['ExpandingAvg'] = outage_hours.expanding(min_periods=1).mean()

# True where the current outage strictly exceeds that running mean.
df['flag'] = outage_hours.gt(df['ExpandingAvg'])

In [154]:
# Preview the first rows to inspect the new ExpandingAvg and flag columns.
df.head(10)

Unnamed: 0_level_0,Substation,Region,Energy_Consumed_MWh,Energy_Supplied_MWh,Outage_Hours,Peak_Load_MW,Voltage_Fluctuation_%,Maintenance_Visits,Power_Factor,year,...,rolling_mean_of_peak_load,Std_voltage_fluctuation,delta,delta1,Cumulative_mean,Sum_of_maintenance_visits,std_power_factor,expanding_max_of_peak_load,ExpandingAvg,flag
Billing_Cycle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-31,S01,East,1486,2080,3,73.95,2.46,3,0.952,2023,...,,,135.5,0.0,2080.0,3.0,,73.95,3.0,False
2023-02-28,S02,West,1757,914,4,148.37,0.85,1,0.922,2023,...,,1.03436,-86.5,135.5,1497.0,4.0,0.021213,148.37,3.5,True
2023-03-31,S03,North,1362,964,2,135.5,0.53,0,0.924,2023,...,,0.790021,48.333333,-173.0,1319.333333,4.0,0.016773,148.37,3.0,False
2023-04-30,S04,East,1631,1420,2,137.44,2.03,0,0.862,2023,...,,0.890019,65.666667,47.666667,1344.5,4.0,0.037895,148.37,2.75,False
2023-05-31,S05,East,1954,2052,3,82.52,2.11,3,0.864,2023,...,115.556,0.50964,28.0,305.0,1486.0,7.0,0.039965,148.37,2.8,True
2023-06-30,S06,West,1446,1547,0,63.5,1.19,3,0.94,2023,...,113.466,0.477214,-270.333333,-231.0,1496.166667,10.0,0.038526,148.37,2.333333,False
2023-07-31,S07,North,820,1986,2,87.29,1.43,2,0.933,2023,...,101.25,0.3073,-104.666667,-586.666667,1566.142857,12.0,0.036168,148.37,2.285714,False
2023-08-31,S08,North,1640,2062,3,108.34,1.8,0,0.882,2023,...,95.818,0.614519,-160.0,338.0,1628.125,12.0,0.035329,148.37,2.375,True
2023-09-30,S09,East,966,1492,1,89.4,0.6,2,0.992,2023,...,86.21,0.916515,122.333333,-176.0,1613.0,14.0,0.042913,148.37,2.222222,False
2023-10-31,S10,South,1187,1291,2,134.51,2.4,3,0.967,2023,...,96.608,1.003809,-80.0,-77.333333,1580.8,17.0,0.043212,148.37,2.2,False
