In [1]:
!pip install pandas numpy matplotlib scikit-learn datetime

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [3]:
data = pd.read_csv("./Outage_Dataset.csv")

In [4]:
data.describe()

Unnamed: 0,planned_outages_mw,maintenance_outages_mw,forced_outages_mw,total_outages_mw
count,10968.0,10968.0,10968.0,10968.0
mean,10724.540664,6095.297046,5368.670314,22188.508023
std,12431.394182,3536.099017,2944.803302,14372.848434
min,0.0,-1420.0,347.0,191.0
25%,189.0,3527.0,3098.75,11373.75
50%,5938.5,5416.0,4828.5,18326.5
75%,18015.25,8094.75,7061.0,28792.75
max,56522.0,21541.0,35844.0,73541.0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10968 entries, 0 to 10967
Data columns (total 11 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   interval_start_local    10968 non-null  object
 1   interval_start_utc      10968 non-null  object
 2   interval_end_local      10968 non-null  object
 3   interval_end_utc        10968 non-null  object
 4   publish_time_local      10968 non-null  object
 5   publish_time_utc        10968 non-null  object
 6   region                  10968 non-null  object
 7   planned_outages_mw      10968 non-null  int64 
 8   maintenance_outages_mw  10968 non-null  int64 
 9   forced_outages_mw       10968 non-null  int64 
 10  total_outages_mw        10968 non-null  int64 
dtypes: int64(4), object(7)
memory usage: 942.7+ KB


In [6]:
data.shape

(10968, 11)

In [7]:
data

Unnamed: 0,interval_start_local,interval_start_utc,interval_end_local,interval_end_utc,publish_time_local,publish_time_utc,region,planned_outages_mw,maintenance_outages_mw,forced_outages_mw,total_outages_mw
0,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,Mid Atlantic - Dominion,8009,4490,1039,13538
1,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,PJM RTO,14059,7435,4828,26322
2,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,Western,6050,2945,3789,12784
3,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-30 00:00:00-05:00,2015-11-30 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,Mid Atlantic - Dominion,7760,3230,1165,12155
4,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-30 00:00:00-05:00,2015-11-30 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,PJM RTO,13810,7488,5995,27293
...,...,...,...,...,...,...,...,...,...,...,...
10963,2025-11-29 00:00:00-05:00,2025-11-29 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,2025-11-29 00:00:00-05:00,2025-11-29 05:00:00+00:00,PJM RTO,15290,4137,9664,29091
10964,2025-11-29 00:00:00-05:00,2025-11-29 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,2025-11-29 00:00:00-05:00,2025-11-29 05:00:00+00:00,Western,5522,2245,7173,14940
10965,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,2025-12-01 00:00:00-05:00,2025-12-01 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,Mid Atlantic - Dominion,10364,1682,2490,14536
10966,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,2025-12-01 00:00:00-05:00,2025-12-01 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,PJM RTO,15886,4323,9082,29291


# Adding one more field dayOfWeek in Outage_dataset

In [9]:
data['interval_start_local'] = pd.to_datetime(data['interval_start_local'], utc=True).dt.tz_convert(None)
data['dayOfWeek'] = data['interval_start_local'].dt.dayofweek
data['month'] = data['interval_start_local'].dt.month
data['isWeekend'] = data['dayOfWeek'].isin([5, 6]).astype(int)

In [10]:
data

Unnamed: 0,interval_start_local,interval_start_utc,interval_end_local,interval_end_utc,publish_time_local,publish_time_utc,region,planned_outages_mw,maintenance_outages_mw,forced_outages_mw,total_outages_mw,dayOfWeek,month,isWeekend
0,2015-11-28 05:00:00,2015-11-28 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,Mid Atlantic - Dominion,8009,4490,1039,13538,5,11,1
1,2015-11-28 05:00:00,2015-11-28 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,PJM RTO,14059,7435,4828,26322,5,11,1
2,2015-11-28 05:00:00,2015-11-28 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,2015-11-28 00:00:00-05:00,2015-11-28 05:00:00+00:00,Western,6050,2945,3789,12784,5,11,1
3,2015-11-29 05:00:00,2015-11-29 05:00:00+00:00,2015-11-30 00:00:00-05:00,2015-11-30 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,Mid Atlantic - Dominion,7760,3230,1165,12155,6,11,1
4,2015-11-29 05:00:00,2015-11-29 05:00:00+00:00,2015-11-30 00:00:00-05:00,2015-11-30 05:00:00+00:00,2015-11-29 00:00:00-05:00,2015-11-29 05:00:00+00:00,PJM RTO,13810,7488,5995,27293,6,11,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10963,2025-11-29 05:00:00,2025-11-29 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,2025-11-29 00:00:00-05:00,2025-11-29 05:00:00+00:00,PJM RTO,15290,4137,9664,29091,5,11,1
10964,2025-11-29 05:00:00,2025-11-29 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,2025-11-29 00:00:00-05:00,2025-11-29 05:00:00+00:00,Western,5522,2245,7173,14940,5,11,1
10965,2025-11-30 05:00:00,2025-11-30 05:00:00+00:00,2025-12-01 00:00:00-05:00,2025-12-01 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,Mid Atlantic - Dominion,10364,1682,2490,14536,6,11,1
10966,2025-11-30 05:00:00,2025-11-30 05:00:00+00:00,2025-12-01 00:00:00-05:00,2025-12-01 05:00:00+00:00,2025-11-30 00:00:00-05:00,2025-11-30 05:00:00+00:00,PJM RTO,15886,4323,9082,29291,6,11,1


In [11]:
data.tail(30)

Unnamed: 0,interval_start_local,interval_start_utc,interval_end_local,interval_end_utc,publish_time_local,publish_time_utc,region,planned_outages_mw,maintenance_outages_mw,forced_outages_mw,total_outages_mw,dayOfWeek,month,isWeekend
10938,2025-11-21 05:00:00,2025-11-21 05:00:00+00:00,2025-11-22 00:00:00-05:00,2025-11-22 05:00:00+00:00,2025-11-21 00:00:00-05:00,2025-11-21 05:00:00+00:00,Mid Atlantic - Dominion,16738,8604,3539,28881,4,11,0
10939,2025-11-21 05:00:00,2025-11-21 05:00:00+00:00,2025-11-22 00:00:00-05:00,2025-11-22 05:00:00+00:00,2025-11-21 00:00:00-05:00,2025-11-21 05:00:00+00:00,PJM RTO,29522,15154,9729,54405,4,11,0
10940,2025-11-21 05:00:00,2025-11-21 05:00:00+00:00,2025-11-22 00:00:00-05:00,2025-11-22 05:00:00+00:00,2025-11-21 00:00:00-05:00,2025-11-21 05:00:00+00:00,Western,12784,6550,6190,25524,4,11,0
10941,2025-11-22 05:00:00,2025-11-22 05:00:00+00:00,2025-11-23 00:00:00-05:00,2025-11-23 05:00:00+00:00,2025-11-22 00:00:00-05:00,2025-11-22 05:00:00+00:00,Mid Atlantic - Dominion,16159,4636,2644,23439,5,11,1
10942,2025-11-22 05:00:00,2025-11-22 05:00:00+00:00,2025-11-23 00:00:00-05:00,2025-11-23 05:00:00+00:00,2025-11-22 00:00:00-05:00,2025-11-22 05:00:00+00:00,PJM RTO,26115,11633,9558,47306,5,11,1
10943,2025-11-22 05:00:00,2025-11-22 05:00:00+00:00,2025-11-23 00:00:00-05:00,2025-11-23 05:00:00+00:00,2025-11-22 00:00:00-05:00,2025-11-22 05:00:00+00:00,Western,9956,6997,6914,23867,5,11,1
10944,2025-11-23 05:00:00,2025-11-23 05:00:00+00:00,2025-11-24 00:00:00-05:00,2025-11-24 05:00:00+00:00,2025-11-23 00:00:00-05:00,2025-11-23 05:00:00+00:00,Mid Atlantic - Dominion,15759,4592,2648,22999,6,11,1
10945,2025-11-23 05:00:00,2025-11-23 05:00:00+00:00,2025-11-24 00:00:00-05:00,2025-11-24 05:00:00+00:00,2025-11-23 00:00:00-05:00,2025-11-23 05:00:00+00:00,PJM RTO,23495,10346,10177,44018,6,11,1
10946,2025-11-23 05:00:00,2025-11-23 05:00:00+00:00,2025-11-24 00:00:00-05:00,2025-11-24 05:00:00+00:00,2025-11-23 00:00:00-05:00,2025-11-23 05:00:00+00:00,Western,7736,5754,7529,21019,6,11,1
10947,2025-11-24 05:00:00,2025-11-24 05:00:00+00:00,2025-11-25 00:00:00-05:00,2025-11-25 05:00:00+00:00,2025-11-24 00:00:00-05:00,2025-11-24 05:00:00+00:00,Mid Atlantic - Dominion,15276,4036,2600,21912,0,11,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10968 entries, 0 to 10967
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   interval_start_local    10968 non-null  datetime64[ns]
 1   interval_start_utc      10968 non-null  object        
 2   interval_end_local      10968 non-null  object        
 3   interval_end_utc        10968 non-null  object        
 4   publish_time_local      10968 non-null  object        
 5   publish_time_utc        10968 non-null  object        
 6   region                  10968 non-null  object        
 7   planned_outages_mw      10968 non-null  int64         
 8   maintenance_outages_mw  10968 non-null  int64         
 9   forced_outages_mw       10968 non-null  int64         
 10  total_outages_mw        10968 non-null  int64         
 11  dayOfWeek               10968 non-null  int32         
 12  month                   10968 non-null  int32 

In [19]:
totalRows = data['interval_start_local'].count()