# Energy Use Analysis

# Introduction
This is data from my energy company, ComEd from the past year. 10_22_2022 to 10_22_2023
## Goals
* become familiar with the columns in the dataset

## Source
Data from the [My Green Button] (https://secure.comed.com/MyAccount/MyBillUsage/pages/secure/GreenButtonConnectDownloadMyData.aspx) webpage on the ComEd website.

In [59]:
import pandas as pd
import numpy as np
energy_df = pd.read_csv(filepath_or_buffer="energy_use_10_22_22_to_10_22_23.csv", header=4)

In [60]:
print(energy_df.columns)
energy_df.head()

Index(['TYPE', 'DATE', 'START TIME', 'END TIME', 'USAGE', 'UNITS', 'COST',
       'NOTES'],
      dtype='object')


Unnamed: 0,TYPE,DATE,START TIME,END TIME,USAGE,UNITS,COST,NOTES
0,Electric usage,2022-10-22,00:00,00:29,0.11,kWh,$0.01,
1,Electric usage,2022-10-22,00:30,00:59,0.13,kWh,$0.02,
2,Electric usage,2022-10-22,01:00,01:29,0.09,kWh,$0.01,
3,Electric usage,2022-10-22,01:30,01:59,0.2,kWh,$0.02,
4,Electric usage,2022-10-22,02:00,02:29,0.1,kWh,$0.01,


In [61]:
energy_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17520 entries, 0 to 17519
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   TYPE        17520 non-null  object 
 1   DATE        17520 non-null  object 
 2   START TIME  17520 non-null  object 
 3   END TIME    17520 non-null  object 
 4   USAGE       17520 non-null  float64
 5   UNITS       17520 non-null  object 
 6   COST        17520 non-null  object 
 7   NOTES       0 non-null      float64
dtypes: float64(2), object(6)
memory usage: 1.1+ MB


In [62]:
# Printing all the unique values of uninteresting columns
print([energy_df['TYPE'].unique(),
energy_df['UNITS'].unique(), 
energy_df['NOTES'].unique()])

[array(['Electric usage'], dtype=object), array(['kWh'], dtype=object), array([nan])]


In [63]:
# dropping columns with 0 variance
energy_df_clean = energy_df.drop(['TYPE', 'UNITS', 'NOTES'], axis='columns')
energy_df_clean.head()

Unnamed: 0,DATE,START TIME,END TIME,USAGE,COST
0,2022-10-22,00:00,00:29,0.11,$0.01
1,2022-10-22,00:30,00:59,0.13,$0.02
2,2022-10-22,01:00,01:29,0.09,$0.01
3,2022-10-22,01:30,01:59,0.2,$0.02
4,2022-10-22,02:00,02:29,0.1,$0.01


In [64]:
energy_df_clean['START_DT'] = pd.to_datetime(energy_df['START TIME'], infer_datetime_format=True)
energy_df_clean.head()

Unnamed: 0,DATE,START TIME,END TIME,USAGE,COST,START_DT
0,2022-10-22,00:00,00:29,0.11,$0.01,2023-10-24 00:00:00
1,2022-10-22,00:30,00:59,0.13,$0.02,2023-10-24 00:30:00
2,2022-10-22,01:00,01:29,0.09,$0.01,2023-10-24 01:00:00
3,2022-10-22,01:30,01:59,0.2,$0.02,2023-10-24 01:30:00
4,2022-10-22,02:00,02:29,0.1,$0.01,2023-10-24 02:00:00


In [65]:
# converted the DATE, START, END columns to datetime format
energy_df_clean['DATE_TIME'] = energy_df_clean['DATE'] + ' ' + energy_df_clean['START TIME']
energy_df_clean['DATE_START'] = pd.to_datetime(energy_df_clean['DATE_TIME'], infer_datetime_format=True)

energy_df_clean['DATE_TIME'] = energy_df_clean['DATE'] + ' ' + energy_df_clean['END TIME']
energy_df_clean['DATE_END'] = pd.to_datetime(energy_df_clean['DATE_TIME'], infer_datetime_format=True)
energy_df_clean.head()

Unnamed: 0,DATE,START TIME,END TIME,USAGE,COST,START_DT,DATE_TIME,DATE_START,DATE_END
0,2022-10-22,00:00,00:29,0.11,$0.01,2023-10-24 00:00:00,2022-10-22 00:29,2022-10-22 00:00:00,2022-10-22 00:29:00
1,2022-10-22,00:30,00:59,0.13,$0.02,2023-10-24 00:30:00,2022-10-22 00:59,2022-10-22 00:30:00,2022-10-22 00:59:00
2,2022-10-22,01:00,01:29,0.09,$0.01,2023-10-24 01:00:00,2022-10-22 01:29,2022-10-22 01:00:00,2022-10-22 01:29:00
3,2022-10-22,01:30,01:59,0.2,$0.02,2023-10-24 01:30:00,2022-10-22 01:59,2022-10-22 01:30:00,2022-10-22 01:59:00
4,2022-10-22,02:00,02:29,0.1,$0.01,2023-10-24 02:00:00,2022-10-22 02:29,2022-10-22 02:00:00,2022-10-22 02:29:00


In [66]:
# created a 
energy_df_clean['USAGE_DUR'] = energy_df_clean['DATE_END'] - energy_df_clean['DATE_START']

energy_df_clean.head()

Unnamed: 0,DATE,START TIME,END TIME,USAGE,COST,START_DT,DATE_TIME,DATE_START,DATE_END,USAGE_DUR
0,2022-10-22,00:00,00:29,0.11,$0.01,2023-10-24 00:00:00,2022-10-22 00:29,2022-10-22 00:00:00,2022-10-22 00:29:00,0 days 00:29:00
1,2022-10-22,00:30,00:59,0.13,$0.02,2023-10-24 00:30:00,2022-10-22 00:59,2022-10-22 00:30:00,2022-10-22 00:59:00,0 days 00:29:00
2,2022-10-22,01:00,01:29,0.09,$0.01,2023-10-24 01:00:00,2022-10-22 01:29,2022-10-22 01:00:00,2022-10-22 01:29:00,0 days 00:29:00
3,2022-10-22,01:30,01:59,0.2,$0.02,2023-10-24 01:30:00,2022-10-22 01:59,2022-10-22 01:30:00,2022-10-22 01:59:00,0 days 00:29:00
4,2022-10-22,02:00,02:29,0.1,$0.01,2023-10-24 02:00:00,2022-10-22 02:29,2022-10-22 02:00:00,2022-10-22 02:29:00,0 days 00:29:00


In [71]:
unique_durs = (energy_df_clean['USAGE_DUR']).unique()
# unique_durs = (energy_df_clean['USAGE_DUR'].dt.seconds.astype(str) + ' seconds').unique()
for d in unique_durs:
    print(d)
# print(unique_durs)

1740000000000 nanoseconds
80940000000000 nanoseconds


In [82]:
weird_usage = energy_df_clean[energy_df_clean['USAGE_DUR']==unique_durs[1]]
weird_usage.head()

Unnamed: 0,DATE,START TIME,END TIME,USAGE,COST,START_DT,DATE_TIME,DATE_START,DATE_END,USAGE_DUR
723,2022-11-06,01:30,23:59,2.03,$0.26,2023-10-24 01:30:00,2022-11-06 23:59,2022-11-06 01:30:00,2022-11-06 23:59:00,0 days 22:29:00


In [83]:
print(weird_usage['DATE_START'])

723   2022-11-06 01:30:00
Name: DATE_START, dtype: datetime64[ns]


In [87]:
energy_df_clean.set_index('DATE_START')
wd = weird_usage['DATE_START']
print(wd)
energy_df_clean.drop(index=weird_usage['DATE_START'], inplace=True)
print((energy_df_clean['USAGE_DUR']).unique())

723   2022-11-06 01:30:00
Name: DATE_START, dtype: datetime64[ns]


KeyError: "[Timestamp('2022-11-06 01:30:00')] not found in axis"

# Column / header info
* **col0**: description0
* **col1**: description1

# Initial Observations
* Observation0
* Observation1

In [None]:
df.info()

In [None]:
df.describe

# Cleaning

# Ideas for Improvement
* 
* 