In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose


This dataset provides comprehensive monthly and annual electric power operational data from the U.S. Energy Information Administration (EIA). It covers the period from 2015 to 2024 and includes detailed metrics on electric power generation, consumption, costs, and emissions. The dataset is designed to support analysis and research into the efficiency and environmental impact of electric power operations across various states and sectors in the United States.

In [None]:
# Load the EIA electric power extract; the path is resolved relative to the
# notebook's working directory.
df = pd.read_csv('electricity_data.csv')

In [3]:
# Column names, dtypes, non-null counts, and memory footprint of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   period               5000 non-null   object
 1   location             5000 non-null   object
 2   stateDescription     5000 non-null   object
 3   sectorid             5000 non-null   int64 
 4   sectorDescription    5000 non-null   object
 5   fueltypeid           5000 non-null   object
 6   fuelTypeDescription  5000 non-null   object
dtypes: int64(1), object(6)
memory usage: 273.6+ KB


In [4]:
# Preview the first rows to see what each column holds
df.head()

Unnamed: 0,period,location,stateDescription,sectorid,sectorDescription,fueltypeid,fuelTypeDescription
0,2024-05,MI,Michigan,2,IPP Non-CHP,NGO,natural gas & other gases
1,2024-05,LA,Louisiana,2,IPP Non-CHP,NGO,natural gas & other gases
2,2024-05,LA,Louisiana,2,IPP Non-CHP,NG,natural gas
3,2024-05,LA,Louisiana,2,IPP Non-CHP,HYC,conventional hydroelectric
4,2024-05,NJ,New Jersey,2,IPP Non-CHP,OBW,biomass


In [6]:
# (number of rows, number of columns)
df.shape

(5000, 7)

In [7]:
# Missing-value count for each column
df.isna().sum()

period                 0
location               0
stateDescription       0
sectorid               0
sectorDescription      0
fueltypeid             0
fuelTypeDescription    0
dtype: int64

In [8]:
# Summary statistics for every column, not only the numeric ones
df.describe(include="all")


Unnamed: 0,period,location,stateDescription,sectorid,sectorDescription,fueltypeid,fuelTypeDescription
count,5000,5000,5000,5000.0,5000,5000,5000
unique,1,64,64,,15,45,43
top,2024-05,ESC,East South Central,,All Sectors,ALL,biomass
freq,5000,215,215,,790,239,374
mean,,,,61.6574,,,
std,,,,44.717486,,,
min,,,,1.0,,,
25%,,,,5.0,,,
50%,,,,94.0,,,
75%,,,,98.0,,,


In [9]:
# Parse the "YYYY-MM" period strings into timestamps, then order rows chronologically
df = (
    df
    .assign(period=pd.to_datetime(df["period"], format="%Y-%m"))
    .sort_values(by="period")
)

# Earliest and latest period present in the data
(df["period"].min(), df["period"].max())


(Timestamp('2024-05-01 00:00:00'), Timestamp('2024-05-01 00:00:00'))

In [10]:
# Count the distinct months. A bare expression in the middle of a cell is not
# displayed (only the cell's last expression is), so print the value explicitly.
n_unique_months = df["period"].nunique()
print(f"Unique months: {n_unique_months}")

# Build the full month-start ("MS") calendar between the first and last
# observed period.
all_months = pd.date_range(
    start=df["period"].min(),
    end=df["period"].max(),
    freq="MS",
)

# Calendar months that never occur in the data. When the data spans a single
# month the calendar holds only that month, so the result is trivially empty.
missing_months = all_months.difference(df["period"].unique())
missing_months


DatetimeIndex([], dtype='datetime64[ns]', freq='MS')