In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob, os, lib
from dotenv import load_dotenv

# Load variables from a .env file into the process environment; the notebook later
# reads FILE_PATH_OMNI through os.getenv.
load_dotenv()

In [None]:
# Values collected from the "System Reliability Data" sheet of every workbook found
# under the current working directory. The three lists are positionally aligned:
# entry k of each list comes from the same file.
# NOTE(review): `sys` shadows the name of the stdlib module and `df` is rebound to a
# DataFrame further down; the names are kept because later cells read them.
sys, dat, df = [], [], []

for file in glob.glob(os.path.join(os.getcwd(), "**", "*.xlsx"), recursive=True):
    try:
        a = pd.read_excel(file, sheet_name="System Reliability Data", engine='openpyxl')
        b = pd.read_excel(file, sheet_name="System Reliability Data", engine='openpyxl', skiprows=8)
        # Extract all three pieces before touching the lists, so a failure half-way
        # through can never leave `sys`/`dat` longer than `df`.
        system_name = a["Unnamed: 2"][1]
        report_date = a["Unnamed: 2"][2]
        values_row = pd.DataFrame(b.iloc[:,17:-1].iloc[1]).T
    except Exception as err:
        # Best effort: workbooks without the expected sheet/layout are skipped, but
        # the skip is reported instead of being silently swallowed.
        print(f"Skipping {file}: {type(err).__name__}: {err}")
        continue
    sys.append(system_name)
    dat.append(report_date)
    df.append(values_row)

In [None]:
# Stack the one-row frames (one per workbook) into a single table.
c = pd.concat(list(df))

# Attach the per-file metadata; both lists are in the same order as the rows.
c["Date"] = dat
c["System"] = sys

# TODO: even though duplicates are dropped here, duplicated source data should also be removed by hand (just in case)
c = c.drop_duplicates()

# Index the rows by their date while keeping "Date" available as a column.
c.index = c["Date"]

In [None]:
# Number of rows available for each system.
c.loc[:, "System"].value_counts()

In [None]:
# Systems kept for the rest of the analysis. Add or remove names in this list
# instead of chaining one `|` comparison per system.
systems_of_interest = ["Consumers Energy Co.", "DTE Energy Co.", "Indiana Michigan Power Co."]

c1 = c[c["System"].isin(systems_of_interest)]

In [None]:
plt.figure(figsize = (20,4))

# ! Plot
# "Other causes" outage count of each company, divided by that company's own
# maximum so the curves are comparable on one axis.
other_causes = "Total number of outages caused by other causes"

for i in c1["System"].value_counts().index:
    # Select the company's series once instead of filtering the frame twice.
    company_series = c1.loc[c1["System"] == i, other_causes]
    (company_series / company_series.max()).plot(label=i)

# Build the legend once, after every line has been drawn (it used to be rebuilt on
# each iteration). The trailing `;` keeps the Legend repr out of the cell output.
plt.legend(ncol=3);

In [None]:
# Path of the OMNI data file, taken from the FILE_PATH_OMNI environment variable.
omni_path = os.getenv('FILE_PATH_OMNI')
if not omni_path:
    # Fail loudly: the previous `exit(0)` reported success and shut the kernel down.
    raise FileNotFoundError(
        "<Exception> OMNI File Required: set FILE_PATH_OMNI to the path of the OMNI data file"
    )

try:
    # Whitespace-separated text without a header row; columns are labelled 0..54.
    # The separator is a raw string so that `\s` is not treated as a string escape.
    raw_data = pd.read_csv(omni_path, delimiter=r"\s+", names=range(55), engine="python")
except OSError as err:
    # Keep the underlying I/O error attached so the real cause stays visible.
    raise FileNotFoundError(
        f"<Exception> OMNI File Required: could not read {omni_path!r}"
    ) from err

In [None]:
# Calendar year to analyse; drives both the row filter and the hourly index below.
OMNI_YEAR = 2023

# Position of each OMNI column used in this analysis -> readable name.
# TODO: Explain for what every column will be used for ...
# NOTE(review): the names presumably follow the OMNI2 hourly record layout; in that
# layout the second field is a day-of-year rather than a decimal year — confirm.
omni_columns = {
    0: "Year",
    1: "DecimalYear",
    2: "Hour",
    38: "Kp",
    39: "R",
    40: "DST",
    49: "Ap",
    50: "F10.7",
    22: "Proton temperature",
    23: "Proton Density",
    24: "Plasma speed",
    27: "Alpha/Proton ratio",
    28: "Flow Pressure",
    8: "Field Magnitude Average |B|",
}

data = raw_data[list(omni_columns)] # + Data to be used
data = data[data[0] == OMNI_YEAR]
data.columns = list(omni_columns.values())

# One timestamp per hour of OMNI_YEAR (the first instant of the next year is dropped).
hourly_index = pd.date_range(str(OMNI_YEAR), str(OMNI_YEAR + 1), freq="60min")[:-1]
if len(data) != len(hourly_index):
    # The positional index assignment below is only valid for a complete year of rows.
    raise ValueError(
        f"Expected {len(hourly_index)} hourly rows for {OMNI_YEAR}, found {len(data)}"
    )
data.index = hourly_index

# Year / DecimalYear / Hour are no longer needed once the timestamp index is in place.
data = data[["Kp", "R", "DST", "Ap", "F10.7", "Proton temperature", "Proton Density", "Plasma speed", "Alpha/Proton ratio",
              "Flow Pressure", "Field Magnitude Average |B|"]]
data

In [None]:
# Value treated as "missing" in each column; every occurrence is replaced by NaN below.
fill_values = {
    "F10.7": 999.9,
    "Kp": 99,
    "R": 999,
    "DST": 99999,
    "Ap": 999,
    "Proton temperature": 9999999,
    "Proton Density": 999.9,
    "Plasma speed": 9999,
    "Alpha/Proton ratio": 9.999,
    "Flow Pressure": 99.99,
    "Field Magnitude Average |B|": 999.9,
}

# Work on an explicit copy so the column assignments never target a slice of another frame.
data = data.copy()
for column, fill in fill_values.items():
    # Replace the placeholder with NaN and leave every other value unchanged.
    data[column] = np.where(data[column] == fill, np.nan, data[column])

In [None]:
# Keep only the rows whose timestamp is on or after the start of 2023.
from_2023 = data.index >= "2023"
data = data.loc[from_2023]
data

In [None]:
# Helper that adds the requested columns to a frame and fills them with 0.
def create_columns(df, new_columns):
    """Add the columns listed in ``new_columns`` to ``df``, filled with zeros.

    Args:
        df: DataFrame to extend; it is modified in place.
        new_columns: list of column labels to create (existing ones are overwritten).

    Returns:
        The same ``df`` object, now holding the zero-filled columns.
    """
    n_rows, n_new = len(df), len(new_columns)
    df[new_columns] = np.zeros(shape=(n_rows, n_new))
    return df

In [None]:
# Storm-level flag columns, "G0" through "G5".
new_columns_kp = [f"G{level}" for level in range(6)]

# Work on a copy so `data` itself is left untouched, then add the zero-filled flag columns.
df = create_columns(data.copy(), new_columns_kp)

In [None]:
# One boolean mask per storm level, keyed by the column it fills, derived from the
# Kp column of `data`. `between` is inclusive on both ends, so Kp values that fall
# between two bands (e.g. 44 or 45) match no level.
kp = data['Kp']
band_limits = {'G1': (46, 54), 'G2': (56, 64), 'G3': (66, 74), 'G4': (76, 88)}

conditions = {
    'G0': kp.le(43),
    **{band: kp.between(low, high) for band, (low, high) in band_limits.items()},
    'G5': kp.ge(90),
}

# Store each mask in the matching column of `df`.
for G in conditions:
    df[G] = conditions[G]

In [None]:
def resample_kp(df, lvl, closed = None):
    """Resample ``df`` at frequency ``lvl`` and summarise each period.

    Per period: the maximum of ``Kp``, the mean of the solar/plasma measurement
    columns, and the sum of each storm flag ``G1``..``G5``. A ``Total ST`` column
    holds the row-wise sum of those five storm columns. Columns of ``df`` that are
    not listed here do not appear in the result.

    Args:
        df: DataFrame to resample; it must contain the aggregated columns.
        lvl: resampling rule, passed to ``DataFrame.resample`` as ``rule``.
        closed: passed through to ``DataFrame.resample``.

    Returns:
        The aggregated DataFrame, one row per period.
    """
    mean_columns = [
        'R',
        'F10.7',
        'Proton temperature',
        'Proton Density',
        'Plasma speed',
        'Alpha/Proton ratio',
        'Flow Pressure',
        'Field Magnitude Average |B|',
    ]
    storm_columns = ['G1', 'G2', 'G3', 'G4', 'G5']

    # Insertion order fixes the column order of the result.
    how = {'Kp': 'max'}
    how.update(dict.fromkeys(mean_columns, 'mean'))
    how.update(dict.fromkeys(storm_columns, 'sum'))

    summary = df.resample(rule = lvl, closed = closed).agg(how)
    summary['Total ST'] = summary[storm_columns].sum(axis = 1)
    return summary

In [None]:
# Resampling rule handed to resample_kp in the next cell ("M" is pandas' month-end alias).
# NOTE(review): recent pandas releases deprecate "M" in favour of "ME" — confirm
# against the installed pandas version before changing it.
freq = "M"

In [None]:
# Aggregate the hourly table at the chosen frequency.
df_resample_kp = resample_kp(df, freq)

# Bin edges for the "Solar Cycle" column: the i-th interval between consecutive
# edges receives the i-th label.
solar_cycle_edges = [
    '1964-10-01',
    '1976-03-01',
    '1986-09-01',
    "1996-08-01",
    "2008-12-31",
    "2019-12-31",
    "2100-01-01",
]
solar_cycle_numbers = [20, 21, 22, 23, 24, 25]

df_resample_kp['Solar Cycle'] = pd.cut(
    df_resample_kp.index,
    bins=[pd.to_datetime(edge) for edge in solar_cycle_edges],
    labels=solar_cycle_numbers,
)

# After resampling, "Kp" holds the per-period maximum, so name it accordingly.
df_resample_kp = df_resample_kp.rename(columns={"Kp": "Kp max"})
df_resample_kp

In [None]:
# Rows of Consumers Energy Co. only. `.copy()` makes this an independent frame, so
# the column assignments below do not write into a slice of `c1`.
c1_CEC = c1[c1["System"] == "Consumers Energy Co."].copy()

# Bring in the storm counts. Assignment aligns on the index: a row only receives a
# value when its index label also exists in df_resample_kp's index; otherwise NaN.
c1_CEC["TotalStorms"] = df_resample_kp["Total ST"]
for storm_level in ["G1", "G2", "G3", "G4", "G5"]:
    c1_CEC[storm_level] = df_resample_kp[storm_level]

In [None]:
# Display the Consumers Energy Co. table, including the storm-count columns.
c1_CEC

In [None]:
# List the column labels available in the Consumers Energy Co. table.
c1_CEC.columns

In [None]:
# Storm count (x) against outages of unknown cause (y), one point per row.
# NOTE(review): the column label is spelled "unknonwn"; it is kept verbatim because it
# presumably matches the label in the source workbooks — confirm before renaming.
storm_counts = c1_CEC["TotalStorms"]
outage_counts = c1_CEC["Total number of outages caused by unknonwn causes"]
plt.scatter(storm_counts, outage_counts)

In [None]:
plt.figure(figsize=(20,4))

# Draw both series on the same axes, each divided by its own maximum.
for column in ("Total number of outages caused by unknonwn causes", "TotalStorms"):
    series = c1_CEC[column]
    (series / series.max()).plot()

plt.legend(ncol=3)

In [None]:
# Correlation matrix between outages of unknown cause and the storm count.
outages_arr = np.array(c1_CEC["Total number of outages caused by unknonwn causes"])
storms_arr = np.array(c1_CEC["TotalStorms"])
np.corrcoef(outages_arr, storms_arr)

In [None]:
# Pairwise correlation between all numeric columns of the Consumers Energy Co. table.
c1_CEC.corr(numeric_only=True)