# Investigate Net Energy Production
## Load packages and data

In [2]:
import DataRetriever as dr
import pandas as pd
import plotly.express as px

retriever = dr.DataRetriever()

year_2 = retriever.get_data("All-Subsystems-minute-Year2.pkl")

# Add timestamp converted to UTC as index to year_2
# year_2["Timestamp UTC"] = pd.to_datetime(year_2["Timestamp"], utc=True, infer_datetime_format=True)
# year_2.set_index("Timestamp UTC", inplace=True)

# Load the metadata
metadata = retriever.get_data("metadata-year2.pkl")
metadata.rename(columns={"Unnamed: 0": "Attribute"}, inplace=True)

year_2[year_2["PV_Watts3PhTotalW3PhT1", "PV_Watts3PhTotalW3PhT2", "PV_PVSystem1ACPowerOSPACPV1OS", "PV_PVSystem2ACPowerOSPACPV2OS", "Elec_PowerPV1of2", "Elec_PowerPV2of2"] < 0] = 0

print("Hej")

KeyError: ('PV_Watts3PhTotalW3PhT1', 'PV_Watts3PhTotalW3PhT2', 'PV_PVSystem1ACPowerOSPACPV1OS', 'PV_PVSystem2ACPowerOSPACPV2OS', 'Elec_PowerPV1of2', 'Elec_PowerPV2of2')

## Determine which columns are generating and which are consuming
### Generating Attributes

In [None]:
watt_attributes = metadata[metadata["Units"] == "W"]
print(watt_attributes)

# Define generating attributes as attributes in W that have description "Instantaneous power produced"
powerproduced_attributes = [description.startswith("Instantaneous power produced") for description in watt_attributes["Description"]]
gen_attributes = watt_attributes[powerproduced_attributes]
print(gen_attributes)
gen_attributes = gen_attributes["Attribute"].tolist()
gen_attributes

### Consuming Attributes

In [None]:
# Define generating attributes as attributes in W that have description "Instantaneous power produced"
not_powerproduced = [subsystem != "PV" for subsystem in watt_attributes["Subsystem"]]
con_attributes = watt_attributes[not_powerproduced]
con_attributes = year_2[con_attributes["Attribute"].tolist()]
con_attributes

In [None]:
# Need to further filter the consuming attributes as some are duplicates and measure the same thing

# We use the correlation between attributes to determine if they measure the same thing
correlations_df = con_attributes.corr().abs()

# Create a dict of key value pairs with high correlations.
correlation_pairs = dict()
for rowIndex, row in correlations_df.iterrows(): #RowIndex is the row name, row is a pd.Series of (column name : entry value)
    for columnIndex, value in row.items(): #columnIndex is column name, value is the entry value
        if value > 0.80: #If correlation is larger than threshold and row name and column name is not the same
            if rowIndex != columnIndex: # We dont want pairs of themselves, eg A : [A]
                correlation_pairs.setdefault(rowIndex, []) #Insert row name as key if it does not already exist. Make the value pairs an empty list.
                correlation_pairs[rowIndex].append(columnIndex) #Append the column name as value to the key.

# Since the correlation_pairs dict contains pairs of A : [B] and B : [A], we want to remove the second instance of the pair.
correlation_pairs_cleaned = correlation_pairs.copy()
for value in correlation_pairs_cleaned.values():
    for key in correlation_pairs_cleaned.keys():
        if value == key:
            correlation_pairs_cleaned.pop(key)

# With the cleaned correlation_pairs, we iterate through the consuming attributes dataframes columns, and prop it, if it exists as a value in the dict

print(len(con_attributes.columns))
columnstodrop = list(correlation_pairs_cleaned.values())
for column in con_attributes.columns:
    if [column] in columnstodrop: # We write [column] since columnstodrop is lists in a list.
        con_attributes = con_attributes.drop([column], axis = 1)

print(len(con_attributes.columns))
con_attributes

In [None]:
con_attributes = con_attributes.columns

In [None]:
con_attributes = list(con_attributes)
con_attributes

# Create DataFrame containing only Generating and Consuming attributes

In [None]:
energy_attributes = ["Timestamp"] + gen_attributes + con_attributes
energy_data = year_2[energy_attributes]

## Aggregate to mean values per hour to get Wh

In [None]:
# energy_data = energy_data.resample("H").mean()
# energy_data

# (energy_data["Load_RefrigeratorPowerWithStandby"].sum() + energy_data["Elec_PowerHeatLoadforRefrigerator"].sum())/1000

### Create new attributes "Sum Generated", "Sum Consumed", and "Net Energy"

In [None]:
total_generated = energy_data[gen_attributes].sum().sum()
total_consumed = energy_data[con_attributes].sum().sum()
print(total_generated * (1/60) , total_consumed * (1/60), (total_generated - total_consumed) * (1/60))

In [None]:
energy_data["Sum Generating"] = energy_data[gen_attributes].sum(axis=1)
energy_data["Sum Consuming"] = energy_data[con_attributes].sum(axis=1)
energy_data["Net Energy"] = energy_data["Sum Generating"] - energy_data["Sum Consuming"]
energy_data["Surplus"] = energy_data["Net Energy"] > 0
energy_data

# Plotting consuming attributes

In [None]:
# fig = px.bar(data_frame=energy_data,
#              x=energy_data.index,
#              y="Sum Consuming")
# fig.update_xaxes(range=["2015-03-13", "2015-03-23"])
# fig.show()

## Plotting generating attributes

In [None]:
fig = px.bar(data_frame=energy_data,
             x="Timestamp",
             y="Sum Generating",
             color="Surplus",
             color_discrete_sequence=["darkgreen"])
fig.update_xaxes(range=["2015-03-13", "2015-03-23"])
fig.show()

## Plotting Net Energy

In [None]:
# fig = px.bar(data_frame=energy_data,
#              x="Timestamp",
#              y="Net Energy",
#              color="Surplus",
#              color_discrete_sequence=["darkred", "darkgreen"])
# fig.update_xaxes(range=["2015-03-13", "2015-03-23"])
# fig.show()