In [None]:
import gc
import h5py 
# Best-effort cleanup: close every h5py.File object the garbage collector still
# tracks in this kernel.
for obj in gc.get_objects():  # walks ALL gc-tracked objects
    if isinstance(obj, h5py.File):  # only HDF5 file handles are of interest
        try:
            obj.close()
        except Exception:
            # Handle was already closed (or otherwise unusable); nothing to do.
            # `Exception` rather than a bare `except` so that KeyboardInterrupt
            # and SystemExit still propagate.
            pass

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch

basePath = "."
# Open the reference data read-only. pd.HDFStore defaults to mode="a", which
# creates the file when it is missing and permits accidental writes.
# NOTE(review): absolute, user-specific path — consider deriving it from basePath.
realData = pd.HDFStore(
    "/home/npopkov/syndatagenerators/syndatagenerators/models/ddpm/20240119_OM_privat-19-02.h5",
    mode="r",
)

In [None]:
keys = realData.keys()

# Map each cluster id to the FIRST store key whose frame reports that id in its
# 'cluster_id' column (later keys with an already-seen id are ignored).
targetIds = {}
for key in keys:
    cluster_id = realData[key]['cluster_id'].iloc[0]
    targetIds.setdefault(cluster_id, key)
    

In [None]:
#TODO fix the axis limits
#TODO make cluster plots: get the cluster ids for the household ids, look at different households that share the same cluster id (manually if necessary), cluster the households, and see what clusters form
#TODO remember the parameters used for scaling the original data from the dataset, just as we do with the temp

def normalize(x):
    """Min-max scale ``x`` to the range [0, 1], ignoring NaNs for the bounds.

    Parameters
    ----------
    x : array-like of numbers
        Values to scale. NaN entries do not influence the minimum/maximum and
        remain NaN in the result.

    Returns
    -------
    Array of the same shape as ``x`` with ``(x - min) / (max - min)``.
    If all finite values are identical (``max == min``) the result is all
    zeros instead of the NaNs a 0/0 division would produce.
    """
    lowest = np.nanmin(x)
    span = np.nanmax(x) - lowest
    # A zero span means constant input; divide by 1 so the shifted values (all
    # zeros) come back unchanged instead of triggering 0/0.
    scale = span if span != 0 else 1.0
    return (x - lowest) / scale

def getHouseholdPlots(real, synth, path=None, offset=0):
    """Plot mean daily profiles of real vs. synthetic data for cluster ids 8-11.

    For every cluster id the real series (column "w" of ``real[targetIds[id]]``,
    starting at the first 00:00 timestamp) and the synthetic series
    ``synth[id + offset*20]`` are cut to whole days of 96 samples, averaged
    over days (NaNs ignored) and drawn against the hour of day.

    Parameters
    ----------
    real : mapping from store key to DataFrame with a datetime index and "w" column.
    synth : indexable collection of 1-D synthetic series.
    path : str or None
        If given, the figure is saved to ``path + "Households{offset}.png"``;
        otherwise it is shown.
    offset : int
        Selects which block of 20 synthetic series is used.
    """
    per_day = 96
    hours = np.arange(0, 24, 0.25)

    plt.figure(figsize=(16, 10))
    for cluster in range(8, 12):
        frame = real[targetIds[cluster]]
        stamps = frame.index
        at_midnight = (stamps.hour == 0) & (stamps.minute == 0)
        first_midnight = stamps[at_midnight][0]

        # Keep whole days only, one row per day.
        real_values = frame.loc[first_midnight:, "w"].values
        real_days = real_values[:(len(real_values) // per_day) * per_day].reshape(-1, per_day)

        synth_values = synth[cluster + offset * 20]
        synth_days = synth_values[:(len(synth_values) // per_day) * per_day].reshape(-1, per_day)

        # Panels sit at position id+1 of a 5x4 grid.
        plt.subplot(5, 4, cluster + 1)
        plt.plot(hours, np.nanmean(real_days, axis=0), label="real", alpha=0.7, c="teal")
        plt.plot(hours, np.nanmean(synth_days, axis=0), label="synth", alpha=0.7, c="orange")
        plt.title(f"Cluster ID {cluster}")
    plt.legend()

    if path is None:
        plt.show()
    else:
        plt.savefig(path + f"Households{offset}.png")

def getHistogram(real, synth, path=None, id=0):
    """Overlay log-scaled histograms of min-max normalised real and synthetic values.

    Parameters
    ----------
    real : mapping from store key to DataFrame with a "w" column.
    synth : indexable collection of synthetic series; ``synth[id]`` is used.
    path : str or None
        If given, the figure is saved to ``path + "Histogram{id}.png"``;
        otherwise it is shown.
    id : hashable
        Cluster id; selects the real frame via ``targetIds[id]`` and the
        synthetic series via ``synth[id]``.
    """
    real_values = real[targetIds[id]]["w"].values.flatten()
    synth_values = synth[id].flatten()

    # One figure only: the original created a second, identical figure and
    # left the first one empty and open.
    plt.figure(figsize=(10, 10))
    plt.yscale("log")
    plt.hist(normalize(real_values), bins=500, label="real", alpha=0.5, color="teal")
    plt.hist(normalize(synth_values), bins=500, label="synth", alpha=0.5, color="orange")
    plt.legend()
    if path is not None:
        plt.savefig(path + f"Histogram{id}.png")
    else:
        plt.show()

def getWeek(real, synth, index, path=None, n_rows=18, offset=0):
    """Plot the first week of real vs. synthetic data, one row per cluster id.

    The real series of cluster id ``i`` starts at the first timestamp with
    ``day_of_week == 5`` at 00:00 (Saturday in pandas' Monday=0 numbering);
    the synthetic series ``synth[i + offset*n_rows]`` is taken from its
    beginning. Both are truncated to whole days of 96 samples and the first
    7*96 samples are drawn against ``index[:7*96]`` together with a black
    zero line.

    Parameters
    ----------
    real : mapping from store key to DataFrame with a datetime index and "w" column.
    synth : indexable collection of 1-D synthetic series.
    index : sequence used as the x-axis; at least 7*96 entries are read.
    path : str or None
        If given, the figure is saved to ``path + "Week{offset}.png"``;
        otherwise it is shown.
    n_rows : int
        Number of cluster ids (0 .. n_rows-1) and subplot rows.
    offset : int
        Selects which block of ``n_rows`` synthetic series is used.
    """
    per_day = 96
    week_len = per_day * 7
    week_axis = index[:week_len]
    zero_line = np.repeat([0], week_len)

    plt.figure(figsize=(40, 2.5 * n_rows))
    for row in range(n_rows):
        frame = real[targetIds[row]]
        stamps = frame.index
        is_week_start = (stamps.day_of_week == 5) & (stamps.hour == 0) & (stamps.minute == 0)
        first_start = stamps[is_week_start][0]

        real_values = frame.loc[first_start:, "w"].values
        real_values = real_values[:(len(real_values) // per_day) * per_day].flatten()

        synth_values = synth[row + offset * n_rows]
        synth_values = synth_values[:(len(synth_values) // per_day) * per_day].flatten()

        plt.subplot(n_rows, 1, row + 1)
        plt.plot(week_axis, real_values[:week_len], label="real", alpha=0.7, c="teal")
        plt.plot(week_axis, synth_values[:week_len], label="synth", alpha=0.7, c="orange")
        plt.plot(week_axis, zero_line, color="black")
    plt.legend()

    if path is None:
        plt.show()
    else:
        plt.savefig(path + f"Week{offset}.png")

def getDayMeanYear(real, synth, index, path=None, n_rows=18, offset=0):
    """Plot daily means over up to two years for cluster ids 13, 14 and 15.

    The real series starts at the first 1 January 00:00 timestamp of the
    frame; the synthetic series ``synth[id + offset*n_rows]`` is taken from
    its beginning. Both are split into days of 96 samples, averaged per day
    (NaNs ignored), and the first 730 daily means are drawn against every
    96th entry of ``index``.

    Parameters
    ----------
    real : mapping from store key to DataFrame with a datetime index and "w" column.
    synth : indexable collection of 1-D synthetic series.
    index : sequence used as the x-axis (one entry per sample).
    path : str or None
        If given, the figure is saved to ``path + "DayMeanYear{offset}.png"``;
        otherwise it is shown.
    n_rows : int
        Only used as the stride when selecting the synthetic series.
    offset : int
        Selects which block of ``n_rows`` synthetic series is used.
    """
    per_day = 96
    first_cluster = 13
    n_days = 365 * 2
    day_axis = index[:365 * 96 * 2:96]

    plt.figure(figsize=(20, 10))
    for cluster in range(first_cluster, 16):
        frame = real[targetIds[cluster]]
        stamps = frame.index
        is_new_year = (
            (stamps.month == 1)
            & (stamps.day == 1)
            & (stamps.hour == 0)
            & (stamps.minute == 0)
        )
        year_start = stamps[is_new_year][0]

        real_values = frame.loc[year_start:, "w"].values
        real_days = real_values[:(len(real_values) // per_day) * per_day].reshape(-1, per_day)

        synth_values = synth[cluster + offset * n_rows]
        synth_days = synth_values[:(len(synth_values) // per_day) * per_day].reshape(-1, per_day)

        plt.subplot(3, 1, cluster - first_cluster + 1)
        plt.plot(day_axis, np.nanmean(real_days, axis=1)[:n_days], label="real", c="teal")
        plt.plot(day_axis, np.nanmean(synth_days, axis=1)[:n_days], label="synth", c="orange")
        plt.title(f"Cluster ID {cluster}")
    plt.legend()

    if path is None:
        plt.show()
    else:
        plt.savefig(path + f"DayMeanYear{offset}.png")


def getWeekDayComparison(real, synth, path=None, id=0):
    """Draw one figure per weekday comparing the mean daily profile of real and synthetic data.

    The real series (column "w" of ``real[targetIds[id]]``) starts at the
    first 00:00 timestamp with ``day_of_week == 5``; the synthetic series is
    ``synth[id] - 200`` taken from its beginning. Both are split into days of
    96 samples, every 7th day is grouped, and the NaN-ignoring mean profile of
    each group is plotted against the hour of day.

    Parameters
    ----------
    real : mapping from store key to DataFrame with a datetime index and "w" column.
    synth : indexable collection of 1-D synthetic series.
    path : str or None
        If given, each figure is saved to ``path + "Mean<Weekday>{id}.png"``;
        otherwise it is shown.
    id : hashable
        Cluster id; selects the real frame via ``targetIds[id]`` and the
        synthetic series via ``synth[id]``.
    """
    per_day = 96
    # (weekday label, position inside each 7-day block), in drawing order.
    # NOTE(review): pandas numbers Monday as 0, so the `day_of_week == 5`
    # filter below starts the real data on a Saturday, which would make
    # position 0 Saturdays rather than Fridays. The start day of the synthetic
    # data is not visible here, so the original mapping is kept — confirm.
    weekday_slots = (
        ("Saturday", 1),
        ("Sunday", 2),
        ("Monday", 3),
        ("Tuesday", 4),
        ("Wednesday", 5),
        ("Thursday", 6),
        ("Friday", 0),
    )

    real_df = real[targetIds[id]]
    start_index = real_df.loc[
        (real_df.index.hour == 0)
        & (real_df.index.minute == 0)
        & (real_df.index.day_of_week == 5)
    ].index[0]
    real_series = real_df[start_index:]["w"].values
    real_days = real_series[:(len(real_series) // per_day) * per_day].reshape(-1, per_day)

    # NOTE(review): the constant shift of -200 is unexplained in this file.
    synth_series = synth[id] - 200
    synth_days = synth_series[:(len(synth_series) // per_day) * per_day].reshape(-1, per_day)

    hours = np.linspace(0, 24, per_day)
    for day, slot in weekday_slots:
        # A fresh figure per weekday; the original omitted this for Tuesday,
        # so in save mode Tuesday was drawn on top of the Monday figure.
        plt.figure()
        plt.plot(hours, np.nanmean(real_days[slot::7], axis=0), label=f"real{day}Mean", c="teal")
        plt.plot(hours, np.nanmean(synth_days[slot::7], axis=0), label=f"synth{day}Mean", c="orange")
        plt.xlim(0, 24)
        plt.legend()
        if path is not None:
            # File name carries the cluster id; the original interpolated the
            # whole hour-axis array here.
            plt.savefig(path + f"Mean{day}{id}.png")
        else:
            plt.show()

def getMeanWeek(real, synth, index, path=None, id=0):
    """Plot the mean weekly profile of real vs. synthetic data for one cluster id.

    The real series starts at the first 00:00 timestamp with
    ``day_of_week == 0``; the synthetic series is ``synth[id] + 150`` taken
    from its beginning. Both are cut to whole weeks of 7*96 samples and
    averaged over weeks (NaNs ignored).

    Parameters
    ----------
    real : mapping from store key to DataFrame with a datetime index and "w" column.
    synth : indexable collection of 1-D synthetic series.
    index : sequence used as the x-axis; the first 7*96 entries are read.
    path : str or None
        If given, the figure is saved to ``path + "WeekMean{id}.png"``;
        otherwise it is shown.
    id : hashable
        Cluster id; selects the real frame via ``targetIds[id]`` and the
        synthetic series via ``synth[id]``.
    """
    week_len = 96 * 7
    week_axis = index[:week_len]

    frame = real[targetIds[id]]
    stamps = frame.index
    is_week_start = (stamps.hour == 0) & (stamps.minute == 0) & (stamps.day_of_week == 0)
    first_start = stamps[is_week_start][0]

    real_values = frame.loc[first_start:, "w"].values
    real_weeks = real_values[:(len(real_values) // week_len) * week_len].reshape(-1, week_len)

    # NOTE(review): the constant shift of +150 is unexplained in this file.
    synth_values = synth[id] + 150
    synth_weeks = synth_values[:(len(synth_values) // week_len) * week_len].reshape(-1, week_len)

    plt.figure(figsize=(40, 9))
    plt.plot(week_axis, np.nanmean(real_weeks, axis=0), label="realWeekMean", c="teal")
    plt.plot(week_axis, np.nanmean(synth_weeks, axis=0), label="synthWeekMean", c="orange")
    plt.xlim(index[0], index[week_len - 1])
    plt.legend()

    if path is None:
        plt.show()
    else:
        plt.savefig(path + f"WeekMean{id}.png")

#takes about 2 to 5 minutes at mac
def getClustersMeanWeek(real, synth, index, path=None):
    """Plot the mean weekly profile of real vs. synthetic data, one panel per cluster.

    For every ``(cluster_id, key)`` pair in ``targetIds`` the real series
    (column "w" of ``real[key]``, starting at the first 00:00 timestamp with
    ``day_of_week == 0``) and the synthetic series ``synth[i]`` (``i`` being
    the position of the pair in ``targetIds``) are cut to whole weeks of 7*96
    samples and averaged over weeks (NaNs ignored). The panel of a cluster is
    subplot number ``cluster_id + 1`` in a two-column grid.

    Parameters
    ----------
    real : mapping from store key to DataFrame with a datetime index and "w" column.
    synth : indexable collection of 1-D synthetic series.
    index : sequence used as the x-axis; the first 7*96 entries are read.
    path : str or None
        If given, the figure is saved to ``path + "ClustersMeanWeek.png"``;
        otherwise it is shown.
    """
    week_len = 96 * 7
    week_axis = index[:week_len]

    # Two-column grid: 8 rows as before, more only when a cluster id would not
    # fit (subplot numbers beyond rows*cols raise in matplotlib).
    n_panels = int(max(targetIds, default=-1)) + 1
    n_grid_rows = max(8, (n_panels + 1) // 2)
    last_position = len(targetIds) - 1

    plt.figure(figsize=(20, 40))
    # targetIds maps cluster id -> store key. Iterating the dict directly (as
    # the original did) yields cluster ids, which are not valid keys of `real`.
    # NOTE(review): synth[i] pairs by insertion position, not by cluster id —
    # confirm this matches how the synthetic columns are ordered.
    for i, (cluster_id, key) in enumerate(targetIds.items()):
        real_df = real[key]
        start_index = real_df.loc[
            (real_df.index.hour == 0)
            & (real_df.index.minute == 0)
            & (real_df.index.day_of_week == 0)
        ].index[0]
        real_series = real_df[start_index:]["w"].values
        real_weeks = real_series[:(len(real_series) // week_len) * week_len].reshape(-1, week_len)

        synth_series = synth[i]
        synth_weeks = synth_series[:(len(synth_series) // week_len) * week_len].reshape(-1, week_len)

        # Only the last pair gets labels, so the legend has one entry per colour.
        is_last = i == last_position
        plt.subplot(n_grid_rows, 2, int(cluster_id) + 1)
        plt.plot(week_axis, np.nanmean(real_weeks, axis=0), label="realWeekMean" if is_last else None, c="teal", alpha=0.4)
        plt.plot(week_axis, np.nanmean(synth_weeks, axis=0), label="synthWeekMean" if is_last else None, c="orange", alpha=0.3)
        plt.xlim(index[0], index[week_len - 1])
    plt.legend()
    if path is not None:
        plt.savefig(path + f"ClustersMeanWeek.png")
    else:
        plt.show()


In [None]:

# NOTE(review): absolute, user-specific path — consider deriving it from basePath.
relative = 'syndatagenerators/models/ddpm/experiments/web_expert_tests/2024-06-24-2007-200-32-1e-06-0.02-25/challengeSamples.csv'
synthData = pd.read_csv(f"/home/npopkov/syndatagenerators/{relative}")

# The "Unnamed: 0" column holds the timestamps of the samples.
index = pd.DatetimeIndex(synthData["Unnamed: 0"]).values
# Drop the first (timestamp) column BEFORE converting to NumPy. Converting the
# whole frame first mixes the timestamp strings with the numbers and produces
# an object-dtype array. The result has one row per synthetic series.
synth = synthData.iloc[:, 1:].to_numpy().T

In [None]:
# Mean daily profiles; no path is passed, so the figure is shown rather than saved.
getHouseholdPlots(real=realData, synth=synth)

In [None]:
# First week of every cluster; no path is passed, so the figure is shown rather than saved.
getWeek(real=realData, synth=synth, index=index)

In [None]:
# Daily means over the year; no path is passed, so the figure is shown rather than saved.
getDayMeanYear(real=realData, synth=synth, index=index)

In [None]:
# Mean weekly profile for cluster id 5; shown rather than saved.
getMeanWeek(real=realData, synth=synth, index=index, id=5)

In [None]:
# Per-weekday mean profiles for cluster id 0; shown rather than saved.
getWeekDayComparison(real=realData, synth=synth, id=0)

In [None]:
# Release the HDF5 store that was opened as `realData`.
realData.close()