In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##### Prepare data

In [None]:
# Read the raw load profiles from a parquet file. The path is relative, so the
# notebook's working directory determines which file is picked up.
df_load = pd.read_parquet(r'old/data/load_profiles.parquet.gzip')

In [None]:
def remove_incomplete_days(df):
    """Drop all rows that belong to a day with fewer than 24 entries.

    Rows are grouped by the `date` column and the non-null entries of every
    other column are counted per day. A day is considered incomplete as soon
    as any column has fewer than 24 entries for it.

    Args:
        df: frame with a `date` column whose values support `strftime`.

    Returns:
        A frame containing only the rows of complete days. The removed days
        are printed as `YYYY-MM-DD` strings.
    """
    counts_per_day = df.groupby('date').count()
    has_gap = counts_per_day.lt(24).any(axis = 1)
    incompleteDays = counts_per_day.loc[has_gap].index

    keep_row = ~df['date'].isin(incompleteDays)
    df_complete = df.loc[keep_row]

    removed_days = [day.strftime('%Y-%m-%d') for day in incompleteDays.tolist()]
    print(f'The following days were removed: {removed_days}')
    return df_complete

# Keep only days with a full set of 24 entries; the unfiltered frame stays available as `df_load`.
df_load_2 = remove_incomplete_days(df_load)

##### GAN

In [None]:
import torch
from torch import nn, cat, optim, full, randn, no_grad
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt


class Generator(nn.Module):
    """Conditional generator producing 24-value samples from noise and a class label.

    The label is looked up in a learned embedding table, the embedding is
    concatenated in front of the noise vector, and the result is passed
    through a three-layer MLP. The final Tanh keeps every output in [-1, 1].
    """

    def __init__(self, dimLatent, classCount, dimEmbedding):
        """
        Args:
            dimLatent: length of the noise vector.
            classCount: number of rows in the label embedding table.
            dimEmbedding: length of the embedding vector per label.
        """
        super().__init__()
        self.dimLatent = dimLatent
        self.classCount = classCount
        self.dimEmbedding = dimEmbedding
        self.labelEmbedding = nn.Embedding(num_embeddings = classCount, embedding_dim = dimEmbedding)

        # Layer order (and therefore the Sequential indices 0..7) is part of the saved state layout.
        layers = [
            # 1st layer: embedded label + noise -> 64
            nn.Linear(dimLatent + dimEmbedding, 64),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            # 2nd layer: 64 -> 128
            nn.Linear(64, 128),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            # 3rd layer: 128 -> 24 output values
            nn.Linear(128, 24),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, noise, labels):
        """Return generated samples for `noise`, conditioned on `labels`."""
        embedded = self.labelEmbedding(labels)
        conditioned = cat((embedded, noise), -1)    # embedding first, noise second
        return self.model(conditioned)


class Discriminator(nn.Module):
    """Conditional discriminator scoring a sample together with its class label.

    The label embedding is appended to the sample and the result is passed
    through a three-layer MLP. The final Sigmoid yields one value in [0, 1]
    per sample.
    """

    def __init__(self, featureCount, classCount, dimEmbedding):
        """
        Args:
            featureCount: number of values per input sample.
            classCount: number of rows in the label embedding table.
            dimEmbedding: length of the embedding vector per label.
        """
        super().__init__()
        self.featureCount = featureCount
        self.classCount = classCount
        self.dimEmbedding = dimEmbedding
        self.labelEmbedding = nn.Embedding(num_embeddings = classCount, embedding_dim = dimEmbedding)

        # Layer order (and therefore the Sequential indices 0..7) is part of the saved state layout.
        layers = [
            # 1st layer: sample + embedded label -> 128
            nn.Linear(featureCount + dimEmbedding, 128),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            # 2nd layer: 128 -> 64
            nn.Linear(128, 64),
            nn.LeakyReLU(),
            nn.Dropout(0.2),
            # 3rd layer: 64 -> single score
            nn.Linear(64, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, data, labels):
        """Return one score per row of `data`, conditioned on `labels`."""
        embedded = self.labelEmbedding(labels)
        conditioned = cat((data, embedded), -1)     # sample first, embedding second
        return self.model(conditioned)


class GAN(object):
    """Conditional GAN: owns a generator, a discriminator and their training loop.

    Both networks are conditioned on an integer class label. While `train`
    runs, one row per batch (losses and gradient norms) is appended to
    `self.df_loss`.
    """

    def __init__(self, device, dataLoader, dimLatent, featureCount, classCount, dimEmbedding, lr, maxNorm, epochCount, testLabel = None, exampleCount = 3):
        """Build both networks, their Adam optimizers and the empty loss log.

        Args:
            device: torch device that holds both networks and all training tensors.
            dataLoader: iterable yielding `(data, target)` batches; `len(dataLoader)` must be defined.
            dimLatent: length of the noise vector fed to the generator.
            featureCount: number of values per sample seen by the discriminator.
            classCount: number of distinct labels.
            dimEmbedding: size of the label embedding used by both networks.
            lr: learning rate shared by both Adam optimizers.
            maxNorm: `max_norm` passed to gradient clipping for both networks.
            epochCount: number of passes over `dataLoader`.
            testLabel: when an int, preview samples for this label are plotted
                and saved during training; any other value disables previews.
            exampleCount: number of preview samples per plot.
        """
        self.device = device
        self.dataLoader = dataLoader
        self.dimLatent = dimLatent
        self.featureCount = featureCount
        self.classCount = classCount
        self.dimEmbedding = dimEmbedding
        self.lr = lr
        self.maxNorm = maxNorm
        self.epochCount = epochCount
        self.testLabel = testLabel
        self.exampleCount = exampleCount

        # Initialize generator
        self.Gen = Generator(dimLatent, classCount, dimEmbedding)
        self.Gen.to(self.device)

        # Initialize discriminator
        self.Dis = Discriminator(featureCount, classCount, dimEmbedding)
        self.Dis.to(self.device)

        # Initialize optimizers
        self.optimGen = optim.Adam(params = self.Gen.parameters(), lr = self.lr)
        self.optimDis = optim.Adam(params = self.Dis.parameters(), lr = self.lr)

        # Initialize the loss function
        self.criterion = nn.BCELoss()

        self.df_loss = pd.DataFrame(
            columns = [
                'epoch',
                'batch index',
                'discriminator loss (real data)',
                'discriminator loss (fake data)',
                'discriminator loss',
                'generator loss',
                'discriminator gradient norm',
                'generator gradient norm'
            ])
        self.iterCount = 0

        # Fixed noise and labels so that successive previews are comparable.
        if isinstance(testLabel, int):
            self.noiseFixed = randn(self.exampleCount, dimLatent, device = device)
            self.labelsFixed = full(size = (self.exampleCount,), fill_value = self.testLabel, device = self.device, dtype = torch.int32)

    def train(self):
        """Run `epochCount` epochs of alternating discriminator/generator updates.

        Appends one row per batch to `self.df_loss`. About every 10 % of the
        run, and on the last iteration, the progress is printed and, if
        `testLabel` is an int, a preview figure is shown and saved.
        """
        totalIterCount = self.epochCount*len(self.dataLoader)
        # Never let the interval reach 0: runs with fewer than 10 iterations would otherwise hit a modulo by zero.
        logInterval = max(1, totalIterCount//10)

        for epoch in tqdm(range(self.epochCount)):
            for batchIdx, (data, target) in enumerate(self.dataLoader):     #target = actual (real) label
                data = data.to(device = self.device, dtype = torch.float32)
                target = target.to(device = self.device, dtype = torch.int32)
                batchSize = data.size(0)

                # Train discriminator with real data
                self.Dis.zero_grad()                                        #reset the gradients for every mini-batch
                yReal = self.Dis(data, target)
                labelReal = full(size = (batchSize, 1), fill_value = 1, device = self.device, dtype = torch.float32)    #all ones
                lossDisReal = self.criterion(yReal, labelReal)
                lossDisReal.backward()

                # Train discriminator with fake data
                noise = randn(batchSize, self.dimLatent, device = self.device)
                randomLabelFake = torch.randint(low = 0, high = self.classCount, size = (batchSize,), device = self.device)    #random labels to condition the fake samples
                labelFake = full(size = (batchSize, 1), fill_value = 0, device = self.device, dtype = torch.float32)    #all zeros
                xFake = self.Gen(noise, randomLabelFake)
                # `.detach()` cuts the graph here, so this backward pass does not reach the generator.
                yFake = self.Dis(xFake.detach(), randomLabelFake)
                lossDisFake = self.criterion(yFake, labelFake)
                lossDisFake.backward()

                lossDis = lossDisReal + lossDisFake                         #total discriminator loss (logging only)
                grad_norm_dis = torch.nn.utils.clip_grad_norm_(self.Dis.parameters(), max_norm = self.maxNorm)
                self.optimDis.step()

                # Train generator against the freshly updated discriminator
                self.Gen.zero_grad()
                yFake_2 = self.Dis(xFake, randomLabelFake)
                lossGen = self.criterion(yFake_2, labelReal)                #small when the discriminator rates the fakes as real
                lossGen.backward()
                grad_norm_gen = torch.nn.utils.clip_grad_norm_(self.Gen.parameters(), max_norm = self.maxNorm)
                self.optimGen.step()

                # Log the progress (plain Python floats keep the log columns numeric)
                self.df_loss.loc[len(self.df_loss)] = [
                    epoch,
                    batchIdx,
                    lossDisReal.item(),
                    lossDisFake.item(),
                    lossDis.item(),
                    lossGen.item(),
                    grad_norm_dis.item(),
                    grad_norm_gen.item()
                ]

                isLastIter = self.iterCount == totalIterCount - 1
                if self.iterCount % logInterval == 0 or isLastIter:
                    progress = int(self.iterCount/totalIterCount*100)
                    print(f'training: {progress} %')
                    # Use the instance attribute, not a notebook global, to decide whether previews are enabled.
                    if isinstance(self.testLabel, int):
                        self._plot_preview(progress)
                self.iterCount += 1

    def _plot_preview(self, progress):
        """Plot, save and show generator output for the fixed noise/labels at `progress` percent."""
        with no_grad():
            xFakeTest = self.Gen(self.noiseFixed, self.labelsFixed)
            yFakeTest = self.Dis(xFakeTest, self.labelsFixed)

        scores = yFakeTest.detach().cpu().numpy().reshape(-1).round(4)
        plt.figure(figsize = (4, 3), facecolor = 'w')
        plt.plot(xFakeTest.detach().cpu().numpy().T)
        # Move the labels to the CPU first; `.numpy()` fails on CUDA tensors.
        plt.title(f'labels: {self.labelsFixed.cpu().numpy()}\ndiscriminator: {scores}')
        plt.tight_layout()
        plt.savefig(f'training_{progress}_%.png')
        plt.show()
        plt.close()     #release the figure so previews do not accumulate in memory

##### Prepare profile

##### Scale data to range -1, 1

In [None]:
# Show the dimensions of the scaled sample array.
# NOTE(review): `samplesScaled` is not assigned in any cell visible in this notebook (the
# "Prepare profile" and scaling sections contain no code here) — confirm where it is
# created before relying on Restart & Run All.
samplesScaled.shape

##### Run GAN (**can be skipped, trained model can be loaded further down below**)

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Prefer the first CUDA device and fall back to the CPU when none is available.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('GPU is used.' if device.type == 'cuda' else 'CPU is used.')

# Wrap samples and labels as tensors. The loader uses PyTorch's defaults
# (batch_size = 1, shuffle = False), so each training step sees one sample in dataset order.
dataset = TensorDataset(torch.Tensor(samplesScaled), torch.Tensor(labels))
dataLoader = DataLoader(dataset)

# Model dimensions
featureCount = samplesScaled.shape[1]   #values per sample
classCount = len(set(labels))           #number of distinct labels
dimEmbedding = classCount
dimLatent = 32

# Training hyperparameters
lr = 2*1e-4/3
maxNorm = 1e6       #large on purpose, so gradient clipping rarely becomes active
epochCount = 200
testLabel = 0       #label used for the preview plots during training

In [None]:
# Build the GAN from the configuration above and run the full training loop.
model = GAN(
    device = device,
    dataLoader = dataLoader,
    dimLatent = dimLatent,
    featureCount = featureCount,
    classCount = classCount,
    dimEmbedding = dimEmbedding,
    lr = lr,
    maxNorm = maxNorm,
    epochCount = epochCount,
    testLabel = testLabel
)
model.train()

##### Inspect single day

In [None]:
# Compare a few generated samples for one label with all real samples of that label.
exampleCount = 3
label = 0

noise = randn(exampleCount, dimLatent, device = device)
label_ = full(size = (exampleCount,), fill_value = label, device = device, dtype = torch.int32)

samplesGen = model.Gen(noise, label_).detach().cpu().numpy()

# Plot the data without labels, otherwise every single line adds its own legend entry.
plt.plot(samplesGen.T, color = 'green', alpha = 0.5)
plt.plot(samplesScaled[labels == label].T, color = 'red', alpha = 0.75)
# Empty proxy lines give exactly one legend entry per group.
plt.plot([], color = 'green', label = 'Synthetic')
plt.plot([], color = 'red', label = 'Real', alpha = 0.75)
plt.legend();

##### Inspect all days

In [None]:
# Map numeric labels back to their names for the plot titles.
reverseLabel_dict = {value: key for key, value in label_dict.items()}

# Visit each distinct label once, in order of first appearance. Iterating over
# `labels` directly would redraw the same figure for every repeated label.
for item in dict.fromkeys(labels):
    label = item
    noise = randn(exampleCount, dimLatent, device = device)
    label_ = full(size = (exampleCount,), fill_value = label, device = device, dtype = torch.int32)
    samplesGen = model.Gen(noise, label_).detach().cpu().numpy()
    # Plot the data without labels, otherwise every single line adds its own legend entry.
    plt.plot(samplesGen.T, color = 'green', alpha = 0.5)
    plt.plot(samplesScaled[labels == label].T, color = 'red', alpha = 0.75)
    # Empty proxy lines give exactly one legend entry per group.
    plt.plot([], color = 'green', label = 'Synthetic')
    plt.plot([], color = 'red', label = 'Real', alpha = 0.75)
    plt.legend()
    plt.title(reverseLabel_dict[item])
    plt.show()

In [None]:
# Pickle the whole GAN object (networks, optimizers, data loader and loss log) into one file.
# NOTE(review): a fully pickled object can only be loaded where the same class definitions
# are importable; saving `state_dict()`s is the more portable option — confirm what is needed.
torch.save(model, "model_phil.pt")

##### Load trained model

In [None]:
# SECURITY: torch.load unpickles the file, which can execute arbitrary code — only load
# checkpoints that were created by a trusted source.
# NOTE(review): recent PyTorch releases default to `weights_only=True`, which rejects a fully
# pickled object such as this one; the call may then need `weights_only=False` — confirm
# against the installed version.
# NOTE(review): no `map_location` is passed, so a checkpoint saved on a GPU presumably needs
# a GPU to load — verify on CPU-only machines.
model = torch.load('model_phil.pt')

In [None]:
from sdmetrics.single_column import KSComplement

# sdmetrics KSComplement score between the flattened real and synthetic values.
# NOTE(review): `synthSamples` is only assigned in the "Create whole profile" cell further
# down, and `samples` is not assigned in any visible cell — on a fresh kernel this cell
# cannot run in notebook order.
KSComplement.compute(
    real_data=samples.reshape(-1),
    synthetic_data=synthSamples.reshape(-1)
)

In [None]:
from sdmetrics.single_column import RangeCoverage

# sdmetrics RangeCoverage score between the flattened real and synthetic values.
# NOTE(review): depends on `samples` and `synthSamples`, neither of which is assigned in a
# cell above this one — `synthSamples` is created in the "Create whole profile" cell below.
RangeCoverage.compute(
    real_data=samples.reshape(-1),
    synthetic_data=synthSamples.reshape(-1)
)

In [None]:
# Display the timestamp column of the raw (unfiltered) frame; it is used as the index below.
df_load["timestamp"]

In [None]:
# Put the flattened real and synthetic values side by side, indexed by the raw timestamps.
# NOTE(review): the index comes from `df_load`, not from the filtered `df_load_2`; if `samples`
# was built after incomplete days were removed, the lengths will not match — confirm.
df_1 = pd.DataFrame(np.vstack([samples.reshape(-1), synthSamples.reshape(-1)]).T, index=df_load["timestamp"], columns=["real", "synthetic"])

In [None]:
# Overlay the histograms of real and synthetic values; both are half-transparent so the overlap stays visible.
# NOTE(review): the plot has no legend, so the two distributions can only be told apart by draw order.
df_1.real.hist(alpha=0.5)
df_1.synthetic.hist(alpha=0.5)

In [None]:
from sdmetrics.visualization import get_column_plot

# Distribution plot of real vs. synthetic values via sdmetrics; both inputs are single-column
# frames named "values" and indexed by the raw timestamps.
# NOTE(review): as above, the index is taken from the unfiltered `df_load` — confirm the
# lengths of `samples` and `synthSamples` match it.
fig = get_column_plot(
    real_data=pd.DataFrame(samples.reshape(-1), index=df_load["timestamp"], columns=["values"]),
    synthetic_data=pd.DataFrame(synthSamples.reshape(-1), index=df_load["timestamp"], columns=["values"]),
    column_name="values",
    plot_type='distplot'
)

fig.show()

##### Visualize losses

In [None]:
def plot_df_loss(col):
    """Plot one column of the training log `model.df_loss` over the logged iterations.

    Args:
        col: name of the log column; it is also used as the plot title.
    """
    history = model.df_loss[col].astype(float)  #log entries may be stored as objects
    history.plot(title = col)
    plt.show()

In [None]:
# One figure per logged quantity, in this order.
for lossColumn in (
    'generator loss',
    'discriminator loss (real data)',
    'discriminator loss (fake data)',
    'generator gradient norm',
    'discriminator gradient norm',
):
    plot_df_loss(lossColumn)

##### (Save trained model)

In [None]:
#torch.save(model, 'model_3.pt')

##### Create whole profile

In [None]:
# Generate one synthetic sample per entry of `labels` and collect them in order.
synthSamples_list = []

for item in labels:
    # A single noise vector and a single label per call, so each call yields one sample.
    noise = randn(1, dimLatent, device = device)
    label_ = full(size = (1,), fill_value = item, device = device, dtype = torch.int32)
    samplesGen = model.Gen(noise, label_).detach().cpu().numpy()
    synthSamples_list.append(samplesGen)

# Stack the individual samples row-wise into one array (one row per label entry).
synthSamples = np.vstack(synthSamples_list)

# Undo the scaling; the scaler is applied to the transposed array and the result is transposed back.
# NOTE(review): `scaler` is not assigned in any visible cell — confirm it is the object that
# produced `samplesScaled`.
synthSamples = scaler.inverse_transform(synthSamples.T).T

In [None]:
# Draw the complete synthetic and real profiles as two separate wide figures.
for profile, lineColor, legendName in (
    (synthSamples, 'green', 'Synthetic'),
    (samples, 'red', 'Real'),
):
    plt.figure(figsize = (36, 12))
    plt.plot(profile.reshape(-1), color = lineColor, label = legendName)
    plt.legend(fontsize = 32)
    plt.show()