In [None]:
import cryptpandas as crp
import os
import getpass
import pandas as pd
import numpy as np
import torch
print(f'torch {torch.__version__}')
from sklearn.preprocessing import minmax_scale, MinMaxScaler
import json
from pathlib import Path
from torch.utils.data import TensorDataset, DataLoader
import torch
import matplotlib.pyplot as plt
from datetime import date

from data_manip import remove_incomplete_days
from preproc import import_and_preprocess_data, create_and_add_datetime_features
from GAN import GAN
from plot import plot_losses
from plot import plot_synthetic_vs_real_samples

### Data import

In [None]:
# Input files live in a 'GAN_data' folder that is a sibling of the current working directory.
GAN_data_path = Path().absolute().parent / 'GAN_data'

# The password is requested interactively so that it is never stored in the notebook.
encryptedProfilesPath = os.path.join(GAN_data_path, 'all_profiles.crypt')
df_loadProfiles = crp.read_encrypted(
    path = encryptedProfilesPath,
    password = getpass.getpass('Password: ')
)

In [None]:
# Cluster assignment per profile (semicolon-separated CSV; DBSCAN result according to the file name).
labelsPath = os.path.join(GAN_data_path, 'DBSCAN_15_clusters_labels.csv')
df_labels = pd.read_csv(labelsPath, sep = ';')

# Keep only the second '_'-separated token of every name
# (presumably so the names match the profile columns of df_loadProfiles — verify).
nameParts = df_labels['name'].str.split('_', expand = True)
df_labels['name'] = nameParts[1]

### Create a dataframe for one cluster

In [None]:
clusterLabel = 1

# Names of all profiles that carry the selected cluster label.
isInCluster = df_labels['labels'] == clusterLabel
profiles = df_labels.loc[isInCluster, 'name'].to_list()    # append [:20] to work on a subset only
print(len(profiles))

# The first 13 columns are kept as identifier columns (they include 'date' and
# 'hour of the day', which the pivot below relies on). Cluster members without a
# matching column in df_loadProfiles are skipped.
idColumns = df_loadProfiles.columns[:13]
availableProfiles = [profile for profile in profiles if profile in df_loadProfiles.columns]
df_profiles = df_loadProfiles[idColumns.tolist() + availableProfiles].copy()

# Wide copy (one column per profile) kept for the cluster overview plot.
df_plot = df_profiles.iloc[:, 13:].reset_index(drop = True).copy()

# Reshape to one row per (date, profile) and one column per hour of the day.
# pivot_table aggregates duplicate (date, profile, hour) entries with its default, the mean.
df_profiles = (
    df_profiles
    .melt(id_vars = idColumns, value_vars = df_profiles.columns[13:], var_name = 'profile')
    .pivot_table(values = 'value', index = ['date', 'profile'], columns = 'hour of the day')
)

### Plot the cluster (**<span style="color:red">can be skipped</span>**)

In [None]:
# Overview of the whole cluster: one semi-transparent line per profile.
fig, ax = plt.subplots(figsize = (16, 2))
df_plot.plot(ax = ax, alpha = 0.75, legend = False);

### Create and scale samples and labels

In [None]:
# Every row of df_profiles is one (date, profile) sample, every column one hour of the day.
# NOTE(review): remove_incomplete_days is imported but never applied in this notebook;
# NaNs from incomplete days would pass through the scaler unchanged — confirm the data is complete.
samples = df_profiles.to_numpy()

# MinMaxScaler works column-wise, so the matrix is transposed before and after scaling:
# each sample is mapped to [-1, 1] using its own minimum and maximum.
scaler = MinMaxScaler(feature_range = (-1, 1))
samplesScaled = scaler.fit_transform(samples.T).T

# Every sample gets its own unique integer label (0 ... n-1).
# An earlier variant that derived separate labels from the profile name and the
# date was dropped in favour of this scheme.
labels = np.arange(len(df_profiles))

# Same index/columns as df_profiles, but holding the scaled values.
df_profilesScaled = df_profiles.copy()
df_profilesScaled[::] = samplesScaled

### Plot scaled profiles (**<span style="color:red">can be skipped</span>**)

In [None]:
# Go to long format first, then back to wide with one row per profile and a
# (date, hour of the day) column MultiIndex.
df_plot_2 = (
    df_profilesScaled
    .reset_index()
    .melt(id_vars = ['date', 'profile'], value_vars = df_profilesScaled.columns)
    .pivot_table(values = 'value', index = 'profile', columns = ['date', 'hour of the day'])
)

In [None]:
# Scaled profiles over the first 24 columns
# (presumably the 24 hourly values of the first date — TODO confirm hourly resolution).
fig, ax = plt.subplots(figsize = (16, 2))
df_plot_2.iloc[:, :24].T.plot(ax = ax, alpha = 0.75, legend = False);

In [None]:
# Profiles of one selected day, one line per profile.
# A dedicated name is used so that the wide frame stored in df_plot (cluster
# overview plot) is not overwritten and that cell can be re-run safely.
# NOTE(review): this plots the unscaled df_profiles although the section is about
# scaled profiles — confirm whether df_profilesScaled was intended.
# NOTE(review): the comparison assumes the 'date' index level holds datetime.date
# objects — confirm against the preprocessing.
plotDate = date(2021, 6, 2)
isPlotDate = df_profiles.index.get_level_values('date') == plotDate
df_plot_day = df_profiles[isPlotDate].reset_index(drop = True)

# Fail with a readable message instead of pandas' "no numeric data to plot".
assert len(df_plot_day) > 0, f'No profiles found for {plotDate}.'

fig, ax = plt.subplots(figsize = (16, 4))
df_plot_day.T.plot(ax = ax, alpha = 0.75, legend = False, title = f'Profiles on {plotDate}');

### Create dataLoader

In [None]:
# Both tensors are stored as float32, exactly as the legacy torch.Tensor constructor does.
# NOTE(review): the labels end up as floats; embedding layers need integer indices,
# so the GAN presumably casts them internally — confirm.
sampleTensor = torch.tensor(samplesScaled, dtype = torch.float32)
labelTensor = torch.tensor(labels, dtype = torch.float32)

dataset = TensorDataset(sampleTensor, labelTensor)
dataLoader = DataLoader(dataset, batch_size = 48, shuffle = True)

### Configure GAN

In [None]:
# Model name handed to the GAN.
# NOTE(review): the day of the month appears to be left blank — complete it before a run.
name = 'model_2023-11-'

# Fix the random state so that weight initialisation, batch shuffling and sample
# generation can be reproduced when the notebook is re-run from here on.
# torch.manual_seed also seeds the CUDA generators; bit-exact results on a GPU
# may additionally require deterministic algorithms.
randomSeed = 42
np.random.seed(randomSeed)
torch.manual_seed(randomSeed)

# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
    print('GPU is used.')
else:
    device = torch.device('cpu')
    print('CPU is used.')

dimLatent = 32
featureCount = samplesScaled.shape[1]    # number of columns per sample
classCount = len(np.unique(labels))      # number of distinct labels (every sample has its own)
dimEmbedding = classCount                # embedding size is tied to the number of classes
lr = 1e-5
maxNorm = 1e6    # NOTE(review): presumably a gradient-clipping norm; 1e6 would effectively disable clipping — confirm
epochCount = 200
#testLabel = 0

### Create and run model

In [None]:
# Collect every setting from the configuration cell in one place before building the model.
ganConfig = dict(
    name = name,
    device = device,
    dataLoader = dataLoader,
    dimLatent = dimLatent,
    featureCount = featureCount,
    classCount = classCount,
    dimEmbedding = dimEmbedding,
    lr = lr,
    maxNorm = maxNorm,
    epochCount = epochCount,
    #testLabel = testLabel
)

model = GAN(**ganConfig)
model.train()

### Generate synthetic profiles

In [None]:
numberOfProfiles = 50

# One generator call per requested profile set.
# NOTE(review): generate_sample() is assumed to return a 2-D array laid out like
# samplesScaled (rows = samples, columns = features) — confirm in GAN.
synthSamplesScaled_list = [model.generate_sample() for _ in range(numberOfProfiles)]

# Undo the per-sample scaling with the same transpose trick that was used for fitting.
synthSamples_list = [
    scaler.inverse_transform(scaledBatch.T).T
    for scaledBatch in synthSamplesScaled_list
]

# Stack the individual results along a new third axis.
synthSamplesScaled = np.dstack(synthSamplesScaled_list)
synthSamples = np.dstack(synthSamples_list)

### Create plots

In [None]:
# Compare synthetic and real samples with the project's plotting helper.
# Both arrays are passed in the scaled space: synthSamplesScaled is the stacked
# generator output before inverse_transform, not the rescaled synthSamples.
plot_synthetic_vs_real_samples(
    model = model,
    df_profile = df_profiles,
    samplesScaled = samplesScaled,
    synthSamples = synthSamplesScaled
)

In [None]:
# Plot the losses via the project's helper (presumably those recorded during model.train() — verify in plot.py).
plot_losses(model = model)