In [1]:
import cryptpandas as crp
import os
import getpass
import pandas as pd
import numpy as np
import torch
print(f'torch {torch.__version__}')
from sklearn.preprocessing import minmax_scale, MinMaxScaler
import json
from pathlib import Path
from torch.utils.data import TensorDataset, DataLoader
import torch
import matplotlib.pyplot as plt
from datetime import date

from data_manip import remove_incomplete_days
from preproc import import_and_preprocess_data, create_and_add_datetime_features
from GAN import GAN
from plot import plot_losses
from plot import plot_synthetic_vs_real_samples

torch 2.1.1+cu121


### Data import

In [2]:
# Folder holding the GAN input data; resolved relative to the parent of the
# current working directory.
GAN_data_path = Path().absolute().parent / 'GAN_data'

# The decryption password must not be stored in the notebook. It is taken from
# the GAN_DATA_PASSWORD environment variable when that is set (unattended
# runs); otherwise the user is prompted and the input is not echoed.
# NOTE(review): a password that was ever committed in this cell should be
# treated as leaked and rotated.
df_loadProfiles = crp.read_encrypted(
    path = os.path.join(GAN_data_path, 'all_profiles.crypt'),
    password = os.environ.get('GAN_DATA_PASSWORD') or getpass.getpass('Password: '),
)

In [3]:
# Cluster label of every profile, read from a semicolon-separated file.
# Only the second '_'-separated token of each name is kept; these tokens are
# later matched against the column names of df_loadProfiles.
df_labels = (
    pd.read_csv(GAN_data_path / 'DBSCAN_15_clusters_labels.csv', sep = ';')
    .assign(name = lambda frame: frame['name'].str.split('_', expand = True)[1])
)

### Create a dataframe for one cluster

In [4]:
# Cluster whose profiles are used in the rest of the notebook.
clusterLabel = 1

# Names of at most 40 profiles that carry this cluster label.
profiles = df_labels.loc[df_labels['labels'].eq(clusterLabel), 'name'].head(40).to_list()
print(len(profiles))

# Keep the 13 leading columns of the load-profile table (they serve as the
# identifier columns of the melt below) plus every selected profile that
# exists as a column.
# NOTE(review): names missing from df_loadProfiles are dropped silently, so the
# count printed above can exceed the number of profiles actually used.
df_profiles = df_loadProfiles[
    df_loadProfiles.columns[:13].tolist()
    + [name for name in profiles if name in df_loadProfiles.columns]
].copy()
# Profile columns only, with a fresh integer index; saved for later.
df_plot = df_profiles.iloc[:, 13:].reset_index(drop = True).copy()

# Long format first (one 'value' per original row and profile), then wide
# again: one row per (date, profile), one column per hour of the day.
df_profiles = (
    df_profiles
    .melt(id_vars = df_loadProfiles.columns[:13], value_vars = df_profiles.columns[13:], var_name = 'profile')
    .pivot_table(values = 'value', index = ['date', 'profile'], columns = 'hour of the day')
)

40


### Create and scale samples and labels

In [5]:
# One row per (date, profile) pair, one column per hour of the day.
samples = df_profiles.to_numpy()
# Every sample gets its own integer label 0..n-1. np.arange keeps the dtype
# integral even for an empty frame, where np.array(range(0)) would be float64.
# NOTE(review): nothing is scaled here, although later cells refer to `scaler`
# and `samplesScaled`; confirm where those are supposed to be created.
labels = np.arange(len(df_profiles))

### Configure GAN

In [6]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('GPU is used.' if device.type == 'cuda' else 'CPU is used.')

# Model / training settings.
# NOTE(review): no cell visible in this notebook consumes these values.
batchSize = 1000
dimLatent = 32
featureCount = samples.shape[1]        # columns per sample
classCount = np.unique(labels).size    # number of distinct labels
dimEmbedding = classCount
lr = 1e-5
maxNorm = 1e6
epochCount = 1000
#testLabel = 0

GPU is used.


### Create and run model

### Save and load model

In [7]:
#model.name = 'model_test'
#torch.save(model, f'models/{model.name}.pt')

In [8]:
# Load a previously trained model from disk.
# torch and the GAN class are already imported in the first cell; re-importing
# GAN here as a module would rebind the name `GAN` from the class to the module.
# map_location places the stored tensors on the device chosen in the
# configuration cell, so a model saved on a GPU also loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the whole object and can execute arbitrary
# code; only load model files from a trusted source.
model = torch.load("models/model_cluster_1_of_15_DBSCAN_2023-11-25_test.pt", map_location = device)

In [28]:
# Draw one synthetic sample set and write it into a copy of the real frame, so
# it inherits the (date, profile) index and the hour columns.
# Assumes generate_sample() returns data shaped like df_profiles; confirm.
array = model.generate_sample()
df_synthProfiles = df_profiles.copy()
df_synthProfiles[::] = array
# Reshape for export: one row per (date, hour of the day), one column per profile.
df2 = (
    df_synthProfiles
    .reset_index()
    .melt(id_vars=["date", "profile"])
    .pivot_table(values="value", columns="profile", index=["date", "hour of the day"])
)

In [32]:
# Write the reshaped synthetic profiles to a CSV file in the working directory.
df2.to_csv("synthetic.csv")

### Generate synthetic profiles

In [10]:
# Display the synthetic profiles (rows: date/profile pairs, columns: hours of the day).
df_synthProfiles

Unnamed: 0_level_0,hour of the day,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,21,22,23,24
date,profile,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2021-06-01,10,8.247011,6.589770,6.025851,6.136952,6.192895,6.252640,6.610291,6.779030,8.830244,7.583932,...,9.999150,13.722522,10.236259,11.962848,9.571348,18.832588,31.997660,8.521497,9.634047,8.292643
2021-06-01,1024,207.659866,146.826385,56.521984,106.353340,102.123329,128.007309,130.864380,137.417191,213.541397,225.636368,...,293.519867,320.674377,251.608673,233.710953,226.767761,338.407257,587.518860,290.595032,204.919128,266.922211
2021-06-01,1169,574.670715,203.330261,163.931442,379.367493,398.492767,402.990540,366.899536,365.727661,335.200287,456.372009,...,406.157166,317.691925,907.981384,478.201233,305.374756,932.452209,2013.334839,988.114929,1012.445129,1039.718140
2021-06-01,1185,2825.664795,1940.979980,2154.510742,2779.212646,2300.514648,2733.034180,1368.954834,373.637177,12.660195,16.753080,...,18.102062,40.186852,39.964767,29.196846,179.482971,695.384155,2036.162109,1455.986206,3050.550049,3022.483398
2021-06-01,1212,193.883942,162.233231,97.258926,143.483032,129.294830,131.932892,141.757553,145.986679,259.368469,247.342148,...,258.654877,195.201157,275.738342,250.598572,226.063858,302.660980,425.582031,217.137543,182.460831,191.571548
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-30,2630,28.500647,19.785583,15.099562,23.954916,17.143648,23.719692,18.160738,15.005317,12.497416,13.779497,...,12.946076,14.554584,16.796362,14.252987,15.504000,16.622561,27.134274,24.583912,31.190151,30.356428
2022-06-30,2679,1.760892,0.288702,0.008110,0.090896,0.104598,0.239061,0.456636,0.432963,3.202371,4.482346,...,10.534113,6.104594,7.517599,7.056006,1.924541,36.282238,48.247417,3.565997,4.748577,5.513801
2022-06-30,2743,1827.067505,1799.543823,1780.468262,1797.273193,1805.878906,1817.343384,1727.823242,607.322754,46.540997,44.668919,...,30.831434,26.745832,25.868710,22.548368,211.191147,975.528564,1330.820557,1066.067749,1831.551758,1830.830933
2022-06-30,2800,103.757240,95.199577,89.916023,95.236183,99.656189,96.865982,93.945457,93.998764,89.178055,92.899826,...,90.888535,91.643730,104.678001,94.005379,103.331314,98.224648,113.260048,109.471657,111.987457,109.475792


In [11]:
# Number of independent draws taken from the model.
numberOfProfiles = 90

# NOTE(review): `scaler` is not defined in any cell visible in this notebook;
# confirm where it is created before relying on this cell.
synthSamplesScaled_list = [model.generate_sample() for _ in range(numberOfProfiles)]
synthSamples_list = [
    scaler.inverse_transform(draw.T).T
    for draw in synthSamplesScaled_list
]

# Stack the individual draws along the third axis.
synthSamplesScaled = np.dstack(synthSamplesScaled_list)
synthSamples = np.dstack(synthSamples_list)

KeyboardInterrupt: 

In [None]:
# Display the real profiles for comparison with the synthetic ones.
df_profiles

In [None]:
# Copy the real frame for its index/columns, then overwrite every value with a
# back-transformed synthetic sample.
# NOTE(review): `scaler` is not defined in any cell visible in this notebook;
# confirm where it is created.
df_synthProfiles = df_profiles.copy()
df_synthProfiles[::] = scaler.inverse_transform(model.generate_sample().T).T

In [None]:
# Plot the real profile stored at row position 5 across the hour columns.
df_profiles.iloc[5].plot()

In [None]:
# Plot the synthetic profile stored at the same row position (5).
df_synthProfiles.iloc[5].plot()

In [None]:
# Back-transformed synthetic sample as a frame labelled with the hour columns
# of the real data. The labels must come from df_profiles.columns; passing the
# whole DataFrame as `columns` does not supply the column labels.
# NOTE(review): `scaler` is not defined in any cell visible in this notebook;
# confirm where it is created.
pd.DataFrame(scaler.inverse_transform(model.generate_sample().T).T, columns = df_profiles.columns)

In [None]:
# Inspect the shape of one generated sample set.
model.generate_sample().shape

### Create plots

In [None]:
# Compare real and synthetic samples with the project's plotting helper.
# NOTE(review): `samplesScaled` is not defined in any cell visible in this
# notebook, and `synthSamplesScaled` comes from the repeated-sampling cell;
# confirm both exist before running.
plot_synthetic_vs_real_samples(
    model = model,
    df_profile = df_profiles,
    samplesScaled = samplesScaled,
    synthSamples = synthSamplesScaled
)

In [None]:
# Plot the model's losses with the project's plotting helper.
plot_losses(model = model)