# About this code

- Code author: Chenguang Wang   
- Email: c.wang-8@tudelft.nl; samwangchenguang@gmail.com
- Affiliation: Delft University of Technology
- Project name: Generating multivariate load states using a (conditional) variational autoencoder
- Motivation: This is a project for PSCC2022 – Power Systems Computation Conference: [Homepage of the conference](https://pscc2022.pt/)
- Aim of this code: Analyze the multivariate dependencies of the generated populations by comparing them with the training data using the energy test
- A preprint is available; see the paper for more details: [Link to the paper](https://arxiv.org/abs/2110.11435)
    - Paper authors: Chenguang Wang, Ensieh Sharifnia, Zhi Gao, Simon H. Tindemans, Peter Palensky
    - Accepted for publication at PSCC 2022 and a special issue of EPSR
    - If you use (parts of) this code, please cite the preprint or published paper

In [None]:
# Import libraries
import torch
import numpy as np
import pandas as pd
from torch_two_sample import EnergyStatistic
import matplotlib.pyplot as plt

In [None]:
# Data import
# Every CSV is read with its first column as the row index.
# File-name key: "auto" / "0.1" is the σ' setting, "noisy" / "mu" is Noisy / Noise free.
Train_load_imported = pd.read_csv("../Data/13-17_32_Train.csv", index_col=0)  # Training data
σ_auto_Time_all_noisy_β_1_imported = pd.read_csv("../Generations/σ'_auto_Time_all_noisy_β=1.csv", index_col=0)  # CVAE (Auto σ', Noisy)
σ_auto_Time_all_mu_β_1_imported = pd.read_csv("../Generations/σ'_auto_Time_all_mu_β=1.csv", index_col=0)  # CVAE (Auto σ', Noise free)
σ_0_1_Time_all_noisy_imported = pd.read_csv("../Generations/σ'_0.1_Time_all_noisy.csv", index_col=0)  # CVAE (σ'=0.1, Noisy)
σ_0_1_Time_all_mu_imported = pd.read_csv("../Generations/σ'_0.1_Time_all_mu.csv", index_col=0)  # CVAE (σ'=0.1, Noise free)

# Convert the data frames to float32 arrays so that they can be wrapped as torch tensors
Train_load_numpy = Train_load_imported.values.astype(np.float32)
σ_0_1_Time_all_mu_numpy = σ_0_1_Time_all_mu_imported.values.astype(np.float32)
σ_0_1_Time_all_noisy_numpy = σ_0_1_Time_all_noisy_imported.values.astype(np.float32)
σ_auto_Time_all_mu_β_1_numpy = σ_auto_Time_all_mu_β_1_imported.values.astype(np.float32)
σ_auto_Time_all_noisy_β_1_numpy = σ_auto_Time_all_noisy_β_1_imported.values.astype(np.float32)

# Number of leading columns of the training data that are kept
country_number = 32

# NOTE(review): only the training tensor is truncated to its first `country_number`
# columns; the generated sets are used with all of their columns. Presumably the
# training CSV carries extra columns that the generated CSVs do not - confirm,
# because the two-sample test compares rows of both sets.
Train_load = torch.from_numpy(Train_load_numpy[:, :country_number])
σ_0_1_Time_all_mu = torch.from_numpy(σ_0_1_Time_all_mu_numpy)
σ_0_1_Time_all_noisy = torch.from_numpy(σ_0_1_Time_all_noisy_numpy)
σ_auto_Time_all_mu_β_1 = torch.from_numpy(σ_auto_Time_all_mu_β_1_numpy)
σ_auto_Time_all_noisy_β_1 = torch.from_numpy(σ_auto_Time_all_noisy_β_1_numpy)

In [None]:
# Number of data points sampled from the original and from the generated set for one test
Volume_Energy = 176

# Number of random draws from the permutation used for each p-value
Permutations_time = 100

# Number of times the training and generated data sets are re-sampled
Repeat_time = 1000

# Energy test, set up for two samples of Volume_Energy points each
Energy_test = EnergyStatistic(Volume_Energy, Volume_Energy)

In [None]:
#----------------------------- Energy test -----------------------

def _energy_test_p_values(sample_a, sample_b, label):
    """Return the p-values of repeated energy tests between two data sets.

    In each of the `Repeat_time` rounds, `Volume_Energy` rows are drawn without
    replacement from `sample_a` and, independently, from `sample_b`. The energy
    statistic is evaluated on the two sub-samples and its p-value is estimated
    with `Permutations_time` permutations.

    Parameters:
        sample_a: first data set, indexable by an integer index array.
        sample_b: second data set, indexable by an integer index array.
        label: text printed in the progress message every 100 rounds.

    Returns:
        1-D numpy array holding one p-value per round.
    """
    p_values = []
    for round_number in range(Repeat_time):
        # The two index sets are drawn independently, so when both arguments are
        # the same data set the two sub-samples may share rows.
        index_a = np.random.choice(len(sample_a), size=Volume_Energy, replace=False)
        index_b = np.random.choice(len(sample_b), size=Volume_Energy, replace=False)
        # Only the distance matrix is needed for the permutation p-value
        _, distance_matrix = Energy_test(sample_a[index_a], sample_b[index_b], ret_matrix=True)
        p_values.append(Energy_test.pval(distance_matrix, n_permutations=Permutations_time))
        if round_number % 100 == 0:
            print('{}, Repeat_time =  {}'.format(label, round_number))
    # Collecting in a list avoids re-allocating the array on every round and
    # still yields a 1-D array when only a single round is run.
    return np.array(p_values)


def _cumulative_distribution(p_values):
    """Return (sorted p-values, cumulative fractions 0, 1/n, ..., (n-1)/n)."""
    count = len(p_values)
    return np.sort(p_values), np.array(range(count)) / float(count)


# P-values and cumulative distribution: training to training data
P_Train_to_Train_all = _energy_test_p_values(Train_load, Train_load, 'Train_to_Train')
P_Train_to_Train_all_stored, P_Train_to_Train_all_fraction = _cumulative_distribution(P_Train_to_Train_all)

# P-values and cumulative distribution: training data to CVAE (Auto σ', Noisy) data
P_Train_to_CVAE_auto_noisy_all = _energy_test_p_values(Train_load, σ_auto_Time_all_noisy_β_1, 'Train_to_CVAE_auto_noisy')
P_Train_to_CVAE_auto_noisy_stored, P_Train_to_CVAE_auto_noisy_fraction = _cumulative_distribution(P_Train_to_CVAE_auto_noisy_all)

# P-values and cumulative distribution: training data to CVAE (σ'=0.1, Noisy) data
P_Train_to_CVAE_noisy_all = _energy_test_p_values(Train_load, σ_0_1_Time_all_noisy, 'Train_to_CVAE_noisy')
P_Train_to_CVAE_noisy_stored, P_Train_to_CVAE_noisy_fraction = _cumulative_distribution(P_Train_to_CVAE_noisy_all)

# P-values and cumulative distribution: training data to CVAE (Auto σ', Noise free) data
P_Train_to_CVAE_auto_mu_all = _energy_test_p_values(Train_load, σ_auto_Time_all_mu_β_1, 'Train_to_CVAE_auto_mu')
P_Train_to_CVAE_auto_mu_stored, P_Train_to_CVAE_auto_mu_fraction = _cumulative_distribution(P_Train_to_CVAE_auto_mu_all)

# P-values and cumulative distribution: training data to CVAE (σ'=0.1, Noise free) data
P_Train_to_CVAE_mu_all = _energy_test_p_values(Train_load, σ_0_1_Time_all_mu, 'Train_to_CVAE_mu')
P_Train_to_CVAE_mu_all_stored, P_Train_to_CVAE_mu_all_fraction = _cumulative_distribution(P_Train_to_CVAE_mu_all)



In [None]:
#------------------Set figure--------------------
# Set dpi first: rcParams['figure.dpi'] is only read when a figure is created,
# so it has to be in place before plt.figure() is called.
plt.rcParams['savefig.dpi'] = 600
plt.rcParams['figure.dpi'] = 600
# figure size
plt.figure(figsize=(8,8))
ax = plt.gca()
# set frame
bwith = 1.5
for side in ('bottom', 'left', 'top', 'right'):
    ax.spines[side].set_linewidth(bwith)

#------------------font--------------------------
font = {'family' : 'Times New Roman',
'weight' : 'normal','size': 24}

#------------------set tick----------------------
ax.tick_params(direction='in', length=3, width=1)

#------------------ tick label ------------------
tick_lable_size=20

#----------------- set grid width ---------------
gridwidth=1

#---------------- set legend size --------------
legend_size=24

#----------------- set line width ---------------
line_width=2


#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------

# One cumulative p-value curve per comparison: (x, y, color, linestyle, label).
# The labels are raw strings so that the LaTeX backslashes are not read as
# (invalid) Python escape sequences.
curves = [
    (P_Train_to_Train_all_stored, P_Train_to_Train_all_fraction,
     'b', '-', 'Training data'),
    (P_Train_to_CVAE_auto_noisy_stored, P_Train_to_CVAE_auto_noisy_fraction,
     'black', '-', r'CVAE (Auto $\sigma^\prime$, Noisy)'),
    (P_Train_to_CVAE_noisy_stored, P_Train_to_CVAE_noisy_fraction,
     'r', '-', r'CVAE ($\sigma^\prime$=0.1, Noisy)'),
    (P_Train_to_CVAE_auto_mu_stored, P_Train_to_CVAE_auto_mu_fraction,
     'black', ':', r'CVAE (Auto $\sigma^\prime$, Noise free)'),
    (P_Train_to_CVAE_mu_all_stored, P_Train_to_CVAE_mu_all_fraction,
     'r', ':', r'CVAE ($\sigma^\prime$=0.1, Noise free)'),
]
for p_sorted, fraction, curve_color, curve_style, curve_label in curves:
    plt.plot(p_sorted, fraction, color=curve_color, linestyle=curve_style,
             lw=line_width, label=curve_label)

#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------

# ticks and grids
plt.xticks(fontproperties = 'Times New Roman',size=tick_lable_size, weight = 'normal')
plt.yticks(fontproperties = 'Times New Roman',fontsize=tick_lable_size, weight = 'normal')
plt.grid(color='lightgrey', linestyle='-', linewidth=gridwidth, axis='y')
plt.grid(color='lightgrey', linestyle='-', linewidth=gridwidth, axis='x')

# Notations
plt.xlabel('P-values',font)#(e)
plt.ylabel('Probability',font)
plt.title('Energy test',font)
plt.legend(loc="lower right",prop={'family' : 'Times New Roman','weight' : 'normal','size': legend_size})
# Sub-figure tag placed below the x-axis (position given in data coordinates)
plt.text(0.466, -0.22,'(i)',font)
plt.show()