# About this code

- Code author: Chenguang Wang   
- Email: c.wang-8@tudelft.nl; samwangchenguang@gmail.com
- Affiliation: Delft University of Technology
- Project name: Generating multivariate load states using a (conditional) variational autoencoder
- Motivation: This is a project for PSCC2022 – Power Systems Computation Conference: [Homepage of the conference](https://pscc2022.pt/)
- Aim of this code: Analyze marginal distribution of load data
- A preprint is available; see the paper for more details: [Link of the paper](https://arxiv.org/abs/2110.11435)
    - Paper authors: Chenguang Wang, Ensieh Sharifnia, Zhi Gao, Simon H. Tindemans, Peter Palensky
    - Accepted for publication at PSCC 2022 and a special issue of EPSR
    - If you use (parts of) this code, please cite the preprint or published paper

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ks_2samp # library of K-S test

In [None]:
# Data import
# Every CSV is read with its first column as the row index (index_col=0).
# In the names below, "noisy" / "mu" match the "Noisy" / "Noise free" curves
# plotted further down, and "auto" / "0_1" match "Auto σ’" / "σ’=0.1".
Train_load  = pd.read_csv("../Data/13-17_32_Train.csv", index_col=0) # Training data
σ_auto_Time_all_noisy_β_1  = pd.read_csv("../Generations/σ'_auto_Time_all_noisy_β=1.csv", index_col=0) # CVAE (Auto σ’, Noisy)
σ_auto_Time_all_mu_β_1 = pd.read_csv("../Generations/σ'_auto_Time_all_mu_β=1.csv", index_col=0) # CVAE (Auto σ’, Noise free)
σ_0_1_Time_all_noisy = pd.read_csv("../Generations/σ'_0.1_Time_all_noisy.csv", index_col=0) # CVAE (σ’=0.1, Noisy)
σ_0_1_Time_all_mu = pd.read_csv("../Generations/σ'_0.1_Time_all_mu.csv", index_col=0) # CVAE (σ’=0.1, Noise free)

In [None]:
#------------------Set figure--------------------
# Set dpi BEFORE creating the figure: a figure picks up rcParams['figure.dpi']
# when it is created, so assigning it afterwards would leave this figure at
# the previous dpi and only affect figures created later.
plt.rcParams['savefig.dpi'] = 600
plt.rcParams['figure.dpi'] = 600
# figure size (inches)
plt.figure(figsize=(8,8))
# set frame: same line width for all four spines of the current axes
bwith = 1.5
TK = plt.gca()
for spine_name in ('bottom', 'left', 'top', 'right'):
    TK.spines[spine_name].set_linewidth(bwith)

#------------------font--------------------------
# Font dictionary passed to the axis labels, title and text annotation
font = {'family' : 'Times New Roman',
'weight' : 'normal','size': 24}

#------------------set tick----------------------
# plt.gca() returns the current axes, i.e. the same object as TK above
ax = plt.gca()
ax.tick_params(direction='in', length=3, width=1)

#------------------ tick label ------------------
# Scientific notation on the y axis outside the 10^-1 .. 10^2 range
ax.ticklabel_format(style='sci', scilimits=(-1,2), axis='y')
tick_lable_size=20

#----------------- set grid width ---------------
gridwidth=1

#---------------- set legend size --------------
legend_size=24

#----------------- set line width ---------------
line_width=2


#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------

# Sample size of each K-S test: number of data points (load demands at a
# specific time of a specific country) drawn from the training set and from
# the generated set
Volume_KS = 176

# Number of times the sampling (and the K-S test) is repeated per country
Repeat_time = 5000

# Number of countries (data columns) that are analysed
Country_number = 32

# Placeholders for the pooled p-values of every comparison; each one is
# overwritten by the corresponding K-S loop
P_value_tr_tr_all = 0               # Training data
P_value_tr_cvae_mu_all = 0          # CVAE (σ’=0.1, Noise free)
P_value_tr_cvae_noisy_all = 0       # CVAE (σ’=0.1, Noisy)
P_value_tr_cvae_auto_mu_all = 0     # CVAE (Auto σ’, Noise free)
P_value_tr_cvae_auto_noisy_all = 0  # CVAE (Auto σ’, Noisy)

#----------------------- Helpers shared by all K-S comparisons ----------------------
def _ks_p_values(reference, candidate, sample_size, repeats):
    """Return `repeats` two-sample K-S p-values between random subsamples.

    In every repetition `sample_size` distinct indices are drawn (without
    replacement) from `reference` and, independently, from `candidate`; the
    two subsamples are compared with `ks_2samp` and the p-value is stored.

    Parameters:
        reference, candidate: 1-D NumPy arrays to sample from.
        sample_size: number of points per subsample.
        repeats: number of K-S tests to run.

    Returns:
        1-D float64 array of length `repeats`.
    """
    # Preallocate instead of growing with np.append (which copies the whole
    # array on every call) and always return an array, even for repeats == 1.
    p_values = np.empty(repeats, dtype=np.float64)
    for k in range(repeats):
        ref_idx = np.random.choice(len(reference), size=sample_size, replace=False)
        cand_idx = np.random.choice(len(candidate), size=sample_size, replace=False)
        _, p_values[k] = ks_2samp(reference[ref_idx], candidate[cand_idx])
    return p_values


def _pooled_p_value_ecdf(reference_frame, candidate_frame, progress_label,
                         n_countries, sample_size, repeats):
    """Pool K-S p-values over all countries and build their empirical CDF.

    Column `c` of `reference_frame` is compared with column `c` of
    `candidate_frame` for c = 0 .. n_countries-1 (both cast to float32).
    A progress line is printed after each country.

    Returns:
        (pooled, ordered, fraction): the p-values in computation order, the
        same values sorted ascending, and the matching cumulative fractions
        0, 1/n, ..., (n-1)/n.
    """
    per_country = []
    for country in range(n_countries):
        reference = reference_frame.iloc[:, country].values.astype(np.float32)
        candidate = candidate_frame.iloc[:, country].values.astype(np.float32)
        per_country.append(_ks_p_values(reference, candidate, sample_size, repeats))
        print('{}, Country =  {}'.format(progress_label, country))

    pooled = np.concatenate(per_country) if per_country else np.empty(0)
    ordered = np.sort(pooled)
    fraction = np.arange(len(pooled)) / float(len(pooled))
    return pooled, ordered, fraction


# Legend labels of the generated data sets. Raw strings keep the mathtext
# backslashes literal without relying on invalid escape sequences (\s, \p).
label_auto_noisy = r'CVAE (Auto $\sigma^\prime$, Noisy)'
label_noisy = r'CVAE ($\sigma^\prime$=0.1, Noisy)'
label_auto_mu = r'CVAE (Auto $\sigma^\prime$, Noise free)'
label_mu = r'CVAE ($\sigma^\prime$=0.1, Noise free)'

#----------------------- KS of training to training data  ----------------------
# Both subsamples are drawn from the training data (baseline curve)
P_value_tr_tr_all, P_value_tr_tr_stored, P_value_tr_tr_fraction = _pooled_p_value_ecdf(
    Train_load, Train_load, 'Historical data',
    Country_number, Volume_KS, Repeat_time)

plt.plot(P_value_tr_tr_stored, P_value_tr_tr_fraction, color='b',linestyle='-',
         lw=line_width, label='Training data')

#----------------------- KS of training to CVAE (auto σ', Noisy) data  ----------------------
(P_value_tr_cvae_auto_noisy_all,
 P_value_tr_cvae_auto_noisy_stored,
 P_value_tr_cvae_auto_noisy_fraction) = _pooled_p_value_ecdf(
    Train_load, σ_auto_Time_all_noisy_β_1, label_auto_noisy,
    Country_number, Volume_KS, Repeat_time)

plt.plot(P_value_tr_cvae_auto_noisy_stored, P_value_tr_cvae_auto_noisy_fraction, color='black',linestyle='-',
        lw=line_width, label=label_auto_noisy)

#----------------------- KS of training to CVAE (σ'=0.1, Noisy) data  ----------------------
(P_value_tr_cvae_noisy_all,
 P_value_tr_cvae_noisy_stored,
 P_value_tr_cvae_noisy_fraction) = _pooled_p_value_ecdf(
    Train_load, σ_0_1_Time_all_noisy, label_noisy,
    Country_number, Volume_KS, Repeat_time)

plt.plot(P_value_tr_cvae_noisy_stored, P_value_tr_cvae_noisy_fraction, color='red',linestyle='-',
        lw=line_width, label=label_noisy)

#----------------------- KS of training to CVAE (auto σ', Noise free) data  ----------------------
(P_value_tr_cvae_auto_mu_all,
 P_value_tr_cvae_auto_mu_stored,
 P_value_tr_cvae_auto_mu_fraction) = _pooled_p_value_ecdf(
    Train_load, σ_auto_Time_all_mu_β_1, label_auto_mu,
    Country_number, Volume_KS, Repeat_time)

plt.plot(P_value_tr_cvae_auto_mu_stored, P_value_tr_cvae_auto_mu_fraction, color='black',linestyle=':',
        lw=line_width, label=label_auto_mu)

#----------------------- KS of training to CVAE (σ'=0.1, Noise free) data  ----------------------
(P_value_tr_cvae_mu_all,
 P_value_tr_cvae_mu_stored,
 P_value_tr_cvae_mu_fraction) = _pooled_p_value_ecdf(
    Train_load, σ_0_1_Time_all_mu, label_mu,
    Country_number, Volume_KS, Repeat_time)

plt.plot(P_value_tr_cvae_mu_stored, P_value_tr_cvae_mu_fraction, color='red',linestyle=':',
        lw=line_width, label=label_mu)


#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------

# ticks and grids
plt.xticks(fontproperties = 'Times New Roman',size=tick_lable_size, weight = 'normal')
plt.yticks(fontproperties = 'Times New Roman',fontsize=tick_lable_size, weight = 'normal')
# Same light-grey grid on both axes
plt.grid(color='lightgrey', linestyle='-', linewidth=gridwidth, axis='both')

# Notations (axis labels, title, legend and the sub-figure tag)
plt.xlabel('P-values',font)
plt.ylabel('Probability',font)  # fixed typo in the displayed label ('Probaility')
plt.title('Kolmogorov-Smirnov test',font)
plt.legend(loc='lower right',prop={'family' : 'Times New Roman','weight' : 'normal','size': legend_size})
# Sub-figure tag placed below the x axis (position given in data coordinates)
plt.text(0.466, -0.22,'(a)',font)
plt.show()