In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pickle
from tqdm.notebook import tqdm
from tqdm import trange
%matplotlib inline

In [None]:
def read_list_of_arrays(filename):
    """Load a pickled sequence of arrays and stack them as rows of a 2-D array.

    The unpickled object must be a sequence whose items expose ``flatten()``.
    If the unpickled object has exactly three elements it is treated as a
    bundle: the first item of its second and third elements is printed, and
    only its first element is used as the sequence of arrays.
    Caveat: a plain list of exactly three arrays is also caught by that check.

    Parameters
    ----------
    filename : str or path-like
        Path to the pickle file.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number of arrays, flattened length of the
        first array)``; row ``i`` is the ``i``-th array flattened.

    Raises
    ------
    IndexError
        If the sequence of arrays is empty (the row width is read from item 0).
    """
    # NOTE: pickle.load can execute arbitrary code -- only use on trusted files.
    # The context manager closes the handle even if unpickling fails.
    with open(filename, 'rb') as fh:
        A = pickle.load(fh)

    if len(A) == 3:
        print(A[1][0], A[2][0])
        A = A[0]

    # Row width is taken from the first array; np.zeros makes the result float64.
    dim = A[0].flatten().shape[0]
    B = np.zeros((len(A), dim))

    for i in range(len(A)):
        B[i, :] = A[i].flatten()

    return B

In [None]:
# Checkpoint epochs to evaluate: 500, 1000, ..., 5000 (the stop value is exclusive).
epochs = np.arange(start=500, stop=5500, step=500)

In [None]:
# Echo the checkpoint epochs as the cell output.
epochs

In [None]:
# Base point cloud (plotted as "real" later in the notebook).
cloud_base = read_list_of_arrays('/gan-clouds/timegan_data.pickle')

# One point cloud per entry of `epochs`, in the same order; the epoch number
# selects the model_<epoch>.pickle file to read.
clouds = [
    read_list_of_arrays('/gan-clouds/timegan_various_epochs5k/model_%d.pickle' % ep)
    for ep in epochs
]

In [None]:
# Shape of the stacked base cloud: (number of arrays, flattened length).
cloud_base.shape

In [None]:
# Print the shape of every loaded checkpoint cloud, one line per checkpoint.
for cloud in clouds:
    print(cloud.shape)

### Compute cross-barcodes 

In [None]:
import mtd

In [None]:
# res1[i] holds `trials` cross-barcode results for (cloud_base, clouds[i]).
res1 = []
trials = 50

for i in trange(len(clouds)):
    # The global NumPy seed is reset before each checkpoint.
    # NOTE(review): presumably mtd draws its batches from NumPy's global RNG,
    # so every checkpoint sees the same sampling sequence -- confirm in mtd.
    np.random.seed(7)
    barcs = [
        mtd.calc_cross_barcodes(
            cloud_base,
            clouds[i],
            batch_size1=100,
            batch_size2=1000,
            cuda=1,
            pdist_device='gpu',
        )
        for _ in range(trials)
    ]
    res1.append(barcs)

In [None]:
# res2[i] holds `trials` cross-barcode results for (clouds[i], cloud_base),
# i.e. the generated cloud is passed first and the base cloud second.
res2 = []
trials = 50

for i in trange(len(clouds)):
    # The global NumPy seed is reset before each checkpoint.
    # NOTE(review): presumably mtd draws its batches from NumPy's global RNG,
    # so every checkpoint sees the same sampling sequence -- confirm in mtd.
    np.random.seed(7)
    barcs = [
        mtd.calc_cross_barcodes(
            clouds[i],
            cloud_base,
            batch_size1=100,
            batch_size2=1000,
            cuda=1,
            pdist_device='gpu',
        )
        for _ in range(trials)
    ]
    res2.append(barcs)

### Absolute barcodes

In [None]:
# Barcode call for the last checkpoint cloud alone: the second cloud is an
# empty (0, 0) array and batch_size2 is 0.
# NOTE(review): presumably this makes mtd compute an ordinary (non-cross)
# barcode of the first cloud -- confirm against mtd.calc_cross_barcodes.
barc = mtd.calc_cross_barcodes(clouds[-1], np.zeros((0,0)), batch_size1 = 100, batch_size2 = 0)

In [None]:
# Barcode call for the base cloud alone: the second cloud is an empty (0, 0)
# array and batch_size2 is 0. Rebinds `barc`.
# NOTE(review): presumably this makes mtd compute an ordinary (non-cross)
# barcode of the first cloud -- confirm against mtd.calc_cross_barcodes.
barc = mtd.calc_cross_barcodes(cloud_base, np.zeros((0,0)), batch_size1 = 100, batch_size2 = 0)

In [None]:
def get_scores(res, args_dict, trials = 10):
    """Average ``mtd.get_score`` over the repeated results of each experiment.

    Parameters
    ----------
    res : sequence of sequences
        ``res[i]`` is the collection of results for experiment ``i``; every
        element is passed to ``mtd.get_score``.
    args_dict : dict
        Keyword arguments forwarded unchanged to ``mtd.get_score``.
    trials : int, optional
        Not used by the function body; kept so existing calls remain valid.

    Returns
    -------
    list
        One arithmetic mean per entry of ``res``, in the same order.

    Raises
    ------
    ZeroDivisionError
        If any ``res[i]`` is empty.
    """
    def _mean_score(repeats):
        # Plain sum / len so the result matches ordinary Python arithmetic.
        values = [mtd.get_score(item, **args_dict) for item in repeats]
        return sum(values) / len(values)

    return [_mean_score(res[k]) for k in range(len(res))]

In [None]:
# Mean score per checkpoint for res1, forwarding h_idx=1 and
# kind='sum_length' to mtd.get_score.
scores = get_scores(res1, {'h_idx' : 1, 'kind' : 'sum_length'})

In [None]:
# Print one averaged score per checkpoint; `ep` is iterated but not printed.
for ep, s in zip(epochs, scores):
    print(s)

In [None]:
# Mean score per checkpoint for res2, forwarding h_idx=1 and
# kind='sum_length' to mtd.get_score. Rebinds `scores`.
scores = get_scores(res2, {'h_idx' : 1, 'kind' : 'sum_length'})

In [None]:
# Print one averaged score per checkpoint; `ep` is iterated but not printed.
for ep, s in zip(epochs, scores):
    print(s)

In [None]:
#pickle.dump(res1, open('res1_timegan.pickle', 'wb'))
#pickle.dump(res2, open('res2_timegan.pickle', 'wb'))

### PCA

In [None]:
import numpy as np
from sklearn.decomposition import PCA

In [None]:
%pylab inline

In [None]:
import matplotlib.pyplot as plt

In [None]:
def plot2(data, groups = ("base", "cloud")):
    """Scatter-plot up to two 2-D point sets on a single set of axes.

    Parameters
    ----------
    data : sequence of (x, y) pairs
        Each entry unpacks into the x and y coordinates of one point set.
        Entries are paired with the colours red and green and with
        ``groups``; entries beyond the shortest of these are not drawn.
    groups : sequence of str, optional
        Legend label for each point set.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``.
    """
    palette = ("red", "green")

    fig, ax = plt.subplots()

    for (xs, ys), colour, label in zip(data, palette, groups):
        ax.scatter(xs, ys, alpha=0.5, c=colour, edgecolors='none', s=5, label=label)

    # loc=2 places the legend in the upper-left corner.
    ax.legend(loc=2)
    plt.show()

#### PCA fitted on the base cloud plus the last GAN checkpoint

In [None]:
# Fit one 2-component PCA on the base cloud together with the last checkpoint
# cloud, then project every checkpoint into that single plane.
# The fit and the projection of the base cloud do not depend on the loop index,
# so they are computed once instead of once per epoch. A fixed random_state
# keeps the projection reproducible if scikit-learn picks a randomized solver.
pca = PCA(n_components=2, random_state=0)
pca.fit(np.concatenate((cloud_base, clouds[-1])))

cloud_base_pca = pca.transform(cloud_base)
base_xy = (cloud_base_pca[:, 0], cloud_base_pca[:, 1])

# all_pca[i] == [(base_x, base_y), (cloud_x, cloud_y)] for checkpoint i.
all_pca = []

for i in range(len(epochs)):
    cloud_pca = pca.transform(clouds[i])
    data = [base_xy, (cloud_pca[:, 0], cloud_pca[:, 1])]

    all_pca.append(data)

    plot2(data, groups = ("real", "generated, epoch %d" % epochs[i]))

In [None]:
#pickle.dump(all_pca, open('timegan_all_pca.pickle', 'wb'))