In [1]:
%load_ext autoreload
%autoreload 2

In [4]:
import sys
import os
sys.path.append(os.pardir)

import gc
import joblib
import pdb

In [6]:
import numpy as np
import torch
import umap

In [8]:
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size':16})
import seaborn as sns

In [9]:
from data.dataset import bird_dataset_single_hdf

In [10]:
from utils.utils import load_netG, overlap_encode, overlap_decode, segment_image

In [22]:
from hmmlearn.hmm import GaussianHMM

In [11]:
from hmm.hmm_utils import generate_samples

# load a bird, get its songs from some day

In [49]:
# Open the combined HDF file and select the recordings of bird 'r15y5'.
# NOTE(review): absolute, machine-specific path — consider a configurable DATA_DIR.
dataset = bird_dataset_single_hdf('/home/songbird/datapartition/all_birds.hdf', 'r15y5')

... total number of folders for bird r15y5 = 41 ...


In [50]:
# choose a day
# NOTE(review): nsamps=-1 presumably means "return every sample for that day" — confirm in data/dataset.py
X = dataset.get(day=25, nsamps=-1)

In [51]:
X[0].shape

(129, 188)

# transpose each array and concatenate them along the first axis

In [52]:
# Transpose every array in X and join them row-wise into one 2-D matrix.
X_real = np.concatenate([np.transpose(arr) for arr in X], axis=0)

In [53]:
X_real.shape

(1033307, 129)

In [20]:
gc.collect()

2807

# load the generator network and the HMM, then generate samples

In [47]:
# Load the generator checkpoint (epoch 60, "day_all") with a 12-dimensional latent (nz=12).
# NOTE(review): cuda=True presumably requires a GPU to be available — confirm in utils.load_netG.
netG = load_netG('/home/songbird/datapartition/mdgan_output/daily_gan/r15y5_nz12_alldaysnets/netG_epoch_60_day_all.pth',
                 nz=12, ngf=128, cuda=True, resnet=True)

In [48]:
# Load the saved HMM bundle for day 25 and keep only the entry stored under 'model'.
# NOTE: joblib.load unpickles the file, so only point it at trusted artifacts.
hmm = joblib.load(
    '/home/songbird/datapartition/mdgan_output/daily_gan/r15y5_nz12_alldaysnets/day_25/hmm_hiddensize_100/model_day_25.pkl'
)['model']

In [54]:
# One entry per array in X: its second-axis length integer-divided by 16.
timesteps_per_sample = [arr.shape[1] // 16 for arr in X]


In [55]:
# Generate as many sequences as there are entries in X, passing the per-entry
# lengths computed above as `timesteps`.
sample_seqs = generate_samples(netG, hmm, nsamples=len(X), invtemp=1., timesteps=timesteps_per_sample, cuda=True)

In [29]:
sample_seqs[0].shape

(129, 624)

In [56]:
# Transpose every generated sequence and join them row-wise, then show the shape.
Z = np.concatenate([np.transpose(seq) for seq in sample_seqs], axis=0)
Z.shape

(1013200, 129)

In [114]:
# Draw 200000 latent vectors of dimension 12 from a zero-mean Gaussian scaled by 2.
# Note: this rebinds `Z`, which previously held the concatenated sample frames.
# Alternative kept for reference: Z = -10 + 20*np.random.rand(200000, 12)
Z = np.random.randn(200000, 12) * 2.
Z.shape

(200000, 12)

In [115]:
# Decode the latent vectors in Z with the generator; only the first returned value is kept.
Xhat,_ = overlap_decode(Z, netG)

In [116]:
Xhat.shape

(129, 3200000)

In [117]:
# Cut the decoded output into width-16 segments and flatten each one to a 1-D vector.
segs = segment_image(Xhat, width=16)
Xhat = list(map(np.ndarray.flatten, segs))
# The segment list is no longer needed; drop it and reclaim memory.
del segs
gc.collect()

34

In [118]:
# Stack the flattened segments into a single 2-D array (one segment per row).
Xhat = np.stack(Xhat)

In [119]:
Xhat.shape

(200000, 2064)

# project on umap

In [53]:
# Do NOT delete X_real here: the UMAP cells below call fit_transform(X_real) and
# transform(X_real), so removing it makes a top-to-bottom run fail with NameError.
# Only trigger a collection pass to reclaim objects that were already released.
gc.collect()

15742

In [31]:
from umap import UMAP

In [57]:
# 2-D UMAP embedding with a 15-neighbour graph and tightly packed points.
umaplearner = UMAP(
    n_neighbors=15,
    n_components=2,
    min_dist=0.01,
)

In [58]:
# Fit the UMAP model on X_real and keep the resulting 2-D embedding.
xreal = umaplearner.fit_transform(X_real)

In [59]:
xreal.shape

(1033307, 2)

In [None]:
# Project X_real through the already-fitted model via transform(); the result
# feeds df_real in the KDE cell further down.
xreal2 = umaplearner.transform(X_real)

In [60]:
# Rebuild the generated-sample frames directly from `sample_seqs` instead of reading
# `Z`: an intermediate cell rebinds `Z` to a (200000, 12) latent-noise array, which
# does not match the 129 features per row that the UMAP model was fitted on.
xfake = umaplearner.transform(np.concatenate([s.T for s in sample_seqs]))

  self._set_arrayXarray(i, j, x)


In [61]:
xfake.shape

(1013200, 2)

In [63]:
# Overlay the real embedding (blue dots) and the sampled embedding (faint red-edged
# circles) on one axes, write the figure to disk, and close it without displaying.
fig, ax = plt.subplots(figsize=(15, 15))
ax.scatter(xreal[:, 0], xreal[:, 1], s=5, c='b', marker='.')
ax.scatter(xfake[:, 0], xfake[:, 1], s=3, c='w', marker='o', edgecolors='r', alpha=0.1)
ax.legend(['real', 'sampled'])
fig.savefig(
    '/home/songbird/Dropbox/Work/MDGAN_paper/Figures_for_paper/Final_figures/Figure2/r15y5_umap_nz12_day25_hmm100_vs_real.jpg',
    format='jpg',
    dpi=200,
)
plt.close(fig)

In [None]:
# Scratch cell: a bare string literal that is neither assigned nor used anywhere
# in the visible notebook; it only echoes the path when run.
'/home/songbird/Dropbox/'

In [107]:
import seaborn as sns

In [108]:
import pandas as pd

In [109]:
# Wrap the two 2-D embeddings in DataFrames for seaborn.
# `xfake2` is never defined in the visible notebook (NameError); the transformed
# samples live in `xfake`. Both embeddings used here come from
# `umaplearner.transform`, so the two density plots are directly comparable.
df_real = pd.DataFrame({'x1': xreal2[:, 0], 'x2': xreal2[:, 1]})
df_fake = pd.DataFrame({'x1': xfake[:, 0], 'x2': xfake[:, 1]})


In [None]:
# `sns.displot` is a figure-level function: it always creates its own figure, so a
# preceding `plt.figure(figsize=...)` has no effect on it and only leaves an empty
# figure behind. The size is controlled through `height` (inches per facet, aspect 1).
sns.displot(df_real, x="x1", y="x2", kind="kde", height=15)
sns.displot(df_fake, x="x1", y="x2", kind="kde", height=15)