In [1]:
import numpy as np
import setigen as stg
from blimpy import Waterfall
import matplotlib.pyplot as plt
import random
import os
from astropy import units as u
from tqdm import tqdm
from sklearn.metrics import silhouette_score
import tensorflow as tf
from tensorflow.keras import layers

# Restrict CUDA to the device with index 1.
# NOTE(review): this is assigned after `import tensorflow` above; it is usually
# recommended to set it before the import — confirm it takes effect as intended.
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# Number of synthetic signal classes, and injected samples generated per class;
# both are read as globals by painting().
num_classes = 100
num_samples_per_class = 1000


2023-09-07 04:04:28.373839: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def painting(data):
    """Inject synthetic narrowband signals into randomly chosen background windows.

    For each of ``num_classes`` classes one (drift rate, SNR, width) triple is
    drawn; ``num_samples_per_class`` samples are then produced per class by
    picking a random window from ``data`` and injecting a signal with those
    parameters at a random start channel.

    Reads the notebook globals ``num_classes`` and ``num_samples_per_class``.

    Args:
        data: array indexed as ``data[i, :, :]``; each ``data[i]`` is used as
            the 2-D background of one setigen frame.

    Returns:
        (all_data, labels): ``all_data`` is the stacked injected frames, one
        entry per sample; ``labels`` is a column array of shape
        ``(num_classes * num_samples_per_class, 1)`` holding the class index
        of each sample.
    """
    all_data = []
    labels = []
    for c in range(num_classes):
        # Drift magnitude in [0, 2) Hz/s with an unbiased random sign.
        # random.randint is inclusive on both ends, so the previous
        # (-1)**random.randint(0, 2) produced +1 for exponents 0 and 2 and
        # therefore positive drifts two thirds of the time.
        drift = 2 * random.random() * random.choice((-1, 1))
        snr = random.randint(100, 150)
        width = random.randint(20, 50)
        for s in range(num_samples_per_class):
            index = random.randint(0, data.shape[0] - 1)
            # NOTE(review): copied defensively. If stg.Frame.from_data keeps a
            # reference to the array it is given, injecting would modify `data`
            # in place and repeated draws of the same index would share
            # (and accumulate signals in) one buffer — confirm against setigen.
            window = data[index, :, :].copy()

            # Start channel of the injected signal.
            # NOTE(review): the 50-180 range presumably assumes 256-channel
            # windows — confirm.
            start = random.randint(50, 180)

            frame = stg.Frame.from_data(df=2.7939677238464355*u.Hz,
                                        dt=18.253611008*u.s,
                                        fch1=1289*u.MHz,
                                        ascending=True,
                                        data=window)
            frame.add_signal(
                stg.constant_path(f_start=frame.get_frequency(index=start),
                                  drift_rate=drift*u.Hz/u.s),
                stg.constant_t_profile(level=frame.get_intensity(snr=snr)),
                stg.gaussian_f_profile(width=width*u.Hz),
                stg.constant_bp_profile(level=1))
            all_data.append(frame.data)
            labels.append(c)
    all_data = np.array(all_data)
    # vstack of scalars yields an (N, 1) column; callers index labels[:, 0].
    labels = np.vstack(labels)
    return all_data, labels

In [3]:
import cv2
import numpy as np


In [4]:
from tqdm import tqdm
import gc
import keras
from AE import AE

In [5]:
def build_model(enocder, decoder):
    """Wrap an encoder/decoder pair in an ``AE`` model and compile it with Adam.

    Args:
        enocder: encoder model passed to ``AE``. The misspelled parameter name
            is kept so existing keyword callers keep working.
        decoder: decoder model passed to ``AE``.

    Returns:
        The compiled ``AE`` instance (Adam optimizer, learning rate 5e-4).
    """
    # Use the argument: the body previously referenced the notebook-global
    # `encoder`, silently ignoring whatever was passed in.
    autoencoder = AE(enocder, decoder)
    # `learning_rate` is the current keyword (the `lr` alias is deprecated) and
    # matches the other Adam construction in this notebook.
    autoencoder.compile(optimizer=keras.optimizers.Adam(learning_rate=5e-4))
    return autoencoder

In [6]:
# Bottleneck size and input geometry (time x frequency, single channel).
latent_dim = 10
time_samples = 16
freq_sample =  256

encoder_inputs = keras.Input(shape=(time_samples, freq_sample, 1))

# Five convolutional stages: 3x3 conv -> (1, 2) max-pool -> batch norm.
# The (1, 2) pool halves only the second spatial axis, leaving the first intact.
x = encoder_inputs
for _n_filters in (3, 64, 64, 64, 64):
    x = layers.Conv2D(_n_filters, 3, activation="relu", strides=1, padding="same")(x)
    x = layers.MaxPool2D(pool_size=(1, 2))(x)
    x = layers.BatchNormalization()(x)

# Remember the pre-flatten shape; the decoder cell uses it to size its Reshape.
x_shape = x.shape
x = layers.Flatten()(x)

# Two dense + batch-norm stages leading into the linear latent projection.
for _n_units in (256, 64):
    x = layers.Dense(_n_units, activation="relu")(x)
    x = layers.BatchNormalization()(x)

z = layers.Dense(latent_dim, name="latent", activation="linear")(x)
encoder = keras.Model(encoder_inputs, z, name="encoder")
encoder.summary()

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 16, 256, 1)]      0         
                                                                 
 conv2d (Conv2D)             (None, 16, 256, 3)        30        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 128, 3)       0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 16, 128, 3)       12        
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 128, 64)       1792      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 64, 64)       0   

2023-09-07 04:04:46.683787: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-07 04:04:47.072731: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14233 MB memory:  -> device: 0, name: NVIDIA RTX A4000, pci bus id: 0000:61:00.0, compute capability: 8.6


In [7]:
latent_inputs = keras.Input(shape=(latent_dim,))

# Dense stack expanding the latent vector back to the encoder's pre-flatten size.
# The first dense layer has no batch norm; the following two do.
x = layers.Dense(64, activation="relu")(latent_inputs)
for _n_units in (256, x_shape[1]* x_shape[2]* x_shape[3]):
    x = layers.Dense(_n_units, activation="relu")(x)
    x = layers.BatchNormalization()(x)
x = layers.Reshape((x_shape[1], x_shape[2], x_shape[3]))(x)

# Five upsampling stages: stride-2 transposed conv -> (2, 1) max-pool -> batch norm.
# The (2, 1) pool halves the first spatial axis again after each stride-2 layer.
for _n_filters in (64, 64, 64, 64, 3):
    x = layers.Conv2DTranspose(_n_filters, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.MaxPool2D(pool_size=(2, 1))(x)
    x = layers.BatchNormalization()(x)

# Final linear layer producing a single output channel.
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="linear", padding="same")(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 10)]              0         
                                                                 
 dense_2 (Dense)             (None, 64)                704       
                                                                 
 dense_3 (Dense)             (None, 256)               16640     
                                                                 
 batch_normalization_7 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dense_4 (Dense)             (None, 8192)              2105344   
                                                                 
 batch_normalization_8 (Batc  (None, 8192)             32768     
 hNormalization)                                           

In [8]:
# Assemble the autoencoder from the encoder/decoder built above and compile it
# with Adam (learning rate 1e-3).
autoencoder = AE(encoder, decoder)
autoencoder.compile(optimizer=keras.optimizers.Adam(learning_rate = 1e-3))
# Restore previously saved weights. As the cell's last expression, the returned
# load-status object is echoed as the cell output.
autoencoder.load_weights("../autoencoder/models/full-weights-"+'07-02-2023-11-08-47')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f372915f4c0>

In [9]:
def normalize(data):
    """Linearly rescale an array so its minimum maps to 0 and its maximum to 1.

    The input is first shifted so its minimum equals ``epsilon`` (1), then
    min-max scaled. The input array is not modified.

    NOTE(review): an earlier version also computed ``np.log`` of the shifted
    data but never used the result, so the output has always been a linear
    (not logarithmic) scaling. That dead computation is removed here; the
    scaling itself is unchanged. If log scaling was intended, confirm it
    matches how the loaded weights were trained before enabling it.

    Args:
        data: numpy array (any shape) with at least one element.

    Returns:
        Array of the same shape with values in [0, 1]. A constant input
        returns all zeros rather than NaN from a 0/0 division.
    """
    epsilon = 1
    shifted = data - data.min() + epsilon
    low = shifted.min()
    span = shifted.max() - low
    if span == 0:
        # Constant input: the numerator below is all zeros, so dividing by 1
        # yields zeros instead of NaN.
        span = 1
    return (shifted - low) / span
    
def normalize_data(data):
    """Apply ``normalize`` to each entry along the first axis of ``data``.

    The array is overwritten in place, entry by entry, and the same array
    object is returned. Progress is reported with a tqdm bar.
    """
    n_entries = data.shape[0]
    for entry_idx in tqdm(range(n_entries)):
        entry = data[entry_idx, :, :]
        data[entry_idx, :, :] = normalize(entry)
    return data

In [10]:
def measure_cluster_size(x, labels):
    """Summarise how spread out each labelled cluster is around its centroid.

    For every distinct label, the points with that label are compared to their
    mean (centroid); the cluster's score is the largest centroid distance
    divided by the mean centroid distance. A cluster whose points all coincide
    has zero mean distance and yields NaN for that cluster.

    Args:
        x: 2-D array-like of feature vectors, one row per sample.
        labels: array-like with one label per row of ``x``; lists and
            ``(N, 1)`` column arrays are accepted and flattened.

    Returns:
        (mean, std) of the per-cluster max/mean distance ratios.

    Raises:
        ValueError: if the number of labels differs from the number of rows.
    """
    x = np.asarray(x)
    # Coerce to a flat ndarray: comparing a plain list with `==` gives a single
    # bool, which would silently select nothing instead of a per-row mask.
    labels = np.asarray(labels).ravel()
    if labels.shape[0] != x.shape[0]:
        raise ValueError(
            "labels has %d entries but x has %d rows" % (labels.shape[0], x.shape[0]))

    cluster_ratios = []
    # np.unique gives the distinct labels in sorted, reproducible order.
    for label in np.unique(labels):
        members = x[labels == label, :]
        centroid = np.mean(members, axis=0)
        distances = np.linalg.norm(members - centroid, axis=1)
        cluster_ratios.append(np.max(distances) / np.mean(distances))
    return np.mean(cluster_ratios), np.std(cluster_ratios)

In [11]:
import os
from tqdm import tqdm
# Root directory searched for 'filtered.npy' arrays, and the number of times the
# inject -> encode -> score experiment is repeated.
DATA_ROOT = "../../../../../datax/scratch/pma/reverse_search/test/"
N_REPEATS = 10


def load_filtered_data(root):
    """Load and stack every 'filtered.npy' array found two levels below ``root``.

    Layout walked: ``root/<folder>/<subfolder>/<file>``. Subfolder entries whose
    name contains a '.' are skipped, and a file is loaded when its path contains
    'filtered.npy'. Each top-level folder name is printed as it is visited.

    Paths are handled as ``str`` throughout. The previous version listed the
    tree as bytes and converted with ``str(path).replace('b', '')``, which
    strips every 'b' and apostrophe from the real path, not only the bytes-repr
    wrapper.

    Returns:
        The loaded arrays stacked with ``np.vstack``.

    Raises:
        ValueError: if no matching file is found.
    """
    arrays = []
    for folder in os.listdir(root):
        print(folder)
        folder_path = os.path.join(root, folder)
        for subfolder in os.listdir(folder_path):
            if '.' in subfolder:
                continue
            subfolder_path = os.path.join(folder_path, subfolder)
            for file in os.listdir(subfolder_path):
                file_path = os.path.join(subfolder_path, file)
                if 'filtered.npy' in file_path:
                    arrays.append(np.load(file_path))
    if not arrays:
        raise ValueError("no 'filtered.npy' files found under " + root)
    return np.vstack(arrays)


total_scores = []
for repeat in tqdm(range(N_REPEATS)):
    # The background data is reloaded every repetition so that it can be
    # released again (below) before normalisation and inference.
    data = load_filtered_data(DATA_ROOT)
    print(data.shape)
    injected, labels = painting(data)

    print(injected.shape)

    # painting() returns a freshly built array, so the backgrounds can go now.
    del data
    gc.collect()

    # Normalise each frame once, then add the trailing channel axis. A second
    # normalize_data pass over the already-normalised array was redundant and
    # has been dropped.
    input_data = np.expand_dims(normalize_data(injected), axis = -1)
    print(input_data[0,:,:].max(), input_data[0,:,:].min())
    print(input_data.shape)

    tensor = tf.convert_to_tensor(input_data, dtype=tf.float32)
    X = autoencoder.encoder.predict(tensor, batch_size= 1024)
    gc.collect()

    # labels is an (N, 1) column; pass the flat class indices.
    score, spread = measure_cluster_size(X, labels[:, 0])
    print("SCORE IS: ", score, " spread ",spread)
    total_scores.append(score)

  0%|                                                    | 0/10 [00:00<?, ?it/s]

b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1080/100000 [00:00<00:09, 10789.95it/s][A
  2%|▋                                 | 2159/100000 [00:00<00:09, 10781.25it/s][A
  3%|█                                 | 3238/100000 [00:00<00:08, 10773.44it/s][A
  4%|█▍                                | 4316/100000 [00:00<00:08, 10767.48it/s][A
  5%|█▊                                | 5394/100000 [00:00<00:08, 10771.36it/s][A
  6%|██▏                               | 6475/100000 [00:00<00:08, 10782.01it/s][A
  8%|██▌                               | 7554/100000 [00:00<00:08, 10784.18it/s][A
  9%|██▉                               | 8633/100000 [00:00<00:08, 10777.31it/s][A
 10%|███▎                              | 9713/100000 [00:00<00:08, 10782.59it/s][A
 11%|███▌                             | 10793/100000 [00:01<00:08, 10784.87it/s][A
 12%|███▉                             | 11872/100000 [00:01<00:08, 10778.62

1.0 0.0
(100000, 16, 256, 1)


2023-09-07 04:07:20.285276: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401


10/98 [==>...........................] - ETA: 1s

2023-09-07 04:07:22.661416: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




 10%|████▎                                      | 1/10 [02:36<23:25, 156.14s/it]

SCORE IS:  2.7265627  spread  1.2918146
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1073/100000 [00:00<00:09, 10719.17it/s][A
  2%|▋                                 | 2145/100000 [00:00<00:09, 10650.64it/s][A
  3%|█                                 | 3211/100000 [00:00<00:09, 10624.18it/s][A
  4%|█▍                                | 4274/100000 [00:00<00:09, 10622.17it/s][A
  5%|█▊                                | 5337/100000 [00:00<00:08, 10623.75it/s][A
  6%|██▏                               | 6400/100000 [00:00<00:08, 10622.88it/s][A
  7%|██▌                               | 7463/100000 [00:00<00:08, 10615.58it/s][A
  9%|██▉                               | 8525/100000 [00:00<00:08, 10604.32it/s][A
 10%|███▎                              | 9588/100000 [00:00<00:08, 10609.76it/s][A
 11%|███▌                             | 10652/100000 [00:01<00:08, 10616.67it/s][A
 12%|███▊                             | 11717/100000 [00:01<00:08, 10625.15

1.0 0.0
(100000, 16, 256, 1)


 20%|████████▌                                  | 2/10 [05:12<20:48, 156.05s/it]

SCORE IS:  2.7357862  spread  1.3544941
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1044/100000 [00:00<00:09, 10433.07it/s][A
  2%|▋                                 | 2088/100000 [00:00<00:09, 10122.46it/s][A
  3%|█                                 | 3114/100000 [00:00<00:09, 10183.46it/s][A
  4%|█▍                                | 4159/100000 [00:00<00:09, 10286.66it/s][A
  5%|█▊                                | 5189/100000 [00:00<00:09, 10256.64it/s][A
  6%|██                                | 6215/100000 [00:00<00:09, 10144.96it/s][A
  7%|██▍                               | 7298/100000 [00:00<00:08, 10365.59it/s][A
  8%|██▊                               | 8336/100000 [00:00<00:08, 10338.41it/s][A
  9%|███▏                              | 9371/100000 [00:00<00:08, 10341.01it/s][A
 10%|███▍                             | 10406/100000 [00:01<00:08, 10335.11it/s][A
 11%|███▊                             | 11442/100000 [00:01<00:08, 10340.70

1.0 0.0
(100000, 16, 256, 1)


 30%|████████████▉                              | 3/10 [07:45<18:03, 154.82s/it]

SCORE IS:  2.4448392  spread  1.2084209
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1022/100000 [00:00<00:09, 10211.61it/s][A
  2%|▋                                 | 2077/100000 [00:00<00:09, 10406.63it/s][A
  3%|█                                 | 3137/100000 [00:00<00:09, 10492.93it/s][A
  4%|█▍                                | 4187/100000 [00:00<00:09, 10432.88it/s][A
  5%|█▊                                | 5231/100000 [00:00<00:09, 10313.97it/s][A
  6%|██▏                               | 6281/100000 [00:00<00:09, 10375.60it/s][A
  7%|██▍                               | 7337/100000 [00:00<00:08, 10434.12it/s][A
  8%|██▊                               | 8393/100000 [00:00<00:08, 10473.58it/s][A
  9%|███▏                              | 9453/100000 [00:00<00:08, 10511.40it/s][A
 11%|███▍                             | 10509/100000 [00:01<00:08, 10524.78it/s][A
 12%|███▊                             | 11571/100000 [00:01<00:08, 10551.35

1.0 0.0
(100000, 16, 256, 1)


 40%|█████████████████▏                         | 4/10 [10:18<15:25, 154.23s/it]

SCORE IS:  2.6302829  spread  1.2991322
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1075/100000 [00:00<00:09, 10748.75it/s][A
  2%|▋                                 | 2150/100000 [00:00<00:09, 10713.34it/s][A
  3%|█                                 | 3222/100000 [00:00<00:09, 10713.05it/s][A
  4%|█▍                                | 4294/100000 [00:00<00:09, 10590.35it/s][A
  5%|█▊                                | 5361/100000 [00:00<00:08, 10618.76it/s][A
  6%|██▏                               | 6434/100000 [00:00<00:08, 10653.78it/s][A
  8%|██▌                               | 7505/100000 [00:00<00:08, 10669.42it/s][A
  9%|██▉                               | 8580/100000 [00:00<00:08, 10694.92it/s][A
 10%|███▎                              | 9655/100000 [00:00<00:08, 10711.69it/s][A
 11%|███▌                             | 10727/100000 [00:01<00:08, 10665.85it/s][A
 12%|███▉                             | 11801/100000 [00:01<00:08, 10686.25

1.0 0.0
(100000, 16, 256, 1)


 50%|█████████████████████▌                     | 5/10 [12:54<12:53, 154.71s/it]

SCORE IS:  2.566246  spread  1.1646478
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1024/100000 [00:00<00:09, 10234.52it/s][A
  2%|▋                                 | 2087/100000 [00:00<00:09, 10462.62it/s][A
  3%|█                                 | 3152/100000 [00:00<00:09, 10545.99it/s][A
  4%|█▍                                | 4215/100000 [00:00<00:09, 10575.41it/s][A
  5%|█▊                                | 5282/100000 [00:00<00:08, 10607.35it/s][A
  6%|██▏                               | 6347/100000 [00:00<00:08, 10619.82it/s][A
  7%|██▌                               | 7410/100000 [00:00<00:08, 10622.78it/s][A
  8%|██▉                               | 8476/100000 [00:00<00:08, 10631.51it/s][A
 10%|███▏                              | 9543/100000 [00:00<00:08, 10641.24it/s][A
 11%|███▌                             | 10610/100000 [00:01<00:08, 10649.76it/s][A
 12%|███▊                             | 11675/100000 [00:01<00:08, 10646.85

1.0 0.0
(100000, 16, 256, 1)


 60%|█████████████████████████▊                 | 6/10 [15:26<10:15, 153.92s/it]

SCORE IS:  2.6246831  spread  1.257006
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1003/100000 [00:00<00:09, 10022.75it/s][A
  2%|▋                                 | 2052/100000 [00:00<00:09, 10294.48it/s][A
  3%|█                                 | 3103/100000 [00:00<00:09, 10391.88it/s][A
  4%|█▍                                | 4143/100000 [00:00<00:09, 10371.51it/s][A
  5%|█▊                                | 5193/100000 [00:00<00:09, 10414.77it/s][A
  6%|██                                | 6247/100000 [00:00<00:08, 10456.76it/s][A
  7%|██▍                               | 7293/100000 [00:00<00:08, 10438.21it/s][A
  8%|██▊                               | 8352/100000 [00:00<00:08, 10484.67it/s][A
  9%|███▏                              | 9411/100000 [00:00<00:08, 10515.10it/s][A
 10%|███▍                             | 10463/100000 [00:01<00:08, 10313.88it/s][A
 12%|███▊                             | 11528/100000 [00:01<00:08, 10415.02

1.0 0.0
(100000, 16, 256, 1)


 70%|██████████████████████████████             | 7/10 [18:01<07:42, 154.31s/it]

SCORE IS:  2.6977463  spread  1.3374151
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1023/100000 [00:00<00:09, 10221.24it/s][A
  2%|▋                                 | 2086/100000 [00:00<00:09, 10456.20it/s][A
  3%|█                                 | 3147/100000 [00:00<00:09, 10525.22it/s][A
  4%|█▍                                | 4212/100000 [00:00<00:09, 10571.56it/s][A
  5%|█▊                                | 5270/100000 [00:00<00:09, 10291.64it/s][A
  6%|██▏                               | 6340/100000 [00:00<00:08, 10425.91it/s][A
  7%|██▌                               | 7411/100000 [00:00<00:08, 10514.55it/s][A
  8%|██▉                               | 8482/100000 [00:00<00:08, 10574.04it/s][A
 10%|███▏                              | 9553/100000 [00:00<00:08, 10613.24it/s][A
 11%|███▌                             | 10625/100000 [00:01<00:08, 10645.33it/s][A
 12%|███▊                             | 11695/100000 [00:01<00:08, 10660.63

1.0 0.0
(100000, 16, 256, 1)


 80%|██████████████████████████████████▍        | 8/10 [20:40<05:11, 155.64s/it]

SCORE IS:  2.677295  spread  1.2408786
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1081/100000 [00:00<00:09, 10804.47it/s][A
  2%|▋                                 | 2162/100000 [00:00<00:09, 10708.09it/s][A
  3%|█                                 | 3233/100000 [00:00<00:09, 10692.42it/s][A
  4%|█▍                                | 4303/100000 [00:00<00:08, 10677.91it/s][A
  5%|█▊                                | 5371/100000 [00:00<00:08, 10671.95it/s][A
  6%|██▏                               | 6439/100000 [00:00<00:08, 10661.06it/s][A
  8%|██▌                               | 7506/100000 [00:00<00:08, 10660.27it/s][A
  9%|██▉                               | 8575/100000 [00:00<00:08, 10666.94it/s][A
 10%|███▎                              | 9643/100000 [00:00<00:08, 10670.60it/s][A
 11%|███▌                             | 10711/100000 [00:01<00:08, 10667.34it/s][A
 12%|███▉                             | 11779/100000 [00:01<00:08, 10669.80

1.0 0.0
(100000, 16, 256, 1)


 90%|██████████████████████████████████████▋    | 9/10 [23:12<02:34, 154.58s/it]

SCORE IS:  2.7539735  spread  1.4304723
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                | 0/100000 [00:00<?, ?it/s][A
  1%|▎                                 | 1074/100000 [00:00<00:09, 10738.63it/s][A
  2%|▋                                 | 2148/100000 [00:00<00:09, 10689.19it/s][A
  3%|█                                 | 3217/100000 [00:00<00:09, 10664.10it/s][A
  4%|█▍                                | 4284/100000 [00:00<00:08, 10655.76it/s][A
  5%|█▊                                | 5350/100000 [00:00<00:08, 10645.70it/s][A
  6%|██▏                               | 6415/100000 [00:00<00:08, 10633.46it/s][A
  7%|██▌                               | 7479/100000 [00:00<00:08, 10626.72it/s][A
  9%|██▉                               | 8542/100000 [00:00<00:08, 10621.62it/s][A
 10%|███▎                              | 9605/100000 [00:00<00:08, 10556.48it/s][A
 11%|███▌                             | 10665/100000 [00:01<00:08, 10567.95it/s][A
 12%|███▊                             | 11722/100000 [00:01<00:08, 10567.24

1.0 0.0
(100000, 16, 256, 1)


100%|██████████████████████████████████████████| 10/10 [25:48<00:00, 154.82s/it]

SCORE IS:  2.869507  spread  1.510188





In [12]:
# Mean and standard deviation of the per-repetition cluster scores collected in
# total_scores by the experiment loop.
print(np.mean(total_scores))
print(np.std(total_scores))

2.672692
0.10964748
