In [1]:
import numpy as np
import setigen as stg
from blimpy import Waterfall
import matplotlib.pyplot as plt
import random
import os
from astropy import units as u
from tqdm import tqdm
from sklearn.metrics import silhouette_score
import tensorflow as tf
from tensorflow.keras import layers

# Size of the synthetic data set: painting() draws one signal recipe per class
# and injects it into this many randomly chosen windows.
num_classes = 100
num_samples_per_class = 1_000

# Restrict CUDA-based libraries to the device with index 1.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


2023-07-02 11:42:33.842411: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def painting(data):
    """Build a labelled data set by injecting synthetic signals into windows.

    For each of ``num_classes`` classes a drift rate, an SNR and a signal
    width are drawn once. ``num_samples_per_class`` windows are then picked
    at random from ``data`` (with replacement) and the class signal is
    injected into each of them at a random start channel.

    Args:
        data: array indexed as ``data[window, :, :]``; every window is handed
            to setigen as one 2-D frame.

    Returns:
        tuple ``(all_data, labels)`` where ``all_data`` stacks the injected
        windows along a new first axis and ``labels`` is a column vector
        (shape ``(N, 1)``) holding the class index of each window.

    Raises:
        ValueError: if ``data`` contains no windows.
    """
    if data.shape[0] == 0:
        raise ValueError("painting() needs at least one window in `data`")

    all_data = []
    labels = []
    for c in range(num_classes):
        # random.randint is inclusive on both ends, so the former
        # (-1)**random.randint(0, 2) produced +1 twice as often as -1.
        # Draw the sign uniformly instead.
        sign = random.choice((-1, 1))
        drift = 2 * random.random() * sign
        snr = random.randint(100, 150)
        width = random.randint(20, 50)
        for s in range(num_samples_per_class):
            index = random.randint(0, data.shape[0] - 1)
            # Work on a copy: a basic slice is a view into `data`, and
            # setigen may add the signal to the array it was given in place
            # (TODO confirm). Copying keeps `data` untouched and stops a
            # window that is drawn twice from accumulating signals.
            window = data[index, :, :].copy()

            # Channel index at which the injected signal starts.
            start = random.randint(50, 180)

            frame = stg.Frame.from_data(df=2.7939677238464355*u.Hz,
                                        dt=18.253611008*u.s,
                                        fch1=1289*u.MHz,
                                        ascending=True,
                                        data=window)
            frame.add_signal(
                stg.constant_path(f_start=frame.get_frequency(index=start),
                                  drift_rate=drift*u.Hz/u.s),
                stg.constant_t_profile(level=frame.get_intensity(snr=snr)),
                stg.gaussian_f_profile(width=width*u.Hz),
                stg.constant_bp_profile(level=1))
            all_data.append(frame.data)
            labels.append(c)
    all_data = np.array(all_data)
    labels = np.vstack(labels)
    return all_data, labels

In [3]:
import cv2
import numpy as np


In [7]:
from tqdm import tqdm
import gc
import keras
from AE import AE

In [9]:
def build_model(encoder, decoder):
    """Wrap an encoder/decoder pair in a compiled ``AE`` model.

    The first parameter used to be misspelled ``enocder``, so the body's
    ``encoder`` silently resolved to the notebook-level global and the
    argument was ignored. Positional calls are unaffected by the rename.

    Args:
        encoder: model passed as the first argument to ``AE``.
        decoder: model passed as the second argument to ``AE``.

    Returns:
        The ``AE`` instance, compiled with an Adam optimizer
        (learning rate 5e-4).
    """
    autoencoder = AE(encoder, decoder)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    autoencoder.compile(optimizer=keras.optimizers.Adam(learning_rate=5e-4))
    return autoencoder

In [10]:
# Encoder: (time_samples, freq_sample, 1) input -> latent_dim-dimensional code.
latent_dim = 10
time_samples = 16
freq_sample = 256

encoder_inputs = keras.Input(shape=(time_samples, freq_sample, 1))

# Five Conv -> MaxPool -> BatchNorm stages. The (1, 2) pooling halves only the
# second spatial axis at every stage, leaving the first axis untouched.
x = encoder_inputs
for n_filters in (3, 64, 64, 64, 64):
    x = layers.Conv2D(n_filters, 3, activation="relu", strides=1, padding="same")(x)
    x = layers.MaxPool2D(pool_size=(1, 2))(x)
    x = layers.BatchNormalization()(x)

# Remembered so the decoder can reshape back to this feature-map shape.
x_shape = x.shape

# Dense head that compresses the flattened feature map down to the latent code.
x = layers.Flatten()(x)
for n_units in (256, 64):
    x = layers.Dense(n_units, activation="relu")(x)
    x = layers.BatchNormalization()(x)
z = layers.Dense(latent_dim, name="latent", activation="linear")(x)

encoder = keras.Model(encoder_inputs, z, name="encoder")
encoder.summary()

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 16, 256, 1)]      0         
                                                                 
 conv2d_5 (Conv2D)           (None, 16, 256, 3)        30        
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 16, 128, 3)       0         
 2D)                                                             
                                                                 
 batch_normalization_7 (Batc  (None, 16, 128, 3)       12        
 hNormalization)                                                 
                                                                 
 conv2d_6 (Conv2D)           (None, 16, 128, 64)       1792      
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 16, 64, 64)       0   

In [11]:
# Decoder: latent_dim-dimensional code -> single-channel image.
latent_inputs = keras.Input(shape=(latent_dim,))

# Dense stack that expands the code back to the encoder's last feature map.
x = layers.Dense(64, activation="relu")(latent_inputs)
x = layers.Dense(256, activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(x_shape[1]* x_shape[2]* x_shape[3], activation="relu")(x)
x = layers.BatchNormalization()(x)
x = layers.Reshape((x_shape[1], x_shape[2], x_shape[3]))(x)

# Five upsampling stages. The stride-2 transposed convolution doubles both
# spatial axes and the (2, 1) pooling halves the first one again, so each
# stage doubles only the second axis.
for n_filters in (64, 64, 64, 64, 3):
    x = layers.Conv2DTranspose(n_filters, 3, activation="relu", strides=2, padding="same")(x)
    x = layers.MaxPool2D(pool_size=(2, 1))(x)
    x = layers.BatchNormalization()(x)

# Linear output layer producing the single reconstruction channel.
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="linear", padding="same")(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 10)]              0         
                                                                 
 dense_4 (Dense)             (None, 64)                704       
                                                                 
 dense_5 (Dense)             (None, 256)               16640     
                                                                 
 batch_normalization_14 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dense_6 (Dense)             (None, 8192)              2105344   
                                                                 
 batch_normalization_15 (Bat  (None, 8192)             32768     
 chNormalization)                                          

In [12]:
# Assemble the autoencoder from the two models above and restore its weights.
autoencoder = AE(encoder, decoder)
optimizer = keras.optimizers.Adam(learning_rate = 1e-3)
autoencoder.compile(optimizer=optimizer)

# Checkpoint prefix = fixed stem + timestamp of the training run.
weights_path = "../autoencoder/models/full-weights-"+'07-02-2023-11-08-47'
autoencoder.load_weights(weights_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f6352093fa0>

In [13]:
def normalize(data):
    """Min-max scale ``data`` into the range [0, 1].

    The array is shifted so its minimum equals ``epsilon`` and then rescaled
    linearly by its own minimum and maximum. A constant array (maximum equal
    to minimum) is mapped to all zeros instead of dividing by zero.

    Args:
        data: non-empty numpy array.

    Returns:
        A new array of the same shape with values in [0, 1]. The input is
        not modified.
    """
    epsilon = 1
    shifted = data - data.min() + epsilon
    # NOTE(review): the previous version also computed np.log(shifted) here
    # but never used the result, so the scaling has always been linear. That
    # dead computation is dropped; confirm how the training data was
    # preprocessed before switching to log scaling.
    low = shifted.min()
    span = shifted.max() - low
    if span == 0:
        # Constant input: the numerator below is all zeros, so dividing by 1
        # yields zeros rather than 0/0 = NaN (plus a RuntimeWarning).
        span = 1
    return (shifted - low) / span
    
def normalize_data(data):
    """Apply ``normalize`` to every entry along the first axis, in place.

    Each ``data[i, :, :]`` is replaced by its normalized version while a tqdm
    progress bar tracks the loop. The same (now modified) array object is
    returned.
    """
    n_windows = data.shape[0]
    for idx in tqdm(range(n_windows)):
        window = data[idx, :, :]
        data[idx, :, :] = normalize(window)
    return data

In [15]:
import os
from tqdm import tqdm
def _load_filtered_windows(root):
    """Stack every ``*filtered.npy`` array found two directory levels below ``root``.

    Layout walked: ``root/<folder>/<subfolder>/<file>``. Subfolder names that
    contain a '.' are skipped, as are entries that are not directories. Each
    top-level folder name is printed as it is visited.

    Args:
        root: directory path (``str`` or ``bytes``).

    Returns:
        ``np.vstack`` of all arrays that were loaded.

    Raises:
        FileNotFoundError: if no matching file exists under ``root``.
    """
    # Work with str paths throughout. The previous code built bytes paths and
    # converted them with str(...).replace('b', ''), which deleted every
    # letter "b" from the path, not just the b'' prefix.
    root = os.fsdecode(root)
    arrays = []
    for folder in os.listdir(root):
        print(folder)
        folder_path = os.path.join(root, folder)
        if not os.path.isdir(folder_path):
            continue
        for subfolder in os.listdir(folder_path):
            subfolder_path = os.path.join(folder_path, subfolder)
            if '.' in subfolder or not os.path.isdir(subfolder_path):
                continue
            for file in os.listdir(subfolder_path):
                file_path = os.path.join(subfolder_path, file)
                if 'filtered.npy' in file_path:
                    arrays.append(np.load(file_path))
    if not arrays:
        raise FileNotFoundError("no 'filtered.npy' files found under " + root)
    return np.vstack(arrays)


# Repeat the experiment 10 times: inject fresh random signals, encode them and
# score how well the latent vectors cluster by injected class.
total_scores = []
directory = "../../../../../datax/scratch/pma/reverse_search/test/"
for i in tqdm(range(10)):
    # Reloaded on every run so the raw windows can be released again before
    # normalization and inference.
    data = _load_filtered_windows(directory)
    print(data.shape)
    injected, labels = painting(data)
    del data
    gc.collect()

    print(injected.shape)

    # Normalize once (the previous code ran normalize_data a second time on
    # the same array) and add the trailing channel axis the encoder expects.
    input_data = np.expand_dims(normalize_data(injected), axis = -1)
    print(input_data[0,:,:].max(), input_data[0,:,:].min())
    print(input_data.shape)

    tensor = tf.convert_to_tensor(input_data, dtype=tf.float32)
    X = autoencoder.encoder.predict(tensor, batch_size= 1024)
    gc.collect()

    # labels is a column vector; silhouette_score wants a flat label array.
    score = silhouette_score(X, labels = labels[:, 0])
    print("SCORE IS: ", score)
    total_scores.append(score)

  0%|                                                                                                               | 0/10 [00:00<?, ?it/s]

b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|█                                                                                            | 1087/100000 [00:00<00:09, 10861.28it/s][A
  2%|██                                                                                           | 2174/100000 [00:00<00:09, 10827.58it/s][A
  3%|███                                                                                          | 3258/100000 [00:00<00:08, 10829.69it/s][A
  4%|████                                                                                         | 4341/100000 [00:00<00:08, 10819.03it/s][A
  5%|█████                                                                                        | 5424/100000 [00:00<00:08, 10821.65it/s][A
  7%|██████                                                                                       | 6507/100000 [00:00<00:08, 10822.15it/s][

1.0 0.0
(100000, 16, 256, 1)


2023-07-02 11:48:00.687882: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401


11/98 [==>...........................] - ETA: 1s

2023-07-02 11:48:02.756555: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




 10%|██████████▏                                                                                           | 1/10 [04:28<40:18, 268.72s/it]

SCORE IS:  -0.21477889
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|█                                                                                            | 1082/100000 [00:00<00:09, 10809.88it/s][A
  2%|██                                                                                           | 2163/100000 [00:00<00:09, 10785.18it/s][A
  3%|███                                                                                          | 3242/100000 [00:00<00:08, 10773.01it/s][A
  4%|████                                                                                         | 4320/100000 [00:00<00:08, 10765.86it/s][A
  5%|█████                                                                                        | 5398/100000 [00:00<00:08, 10768.58it/s][A
  6%|██████                                                                                       | 6475/100000 [00:00<00:08, 10761.17it/s][

1.0 0.0
(100000, 16, 256, 1)


 20%|████████████████████▍                                                                                 | 2/10 [08:48<35:09, 263.73s/it]

SCORE IS:  -0.22718823
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|▉                                                                                            | 1025/100000 [00:00<00:09, 10247.08it/s][A
  2%|█▉                                                                                           | 2089/100000 [00:00<00:09, 10476.03it/s][A
  3%|██▉                                                                                          | 3153/100000 [00:00<00:09, 10546.90it/s][A
  4%|███▉                                                                                         | 4217/100000 [00:00<00:09, 10583.20it/s][A
  5%|████▉                                                                                        | 5283/100000 [00:00<00:08, 10607.76it/s][A
  6%|█████▉                                                                                       | 6348/100000 [00:00<00:08, 10620.12it/s][

1.0 0.0
(100000, 16, 256, 1)


 30%|██████████████████████████████▌                                                                       | 3/10 [13:01<30:10, 258.67s/it]

SCORE IS:  -0.23615086
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|█                                                                                            | 1082/100000 [00:00<00:09, 10811.79it/s][A
  2%|██                                                                                           | 2164/100000 [00:00<00:09, 10714.82it/s][A
  3%|███                                                                                          | 3236/100000 [00:00<00:09, 10686.35it/s][A
  4%|████                                                                                         | 4305/100000 [00:00<00:08, 10682.62it/s][A
  5%|████▉                                                                                        | 5374/100000 [00:00<00:08, 10684.22it/s][A
  6%|█████▉                                                                                       | 6443/100000 [00:00<00:08, 10676.95it/s][

1.0 0.0
(100000, 16, 256, 1)


 40%|████████████████████████████████████████▊                                                             | 4/10 [17:13<25:36, 256.09s/it]

SCORE IS:  -0.2307284
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|█                                                                                            | 1077/100000 [00:00<00:09, 10764.77it/s][A
  2%|██                                                                                           | 2154/100000 [00:00<00:09, 10735.23it/s][A
  3%|███                                                                                          | 3228/100000 [00:00<00:09, 10731.60it/s][A
  4%|████                                                                                         | 4302/100000 [00:00<00:08, 10723.04it/s][A
  5%|████▉                                                                                        | 5376/100000 [00:00<00:08, 10728.94it/s][A
  6%|█████▉                                                                                       | 6449/100000 [00:00<00:08, 10727.13it/s][

1.0 0.0
(100000, 16, 256, 1)


 50%|███████████████████████████████████████████████████                                                   | 5/10 [21:30<21:20, 256.19s/it]

SCORE IS:  -0.21825846
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|▉                                                                                            | 1030/100000 [00:00<00:09, 10295.03it/s][A
  2%|█▉                                                                                           | 2098/100000 [00:00<00:09, 10517.02it/s][A
  3%|██▉                                                                                          | 3166/100000 [00:00<00:09, 10587.06it/s][A
  4%|███▉                                                                                         | 4225/100000 [00:00<00:09, 10507.95it/s][A
  5%|████▉                                                                                        | 5293/100000 [00:00<00:08, 10568.28it/s][A
  6%|█████▉                                                                                       | 6356/100000 [00:00<00:08, 10588.65it/s][

1.0 0.0
(100000, 16, 256, 1)


 60%|█████████████████████████████████████████████████████████████▏                                        | 6/10 [25:48<17:08, 257.01s/it]

SCORE IS:  -0.21883951
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|▉                                                                                            | 1032/100000 [00:00<00:09, 10315.75it/s][A
  2%|█▉                                                                                           | 2097/100000 [00:00<00:09, 10510.87it/s][A
  3%|██▉                                                                                          | 3165/100000 [00:00<00:09, 10587.31it/s][A
  4%|███▉                                                                                         | 4224/100000 [00:00<00:09, 10528.10it/s][A
  5%|████▉                                                                                        | 5294/100000 [00:00<00:08, 10589.08it/s][A
  6%|█████▉                                                                                       | 6364/100000 [00:00<00:08, 10624.31it/s][

1.0 0.0
(100000, 16, 256, 1)


 70%|███████████████████████████████████████████████████████████████████████▍                              | 7/10 [30:04<12:49, 256.60s/it]

SCORE IS:  -0.22354512
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|▉                                                                                              | 965/100000 [00:00<00:10, 9645.55it/s][A
  2%|█▉                                                                                           | 2018/100000 [00:00<00:09, 10161.56it/s][A
  3%|██▊                                                                                          | 3080/100000 [00:00<00:09, 10368.87it/s][A
  4%|███▊                                                                                         | 4142/100000 [00:00<00:09, 10466.04it/s][A
  5%|████▊                                                                                        | 5206/100000 [00:00<00:09, 10526.43it/s][A
  6%|█████▊                                                                                       | 6267/100000 [00:00<00:08, 10554.16it/s][

1.0 0.0
(100000, 16, 256, 1)


 80%|█████████████████████████████████████████████████████████████████████████████████▌                    | 8/10 [34:27<08:37, 258.79s/it]

SCORE IS:  -0.2204409
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|█                                                                                            | 1084/100000 [00:00<00:09, 10836.03it/s][A
  2%|██                                                                                           | 2168/100000 [00:00<00:09, 10781.21it/s][A
  3%|███                                                                                          | 3247/100000 [00:00<00:08, 10768.51it/s][A
  4%|████                                                                                         | 4324/100000 [00:00<00:08, 10768.96it/s][A
  5%|█████                                                                                        | 5402/100000 [00:00<00:08, 10769.32it/s][A
  6%|██████                                                                                       | 6479/100000 [00:00<00:08, 10758.95it/s][

1.0 0.0
(100000, 16, 256, 1)


 90%|███████████████████████████████████████████████████████████████████████████████████████████▊          | 9/10 [38:48<04:19, 259.31s/it]

SCORE IS:  -0.22711425
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)



  0%|                                                                                                           | 0/100000 [00:00<?, ?it/s][A
  1%|█                                                                                            | 1079/100000 [00:00<00:09, 10789.01it/s][A
  2%|██                                                                                           | 2158/100000 [00:00<00:09, 10696.89it/s][A
  3%|███                                                                                          | 3228/100000 [00:00<00:09, 10674.02it/s][A
  4%|███▉                                                                                         | 4296/100000 [00:00<00:08, 10666.04it/s][A
  5%|████▉                                                                                        | 5363/100000 [00:00<00:08, 10659.69it/s][A
  6%|█████▉                                                                                       | 6429/100000 [00:00<00:08, 10656.08it/s][

1.0 0.0
(100000, 16, 256, 1)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [43:07<00:00, 258.73s/it]

SCORE IS:  -0.21977752





In [16]:
# Mean, then standard deviation, of the silhouette scores over all runs.
for summarize in (np.mean, np.std):
    print(summarize(total_scores))

-0.22368221
0.0062217093
