In [1]:
import numpy as np
import setigen as stg
from blimpy import Waterfall
import matplotlib.pyplot as plt
import random
import os
from astropy import units as u
from tqdm import tqdm
from sklearn.metrics import silhouette_score
import tensorflow as tf

os.environ["CUDA_VISIBLE_DEVICES"]="0"
num_classes = 100
num_samples_per_class = 1000


2023-09-07 11:11:03.993602: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def painting(data):
    all_data = []
    labels = []
    for c in range(num_classes):
        drift = 2*random.random()*(-1)**random.randint(0,2)
        snr = random.randint(100, 150)
        width = random.randint(20, 50)
        for s in range(num_samples_per_class):
            index = random.randint(0, data.shape[0]-1)
            window = data[index, :,:]
            
            start = random.randint(50, 180)
            
            frame = stg.Frame.from_data(df=2.7939677238464355*u.Hz,
                                        dt=18.253611008*u.s,
                                        fch1=1289*u.MHz,
                                        ascending=True,
                                        data=window)
            frame.add_signal(stg.constant_path(
                                        f_start=frame.get_frequency(index=start),
                                       drift_rate=drift*u.Hz/u.s),
                                      stg.constant_t_profile(level=frame.get_intensity(snr=snr)),
                                      stg.gaussian_f_profile(width=width*u.Hz),
                                      stg.constant_bp_profile(level=1))
            all_data.append(frame.data)
            labels.append(c)
    all_data = np.array(all_data)
    labels = np.vstack(labels)
    return all_data, labels

In [3]:
import cv2
import numpy as np

def transform(data):
    final = []
    for i in range(data.shape[0]):
        img = np.repeat(data[i,:, :], 3, axis=2)
        res = cv2.resize(img, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        final.append(res)
    return np.array(final)


In [4]:
from tqdm import tqdm
import gc

def normalize(data):
    epsilon = 1
    min_val = data.min()
    data = data - min_val + epsilon
    new_data = np.log(data)
    min_val = data.min()
    max_val = data.max()
    final_data = (data - min_val) / (max_val - min_val)
    return final_data
    
def normalize_data(data):
    for i in range(data.shape[0]):
        data[i,:,:] = normalize(data[i,:,:, :])
    return data

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras import Model

model = ResNet50(include_top=True,
                weights='imagenet',
                input_tensor=None,
                input_shape=None,
                pooling=None,
                classes=1000
                )
truncated_model = Model(inputs = model.layers[0].input, outputs = model.layers[-1].output)
truncated_model.summary()
truncated_model.compile(optimizer='rmsprop')



2023-09-07 11:11:23.723318: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-07 11:11:24.110161: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14233 MB memory:  -> device: 0, name: NVIDIA RTX A4000, pci bus id: 0000:01:00.0, compute capability: 8.6


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              

In [5]:
def cosine(mat, vec):
    return 1- (mat @ vec)/(np.linalg.norm(vec) * np.linalg.norm(mat,  axis = 1) )

In [6]:
# # from scipy.spatial.distance import cosine
def measure_cluster_size(x, labels):
    get_labels = list(set(labels))
    mean_cluster_spread = []
    for l in get_labels:
        index = np.asarray(labels==l)
        x_features = x[index, :]
        mean_centroid = np.mean(x_features, axis = 0)
        diff = x_features - mean_centroid
        norms = np.linalg.norm(diff, axis = 1)
        mean_cluster_spread.append(np.max(norms)/np.mean(norms))
        # print(x_features.shape, mean_centroid.shape, norms.shape)
    return np.mean(mean_cluster_spread), np.std(mean_cluster_spread)

# def measure_cluster_size(x, labels):
#     get_labels = list(set(labels))
#     mean_cluster_spread = []
#     for l in get_labels:
#         index = np.asarray(labels==l)
#         x_features = x[index, :]
#         mean_centroid = np.mean(x_features, axis = 0)
#         # diff = []
#         # for k in range(x_features.shape[0]):
#         diff = cosine(x_features[:, :], mean_centroid)
#         # norms = np.linalg.norm(diff, axis = 1)
#         mean_cluster_spread.append(diff)
#         print(x_features.shape, mean_centroid.shape, norms.shape)
#     return np.mean(mean_cluster_spread), np.std(mean_cluster_spread)

In [7]:
import os
from tqdm import tqdm
total_scores = []
total_spread = []
for i in tqdm(range(10)):
    directory = os.fsencode( "../../../../../datax/scratch/pma/reverse_search/test/")
    count = 0
    data = []
    for folder in os.listdir(directory):
        print(folder)
        for subfolder in os.listdir(directory+folder):
            back = os.fsencode( "/")
            if '.' not in str(subfolder):
                for file in os.listdir(directory+folder+back+subfolder):
                    file_directory = str(os.path.join(directory+folder+back+subfolder, file)).replace('b', '').replace("'","")
                    if 'filtered.npy' in file_directory:
                        data.append(np.load(str(file_directory)))
                        count += 1
    data = np.vstack(data)
    print(data.shape)
#     idx = np.random.randint(data.shape[0], size=(100_000))
    injected, labels = painting(data)
    
    print(injected.shape)
    
    input_data = transform(np.expand_dims(injected, axis = -1))
    del data
    gc.collect()
    input_data = normalize_data(input_data)
    print(input_data[0,:,:].max(), input_data[0,:,:].min())
    print(input_data.shape)
    features = []
    for i in range(1,101):
        tensor = tf.convert_to_tensor(input_data[1_000*(i-1):1_000*i, :, :, :], dtype=tf.float32)
        features.append( model(tensor))
    del input_data
    gc.collect()
    features = np.vstack(features)
    print('features: ',features.shape)
    score, spread = measure_cluster_size(x = features, labels = labels[:, 0])
    print("SCORE IS: ", score, spread)
    total_scores.append(score)
    total_spread.append(spread)

  0%|                                                    | 0/10 [00:00<?, ?it/s]

b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


2023-09-07 11:18:52.890787: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401
2023-09-07 11:18:55.138765: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.79GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-09-07 11:18:55.138870: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 6.88GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-09-07 11:18:55.263301: W tensorflow/tsl/framework/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.79GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if m

features:  (100000, 1000)
SCORE IS:  3.428482 1.1110188
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 20%|████████▏                                | 2/10 [19:32<1:17:46, 583.31s/it]

features:  (100000, 1000)
SCORE IS:  3.612511 1.432756
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 30%|████████████▎                            | 3/10 [28:59<1:07:10, 575.72s/it]

features:  (100000, 1000)
SCORE IS:  3.869151 1.6308392
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 40%|█████████████████▏                         | 4/10 [38:24<57:10, 571.76s/it]

features:  (100000, 1000)
SCORE IS:  3.624501 1.3080271
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 50%|█████████████████████▌                     | 5/10 [47:53<47:33, 570.74s/it]

features:  (100000, 1000)
SCORE IS:  3.4937394 1.2103189
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 60%|█████████████████████████▊                 | 6/10 [57:21<37:58, 569.59s/it]

features:  (100000, 1000)
SCORE IS:  3.6793811 1.4936986
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 70%|████████████████████████████▋            | 7/10 [1:06:47<28:26, 568.68s/it]

features:  (100000, 1000)
SCORE IS:  3.4929523 1.1632562
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 80%|████████████████████████████████▊        | 8/10 [1:16:12<18:54, 567.25s/it]

features:  (100000, 1000)
SCORE IS:  3.6412823 1.408468
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


 90%|████████████████████████████████████▉    | 9/10 [1:25:38<09:27, 567.01s/it]

features:  (100000, 1000)
SCORE IS:  3.4756484 1.2780347
b'HIP104887-1850'
b'HIP87579-1008'
b'clustering_tests'
(347064, 16, 256)
(100000, 16, 256)
1.0 0.0
(100000, 224, 224, 3)


100%|████████████████████████████████████████| 10/10 [1:35:05<00:00, 570.50s/it]

features:  (100000, 1000)
SCORE IS:  3.6239514 1.2359366





In [8]:
from scipy.spatial import distance
vec1 = np.zeros(shape=(100, 10))
vec2 = np.zeros(shape=(10))
distance.cosine(vec1, vec2)

ValueError: Input vector should be 1-D.

In [9]:
print(np.mean(total_scores))
print(np.std(total_scores))

3.5941596
0.12209407


In [10]:
print(np.mean(total_spread))
print(np.std(total_spread))

1.3272355
0.15363984
