In [1]:
import os
import sys
import time
import galsim
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib import patches
from PIL import Image 

from skimage import io, img_as_uint

In [2]:
# Output directory for figures saved by this notebook.
# Useful when viewing fullsize images through the VNC
save_dir = "/g/g15/ahlquist/Documents/workspace/output_dir"

In [3]:
# Directory where data is found: the asteroid_equalized_images_* and
# asteroid_injected_information_* .npz archives are loaded from here.
data_dir = "/usr/workspace/DSSI_ch/data_and_information"

In [4]:
# Shared folder; the generated .jpg / .csv files are written to {team_6_dir}/data
team_6_dir = "/usr/workspace/DSSI_ch/team6"
# Number of the .npz archive pair to process; it is also embedded in every output filename
npz_index = 0 

## Load data

In [5]:
# np.load on an .npz archive returns a handle that keeps the file open, so read
# the image stack inside a `with` block and let the handle close afterwards.
# `starAsteroid` is a regular in-memory array once it has been read.
with np.load(f"{data_dir}/asteroid_equalized_images_{npz_index}.npz") as npz_data:
    starAsteroid = npz_data['data']

In [6]:
# Metadata about the injected asteroids. `y` stays the loaded archive object:
# later cells index it as y['data'][record].
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only point this at trusted files.
y = np.load(f"{data_dir}/asteroid_injected_information_{npz_index}.npz", allow_pickle=True)
# Alias for the image stack, used alongside `y` in the cells below
X = starAsteroid

In [None]:
# Information of the first sample image from the Zwicky Transient Facility
# (record 0 of the 'data' array)
y['data'][0]

In [None]:
# Information of the magnitude, length, angle and box of the injected asteroid streaks
# NOTE(review): print() writes out every record; the bare last line displays record 1 only
print(y['data'])
y['data'][1]

In [None]:
# Crop image 0 to box entry 1 of record 1 and display the crop
box_values_ast_0 = y['data'][1]['box'][1]
row_span = slice(box_values_ast_0.ymin, box_values_ast_0.ymax)
col_span = slice(box_values_ast_0.xmin, box_values_ast_0.xmax)
plt.imshow(X[0][row_span, col_span])

In [None]:
def displayAsteroid(asteroid, image, margin=40):
    """Show the central part of an asteroid's bounding box.

    Parameters
    ----------
    asteroid
        Box object exposing ``xmin``, ``xmax``, ``ymin`` and ``ymax`` pixel
        coordinates.
    image
        2-D array indexed as ``image[row, column]``.
    margin : int, optional
        Pixels trimmed from every side of the box before display. The default
        of 40 reproduces the previously hard-coded crop.

    Returns
    -------
    None
        The crop is drawn with ``plt.imshow`` on the current axes.
    """
    row_span = slice(asteroid.ymin + margin, asteroid.ymax - margin)
    col_span = slice(asteroid.xmin + margin, asteroid.xmax - margin)
    plt.imshow(image[row_span, col_span])
    
# Show box entry 1 of record 5 on image 2
# (presumably record i pairs with image i // 2, as in the CSV cells -- verify)
displayAsteroid(y['data'][5]['box'][1], X[2])

## Generate images from npz file

In [None]:
""" Generate .jpg files from npz
(Don't run, unless you want to overwrite files)
"""

# One JPEG per image in X, named after the npz file number and the image's
# position in the stack.
for i, frame in enumerate(tqdm(X)):
    jpg_path = f"{team_6_dir}/data/astroids_{npz_index}_{i}.jpg"
    io.imsave(jpg_path, img_as_uint(frame))

In [7]:
""" Generate .csv files from npz
(Don't run, unless you want to overwrite files)
"""

box_columns = ["xmin", "xmax", "ymin", "ymax"]

# Read the record array once instead of going back to the archive on every iteration.
injected_info = y['data']

# The loop visits records 1, 3, 5, ...; record i is written out for image i // 2.
for i in tqdm(range(1, len(injected_info), 2)):

    # Get wanted filename index
    index = i // 2

    # Collect plain dicts and build the frame in one go. Series.iteritems() and
    # DataFrame.append() were removed in pandas 2.0, and appending row by row
    # copies the whole frame each time.
    records = [
        {"xmin": row.xmin, "xmax": row.xmax, "ymin": row.ymin, "ymax": row.ymax}
        for _, row in injected_info[i]['box'].items()
    ]

    # Passing `columns` keeps the header even when an image has no boxes.
    df = pd.DataFrame(records, columns=box_columns)

    # Save dataframe to file
    df.to_csv(f"{team_6_dir}/data/astroids_{npz_index}_{index}.csv", index=False)

100%|██████████| 100/100 [00:09<00:00, 10.21it/s]


In [8]:
""" For each image make a set of box csv where astroids do NOT exist
"""
# Sampling parameters (loop-invariant, so defined once up front).
num_points = 64                     # negative boxes kept per image
num_candidates = num_points + 20    # oversample: some candidates are rejected below
half_size = 51                      # boxes span centre +/- 51 px on each axis
box_columns = ["xmin", "xmax", "ymin", "ymax"]

# A single seeded generator makes the negative boxes reproducible across re-runs.
# npz_index is part of the seed so that each npz file gets its own stream.
rng = np.random.default_rng([0, npz_index])

for i in tqdm(range(1, len(y['data']), 2)):

    # Get wanted filename index
    index = i // 2

    ast_df = pd.read_csv(f"{team_6_dir}/data/astroids_{npz_index}_{index}.csv")

    image = X[index]
    height = len(image)
    width = len(image[0])

    # Random centre points, kept far enough from the border for the whole box to fit.
    ixs = rng.integers(half_size, width - half_size, dtype=np.int32, size=num_candidates)
    iys = rng.integers(half_size, height - half_size, dtype=np.int32, size=num_candidates)

    ixs_min, ixs_max = ixs - half_size, ixs + half_size
    iys_min, iys_max = iys - half_size, iys + half_size

    # Column vectors of shape (n_asteroids, 1) broadcast against the
    # (n_candidates,) arrays, giving one row of comparisons per asteroid box.
    ast_xmin = ast_df["xmin"].to_numpy()[:, np.newaxis]
    ast_xmax = ast_df["xmax"].to_numpy()[:, np.newaxis]
    ast_ymin = ast_df["ymin"].to_numpy()[:, np.newaxis]
    ast_ymax = ast_df["ymax"].to_numpy()[:, np.newaxis]

    overlap = (ast_xmin <= ixs_max) & (ast_xmax >= ixs_min) \
            & (ast_ymin <= iys_max) & (ast_ymax >= iys_min)

    # A candidate is kept only if it overlaps no asteroid box at all.
    valid_mask = ~overlap.any(axis=0)

    # Fresh frame per image, so nothing is carried over between iterations.
    df = pd.DataFrame(
        {
            "xmin": ixs_min[valid_mask],
            "xmax": ixs_max[valid_mask],
            "ymin": iys_min[valid_mask],
            "ymax": iys_max[valid_mask],
        },
        columns=box_columns,
    ).head(num_points)

    # Save dataframe to file
    df.to_csv(f"{team_6_dir}/data/not_astroids_{npz_index}_{index}.csv", index=False)
    

100%|██████████| 100/100 [00:00<00:00, 139.48it/s]


In [9]:
""" Check asteroid boxes to see if they are in the black spaces (remove if they are)
"""

def isValidBoundingBox(b, img=None, cutoff=130):
    """Return True when the pixels inside box ``b`` are brighter than ``cutoff`` on average.

    Parameters
    ----------
    b : mapping or pandas.Series
        Must provide "xmin", "xmax", "ymin" and "ymax" pixel coordinates.
    img : array-like, optional
        2-D image to sample. When omitted, the notebook-level ``image``
        variable is used, which is what the one-argument call relies on.
    cutoff : float, optional
        Mean intensity a box has to exceed to count as valid (default 130).

    Returns
    -------
    bool
        False when the mean is at or below ``cutoff``, and also when the box
        selects no pixels at all.
    """
    if img is None:
        img = image  # notebook-level array set by the calling cell

    # Coordinates coming out of a DataFrame row may be floats; slicing needs ints.
    ymin, ymax = int(b["ymin"]), int(b["ymax"])
    xmin, xmax = int(b["xmin"]), int(b["xmax"])

    sliced = np.asarray(img)[ymin:ymax, xmin:xmax]

    # A degenerate box has no pixels to average, so treat it as invalid.
    if sliced.size == 0:
        return False

    return bool(np.mean(sliced) > cutoff)

In [10]:
# Pixels trimmed from every side of a box, so only its centre is brightness-tested.
offset = 47

for i in tqdm(range(1, len(y['data']), 2)):

    # Get wanted filename index
    index = i // 2

    boxes = pd.read_csv(f"{team_6_dir}/data/astroids_{npz_index}_{index}.csv")

    # isValidBoundingBox reads the notebook-level `image`, so it is rebound for
    # every file. The context manager closes the JPEG handle once it is decoded.
    with Image.open(f"{team_6_dir}/data/astroids_{npz_index}_{index}.jpg") as jpg:
        image = np.array(jpg.convert("L"))

    # Shrunken copies of the boxes, built as a fresh frame per image so no
    # state is carried over between iterations.
    inner = pd.DataFrame({
        "xmin": boxes["xmin"] + offset,
        "ymin": boxes["ymin"] + offset,
        "xmax": boxes["xmax"] - offset,
        "ymax": boxes["ymax"] - offset,
    })

    # Explicit row loop instead of DataFrame.apply(axis=1): on a frame with no
    # rows apply() can hand back an empty DataFrame rather than a boolean
    # Series, which breaks the .loc filter for images without any boxes.
    keep = np.array([isValidBoundingBox(b) for _, b in inner.iterrows()], dtype=bool)
    df = boxes.loc[keep]

    # Save dataframe to file
    # Overwrite:
    df.to_csv(f"{team_6_dir}/data/astroids_{npz_index}_{index}.csv", index=False)
    # NewFile:
    #df.to_csv(f"{team_6_dir}/data/astroids_cleaned_{npz_index}_{index}.csv", index=False)

  2%|▏         | 2/100 [00:00<00:14,  6.95it/s]

ymin: 2962, ymax: 2971, xmin: 1186, xmax:1195
ymin: 2950, ymax: 2959, xmin: 2664, xmax:2673


  9%|▉         | 9/100 [00:01<00:11,  7.95it/s]

ymin: 1394, ymax: 1402, xmin: 2256, xmax:2264
ymin: 1935, ymax: 1943, xmin: 1525, xmax:1533


 11%|█         | 11/100 [00:01<00:10,  8.74it/s]

ymin: 2979, ymax: 2986, xmin: 1952, xmax:1959
ymin: 817, ymax: 824, xmin: 2504, xmax:2511


 14%|█▍        | 14/100 [00:01<00:11,  7.37it/s]

ymin: 2686, ymax: 2695, xmin: 1682, xmax:1691


 18%|█▊        | 18/100 [00:02<00:12,  6.62it/s]

ymin: 2942, ymax: 2949, xmin: 750, xmax:757
ymin: 2983, ymax: 2990, xmin: 1044, xmax:1051


 24%|██▍       | 24/100 [00:03<00:09,  7.96it/s]

ymin: 2951, ymax: 2959, xmin: 206, xmax:214


 29%|██▉       | 29/100 [00:03<00:09,  7.37it/s]

ymin: 2878, ymax: 2886, xmin: 1502, xmax:1510


 32%|███▏      | 32/100 [00:04<00:08,  8.39it/s]

ymin: 2993, ymax: 3001, xmin: 208, xmax:216


 33%|███▎      | 33/100 [00:04<00:08,  7.92it/s]

ymin: 2969, ymax: 2978, xmin: 2597, xmax:2606
ymin: 2945, ymax: 2952, xmin: 1798, xmax:1805
ymin: 2896, ymax: 2905, xmin: 2564, xmax:2573
ymin: 2983, ymax: 2992, xmin: 2789, xmax:2798
ymin: 2905, ymax: 2913, xmin: 152, xmax:160


 37%|███▋      | 37/100 [00:04<00:07,  8.41it/s]

ymin: 1230, ymax: 1238, xmin: 827, xmax:835
ymin: 2991, ymax: 2998, xmin: 170, xmax:177


 40%|████      | 40/100 [00:05<00:07,  7.80it/s]

ymin: 2939, ymax: 2946, xmin: 1896, xmax:1903


 42%|████▏     | 42/100 [00:05<00:07,  8.24it/s]

ymin: 835, ymax: 842, xmin: 261, xmax:268
ymin: 888, ymax: 896, xmin: 180, xmax:188


 44%|████▍     | 44/100 [00:05<00:06,  8.88it/s]

ymin: 2975, ymax: 2982, xmin: 961, xmax:968
ymin: 310, ymax: 318, xmin: 409, xmax:417
ymin: 257, ymax: 266, xmin: 225, xmax:234
ymin: 1386, ymax: 1393, xmin: 1942, xmax:1949
ymin: 2899, ymax: 2908, xmin: 315, xmax:324
ymin: 2914, ymax: 2922, xmin: 1910, xmax:1918
ymin: 2954, ymax: 2961, xmin: 687, xmax:694


 46%|████▌     | 46/100 [00:05<00:06,  8.30it/s]

ymin: 1866, ymax: 1874, xmin: 2682, xmax:2690
ymin: 2986, ymax: 2994, xmin: 2229, xmax:2237


 48%|████▊     | 48/100 [00:06<00:06,  7.73it/s]

ymin: 2988, ymax: 2997, xmin: 2937, xmax:2946


 51%|█████     | 51/100 [00:06<00:05,  9.37it/s]

ymin: 2974, ymax: 2982, xmin: 1254, xmax:1262
ymin: 2897, ymax: 2906, xmin: 2835, xmax:2844


 54%|█████▍    | 54/100 [00:06<00:05,  8.87it/s]

ymin: 2994, ymax: 3003, xmin: 1721, xmax:1730
ymin: 2990, ymax: 2998, xmin: 1212, xmax:1220
ymin: 1897, ymax: 1906, xmin: 302, xmax:311
ymin: 2885, ymax: 2894, xmin: 1053, xmax:1062
ymin: 1822, ymax: 1830, xmin: 2569, xmax:2577
ymin: 2895, ymax: 2902, xmin: 2307, xmax:2314


 58%|█████▊    | 58/100 [00:07<00:05,  7.72it/s]

ymin: 2929, ymax: 2938, xmin: 332, xmax:341
ymin: 2724, ymax: 2733, xmin: 1833, xmax:1842


 60%|██████    | 60/100 [00:07<00:04,  8.94it/s]

ymin: 2932, ymax: 2941, xmin: 226, xmax:235
ymin: 401, ymax: 408, xmin: 1244, xmax:1251


 61%|██████    | 61/100 [00:07<00:04,  8.87it/s]

ymin: 2952, ymax: 2960, xmin: 2650, xmax:2658
ymin: 2956, ymax: 2963, xmin: 1245, xmax:1252
ymin: 2939, ymax: 2946, xmin: 1652, xmax:1659


 65%|██████▌   | 65/100 [00:08<00:04,  7.66it/s]

ymin: 1868, ymax: 1876, xmin: 1675, xmax:1683


 67%|██████▋   | 67/100 [00:08<00:04,  7.51it/s]

ymin: 2907, ymax: 2916, xmin: 2771, xmax:2780
ymin: 1841, ymax: 1849, xmin: 2023, xmax:2031
ymin: 2136, ymax: 2145, xmin: 1195, xmax:1204


 70%|███████   | 70/100 [00:08<00:04,  7.04it/s]

ymin: 2986, ymax: 2993, xmin: 829, xmax:836


 72%|███████▏  | 72/100 [00:09<00:03,  7.24it/s]

ymin: 2981, ymax: 2988, xmin: 1958, xmax:1965
ymin: 2950, ymax: 2958, xmin: 2437, xmax:2445
ymin: 2922, ymax: 2929, xmin: 633, xmax:640


 75%|███████▌  | 75/100 [00:09<00:02,  8.67it/s]

ymin: 2950, ymax: 2958, xmin: 447, xmax:455


 79%|███████▉  | 79/100 [00:09<00:02,  8.66it/s]

ymin: 1783, ymax: 1791, xmin: 2224, xmax:2232


 80%|████████  | 80/100 [00:10<00:02,  8.39it/s]

ymin: 704, ymax: 712, xmin: 2625, xmax:2633
ymin: 604, ymax: 611, xmin: 1460, xmax:1467
ymin: 819, ymax: 827, xmin: 2826, xmax:2834
ymin: 665, ymax: 673, xmin: 2790, xmax:2798
ymin: 641, ymax: 649, xmin: 699, xmax:707
ymin: 1111, ymax: 1119, xmin: 1371, xmax:1379
ymin: 1183, ymax: 1192, xmin: 1968, xmax:1977


 84%|████████▍ | 84/100 [00:10<00:01, 10.33it/s]

ymin: 507, ymax: 515, xmin: 482, xmax:490
ymin: 2821, ymax: 2828, xmin: 2739, xmax:2746


 86%|████████▌ | 86/100 [00:10<00:01,  9.98it/s]

ymin: 305, ymax: 314, xmin: 1333, xmax:1342
ymin: 2988, ymax: 2995, xmin: 1425, xmax:1432
ymin: 2987, ymax: 2995, xmin: 2929, xmax:2937


 90%|█████████ | 90/100 [00:11<00:01,  8.77it/s]

ymin: 539, ymax: 547, xmin: 1966, xmax:1974
ymin: 175, ymax: 182, xmin: 2072, xmax:2079


 91%|█████████ | 91/100 [00:11<00:01,  8.25it/s]

ymin: 1122, ymax: 1129, xmin: 266, xmax:273
ymin: 2874, ymax: 2883, xmin: 2764, xmax:2773
ymin: 224, ymax: 232, xmin: 2763, xmax:2771
ymin: 618, ymax: 625, xmin: 408, xmax:415


 95%|█████████▌| 95/100 [00:11<00:00,  8.40it/s]

ymin: 2008, ymax: 2017, xmin: 687, xmax:696
ymin: 1216, ymax: 1223, xmin: 1548, xmax:1555


 98%|█████████▊| 98/100 [00:12<00:00,  7.37it/s]

ymin: 518, ymax: 527, xmin: 2954, xmax:2963
ymin: 2358, ymax: 2366, xmin: 598, xmax:606
ymin: 2279, ymax: 2287, xmin: 829, xmax:837


100%|██████████| 100/100 [00:12<00:00,  8.09it/s]


In [None]:
""" Generate tensorflow.csv containing all bounding boxes
filename,width,height,class,xmin,ymin,xmax,ymax
"""
# Generate tensorflow.csv containing all bounding boxes
#filename,width,height,class,xmin,ymin,xmax,ymax

# Column order follows the header documented for this cell:
# filename,width,height,class,xmin,ymin,xmax,ymax
tf_columns = ["filename", "width", "height", "class", "xmin", "ymin", "xmax", "ymax"]

# Read the record array once instead of going back to the archive on every iteration.
injected_info = y['data']
rows = []

# Possibly add for-loop of other npz files
for i in range(1, len(injected_info), 2):

    # Get wanted filename index. Record i belongs to image i // 2, the same
    # mapping the per-image CSV cells use; (i + 1) / 2 pointed at the next image.
    index = i // 2

    img_file = f"{team_6_dir}/data/astroids_{npz_index}_{index}.jpg"

    if not os.path.exists(img_file):
        continue

    # Only the size is needed; close the file handle straight away.
    with Image.open(img_file) as img:
        width, height = img.size

    # Add all boxes in image to the row list. Series.iteritems() and
    # DataFrame.append() were removed in pandas 2.0.
    # NOTE(review): this reads the raw boxes from `y`, not the per-image CSVs
    # that the cleaning cell rewrites -- confirm that is intended.
    for _, row in injected_info[i]['box'].items():
        rows.append({
            "filename": img_file,
            "width": width, "height": height,
            "class": "AST",
            "xmin": row.xmin, "ymin": row.ymin,
            "xmax": row.xmax, "ymax": row.ymax})

# Build the frame once; passing `columns` keeps the header even with no rows.
df = pd.DataFrame(rows, columns=tf_columns)

# Save dataframe to file
df.to_csv(f"{team_6_dir}/data/tensorflow.csv", index=False)

In [None]:
""" View some files with Ground Truth boxes labeled
"""
# Choose the image to inspect; its boxes are read from record 2 * index + 1
index = 0
i = 2 * index + 1

# Recover the filename index from the record index
index = i // 2
image = X[index]

fig = plt.figure(figsize=(40, 40))

# Axes covering the whole figure
ax = fig.add_axes([0, 0, 1, 1])
plt.imshow(image, cmap="Greys_r")

ground_truth = y['data'][i]['box']
for box in ground_truth:
    corner = (box.xmin, box.ymin)
    box_w = box.xmax - box.xmin
    box_h = box.ymax - box.ymin

    # Text label placed just inside the top-right corner of the box
    ax.annotate('Ast', xy=(box.xmax-40, box.ymin+20))

    # Outline of the ground-truth box; a single class, so a single colour
    outline = patches.Rectangle(corner, box_w, box_h,
                                edgecolor='lime', facecolor='none', lw=4)
    ax.add_patch(outline)


plt.savefig(f"{save_dir}/asteroid_ground_truths_{index}", format='png')