# Self-supervised learning of meteor data

### Imports

In [1]:
import os
import pandas as pd
import cv2
import ipywidgets as widgets
from IPython.display import display


### Read in files

In [2]:
# Base directory that every path below is derived from
base_path = '/mnt/disk1/KILabDaten/Geminiden2021'

# One CSV file per camera
csv1_path = f'{base_path}/kam1.csv'
csv2_path = f'{base_path}/kam2.csv'

# One data directory per camera
path_c1 = f'{base_path}/Kamera1'
path_c2 = f'{base_path}/Kamera2'
# presumably the extension of the recorded camera files; not used in this cell
fileend = '.mov'

# Meteor-frame directory of each camera. The trailing slash is intentional:
# a later cell appends folder names to this string directly.
meteor_frames_path_c1 = f'{path_c1}/MeteorFrames/'
meteor_frames_path_c2 = f'{path_c2}/MeteorFrames/'


### Set Cam (1 or 2)

In [4]:
# Camera selector: a dropdown offering '1' and '2', with '2' preselected.
# The chosen value is read back through `w.value`.
w = widgets.Dropdown(
    description='Cam Number:',
    options=['1', '2'],
    value='2',
    disabled=False,
)
# Bare expression as the cell's last statement so the notebook renders the widget
w

Dropdown(description='Cam Number:', index=1, options=('1', '2'), value='2')

In [5]:
# Camera number -> [csv path, camera data directory, meteor-frame directory]
camdict = {
    '1': [csv1_path, path_c1, meteor_frames_path_c1],
    '2': [csv2_path, path_c2, meteor_frames_path_c2],
}

# Resolve the paths for the camera currently selected in the dropdown
camnum = w.value
csv_path, file_path, frame_path = camdict[camnum]

# Number of files a frame folder must contain to be considered complete
EXPECTED_FRAME_COUNT = 251

# First os.walk() entry = the frame directory itself with its immediate
# sub-folders and files. os.walk() yields nothing for a missing directory, so
# turn the resulting StopIteration into an error that names the path.
try:
    path, dirs, files = next(os.walk(frame_path))
except StopIteration:
    raise FileNotFoundError(
        "Frame directory not found or not readable: " + frame_path
    ) from None

print("Cam " + camnum + " selected")
print("Folders with wrong number of frames: ")
for d in dirs:
    # os.path.join avoids the doubled '/' that plain concatenation produced
    # when frame_path already ends with a slash.
    p, _, f = next(os.walk(os.path.join(path, d)))
    if len(f) != EXPECTED_FRAME_COUNT:
        print(p, len(f))


Cam 2 selected
Folders with wrong number of frames: 
/mnt/disk1/KILabDaten/Geminiden2021/Kamera2/MeteorFrames//63 209
/mnt/disk1/KILabDaten/Geminiden2021/Kamera2/MeteorFrames//526 125


In [6]:
def split_extension_list(string_list):
    """Return the integer stems of the given file names, sorted ascending.

    Every entry has its extension removed with ``os.path.splitext`` and the
    remaining stem converted with ``int``; a stem that is not an integer
    literal therefore raises ``ValueError``.
    """
    stems = []
    for file_name in string_list:
        stem, _extension = os.path.splitext(file_name)
        stems.append(int(stem))
    stems.sort()
    return stems


def sorted_png_list(string_list):
    """Return ``'<number>.png'`` names for the given files in numeric order.

    The numbers come from ``split_extension_list``; each result carries the
    ``.png`` suffix regardless of the extension the input entry had.
    """
    return [f'{number}.png' for number in split_extension_list(string_list)]

def sorted_string_list(string_list):
    """Return the numeric strings of *string_list* ordered by integer value.

    Entries are parsed with ``int`` and written back with ``str``, so the
    result is normalised: ``'007'`` comes back as ``'7'``.
    """
    return list(map(str, sorted(map(int, string_list))))

In [7]:
# Collect, for every folder below the frame directory, its frame file names
# in numeric order.
path, dirs, _ = next(os.walk(frame_path))

# Folder names are numeric strings; order them by value, not lexically.
# NOTE(review): sorted_string_list round-trips names through int(), so a folder
# name with leading zeros would no longer match its on-disk name — confirm the
# folders are named with plain integers.
dirs = sorted_string_list(dirs)

# Folder name -> list of '<n>.png' file names sorted by n
img_d = {}

for d in dirs:
    # Only the files directly inside the folder are of interest.
    # os.path.join works whether or not frame_path ends with a slash.
    _, _, f = next(os.walk(os.path.join(path, d)))
    img_d[d] = sorted_png_list(f)


### Load images and process them

#### Data generator for reading in files

In [3]:
class CustomDataGen(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of cropped images with two one-hot targets.

    Each row of the DataFrame supplies an image path, a bounding box and two
    labels. A batch is ``(X, (y_name, y_type))`` where ``X`` holds the cropped,
    resized and ``/255`` scaled images and the ``y`` arrays are one-hot encoded.

    NOTE(review): this class uses ``tf`` and ``np``, but no import of
    TensorFlow or NumPy is visible in this part of the file — confirm both are
    imported before this cell runs.

    Args:
        df: DataFrame with one row per sample; a copy is stored.
        X_col: dict with keys ``'path'`` and ``'bbox'`` naming the columns that
            hold the image path and the bounding box.
        y_col: dict with keys ``'name'`` and ``'type'`` naming the two label
            columns.
        batch_size: number of rows per batch.
        input_size: target size; only the first two entries are used for
            resizing.
        shuffle: if true, rows are reshuffled in ``on_epoch_end``.
        **kwargs: forwarded unchanged to the Keras base class constructor.
    """

    def __init__(self, df, X_col, y_col,
                 batch_size,
                 input_size=(224, 224, 3),
                 shuffle=True,
                 **kwargs):
        # Newer Keras versions expect Sequence/PyDataset subclasses to call the
        # base constructor; with no extra kwargs this is a no-op on older ones.
        super().__init__(**kwargs)

        self.df = df.copy()
        self.X_col = X_col
        self.y_col = y_col
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle

        self.n = len(self.df)
        # Number of distinct values per label column = width of its one-hot vector
        self.n_name = self.df[y_col['name']].nunique()
        self.n_type = self.df[y_col['type']].nunique()

    def on_epoch_end(self):
        """Reshuffle the rows (when ``shuffle`` is set) so the next epoch sees a new order."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)

    def __get_input(self, path, bbox, target_size):
        """Load one image, crop it to *bbox*, resize it and divide it by 255.

        *bbox* must provide the keys ``'x'``, ``'y'``, ``'width'`` and
        ``'height'``; they are used directly as array slice bounds.
        """
        xmin, ymin, w, h = bbox['x'], bbox['y'], bbox['width'], bbox['height']

        image = tf.keras.preprocessing.image.load_img(path)
        image_arr = tf.keras.preprocessing.image.img_to_array(image)

        # Array layout is rows (y) first, then columns (x)
        image_arr = image_arr[ymin:ymin + h, xmin:xmin + w]
        image_arr = tf.image.resize(image_arr, (target_size[0], target_size[1])).numpy()

        # presumably 8-bit pixel data, which this maps to [0, 1] — TODO confirm
        return image_arr / 255.

    def __get_output(self, label, num_classes):
        """One-hot encode *label* into a vector of length *num_classes*."""
        return tf.keras.utils.to_categorical(label, num_classes=num_classes)

    def __get_data(self, batches):
        """Turn a slice of the DataFrame into ``(X_batch, (y_name, y_type))`` arrays."""
        path_batch = batches[self.X_col['path']]
        bbox_batch = batches[self.X_col['bbox']]

        name_batch = batches[self.y_col['name']]
        type_batch = batches[self.y_col['type']]

        X_batch = np.asarray([
            self.__get_input(path, bbox, self.input_size)
            for path, bbox in zip(path_batch, bbox_batch)
        ])

        y0_batch = np.asarray([self.__get_output(label, self.n_name) for label in name_batch])
        y1_batch = np.asarray([self.__get_output(label, self.n_type) for label in type_batch])

        return X_batch, (y0_batch, y1_batch)

    def __getitem__(self, index):
        """Return batch number *index* as ``(X, y)``."""
        start = index * self.batch_size
        # .iloc makes the positional row slicing explicit
        batches = self.df.iloc[start:start + self.batch_size]
        return self.__get_data(batches)

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is not served."""
        return self.n // self.batch_size

In [None]:
# Settings shared by the training and the validation generator; only the
# source DataFrame differs between the two.
generator_kwargs = dict(
    X_col={'path': 'filename', 'bbox': 'region_shape_attributes'},
    y_col={'name': 'name', 'type': 'type'},
    batch_size=batch_size,
    input_size=target_size,
)

traingen = CustomDataGen(train_df, **generator_kwargs)
valgen = CustomDataGen(val_df, **generator_kwargs)

# Fit on the training generator, passing the second one as validation data
model.fit(traingen, validation_data=valgen, epochs=num_epochs)