### Zindi Computer Vision Challenge
Functions and Classes for Data Preprocessing and Analysis

In [None]:
def obtain_hist_bins(in_array, in_bins):
    """Return the histogram bin edges computed over every value of ``in_array``.

    Args:
        in_array: NumPy array of any shape; it is flattened before binning.
        in_bins: Bin specification forwarded unchanged to ``np.histogram``.

    Returns:
        The bin-edge array produced by ``np.histogram``; the counts are
        discarded.
    """
    values = in_array.ravel()
    # np.histogram returns (counts, edges); only the edges are needed here.
    return np.histogram(values, in_bins)[1]

In [None]:
def generate_2d_matrix(in_array, in_bins, num_bins, num_steps=12):
    """Build a normalized (bin x step) histogram matrix.

    For each of the first ``num_steps`` entries of ``in_array`` a histogram
    over ``in_bins`` is computed and stored as one column. The whole matrix
    is then divided by its total count so that its entries sum to 1.

    Args:
        in_array: Object indexable by integer along its first axis; every
            ``in_array[i]`` must be a NumPy array (``.flatten()`` is called
            on it). It must hold at least ``num_steps`` entries.
        in_bins: Bin specification forwarded to ``np.histogram``.
        num_bins: Number of rows of the result. It must equal the number of
            bins that ``in_bins`` produces, otherwise the column assignment
            fails.
        num_steps: Number of leading entries of ``in_array`` to histogram,
            i.e. the number of columns of the result. Defaults to 12.

    Returns:
        A float array of shape ``(num_bins, num_steps)``.
    """
    out_matrix = np.zeros((num_bins, num_steps))
    for step in range(num_steps):
        out_matrix[:, step] = np.histogram(in_array[step].flatten(), in_bins)[0]

    # Scale by the total number of observations in the bin x step matrix.
    # The floor of 1.0 avoids dividing by zero when no value fell in any bin.
    total = out_matrix.sum()
    return out_matrix / max(1.0, total)

In [None]:
def generate_3d_matrix(in_channel_set, in_idx_per_channel, in_channel_bins, in_sample):
    """Load one sample file and stack a normalized histogram matrix per channel.

    Args:
        in_channel_set: Sequence of channel identifiers; its order fixes the
            first axis of the result.
        in_idx_per_channel: Lookup from a channel identifier to the index
            used to select that channel's data from the loaded sample.
        in_channel_bins: 2-D array of bin edges with one column per entry of
            ``in_channel_set``; the number of bins is its row count minus 1.
        in_sample: Path or file object passed to ``np.load``.

    Returns:
        A float array of shape ``(len(in_channel_set), num_bins, 12)`` whose
        slice ``[k]`` is the ``generate_2d_matrix`` result for channel ``k``.
    """
    sample_data = np.load(in_sample)
    n_channels = len(in_channel_set)
    n_bins = in_channel_bins.shape[0] - 1

    stacked = np.zeros((n_channels, n_bins, 12))
    for position, channel in enumerate(in_channel_set):
        # Channels are stored in the sample under their own index, which may
        # differ from their position in in_channel_set.
        source_idx = in_idx_per_channel[channel]
        channel_data = sample_data[source_idx]
        channel_edges = in_channel_bins[:, position]
        stacked[position] = generate_2d_matrix(channel_data, channel_edges, n_bins)

    return stacked

In [None]:
# Create a custom DataSet class
class ZindiDataset(torch.utils.data.Dataset):
    """Dataset that pairs ``x[i]`` with ``y[i]``.

    Both ``x`` and ``y`` must expose ``.shape`` and support indexing; their
    first dimension is taken to be the number of samples.
    """

    def __init__(self, x, y):
        """Store the inputs ``x`` and targets ``y``.

        Raises:
            ValueError: If ``x`` and ``y`` differ in size along the first
                dimension.
        """
        super().__init__()
        # Explicit check instead of `assert`, which is removed under
        # `python -O` and would let mismatched inputs through silently.
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x and y must have the same number of samples, "
                f"got {x.shape[0]} and {y.shape[0]}"
            )
        self.x = x
        self.y = y

    def __len__(self):
        """Return the number of samples (first dimension of ``y``)."""
        return self.y.shape[0]

    def __getitem__(self, index):
        """Return the ``(x[index], y[index])`` pair."""
        return self.x[index], self.y[index]