In [1]:
#using scipy.io to load mat files
import scipy.io as scio
#https://stackoverflow.com/questions/874461/read-mat-files-in-python
import os
import numpy as np

In [2]:
# Directory one level above the notebook's current working directory.
curr_direc = os.path.dirname(os.getcwd())
# Every dataset used below is looked up under <that directory>/Dataset.
parent_direc = os.path.join(curr_direc, 'Dataset')

In [3]:
# Ground-truth location for each dataset, keyed by dataset name.
# 'avenue' and 'ped2' point at a .mat file; 'shangai' points at a path that
# shangai_dataset_gt() lists as a directory of saved arrays.
NAMES_GT_MAPPING = {
    'avenue': os.path.join(parent_direc, 'Avenue Dataset', 'avenue.mat'),
    'ped2': os.path.join(parent_direc, 'ped2', 'ped2.mat'),
    'shangai': os.path.join(parent_direc, 'ShangaiTech_testing', 'test_frame_mask'),
}

In [4]:
# Folder holding the test frames of each dataset, keyed by dataset name.
# gen_dataset_gt() lists this folder and treats each entry as one video.
NAMES_FRAMES_MAPPING = {
    'avenue': os.path.join(parent_direc, 'Avenue Dataset', 'testing', 'frames'),
    'ped2': os.path.join(parent_direc, 'ped2', 'testing', 'frames'),
    'shangai': os.path.join(parent_direc, 'ShangaiTech_testing', 'testing', 'frames'),
}

In [5]:
def get_video_length(dataset_frames, vid_list, vid_num):
    """Return how many entries the folder of one video contains.

    Args:
        dataset_frames: folder that contains one sub-folder per video.
        vid_list: names of the video sub-folders.
        vid_num: position in ``vid_list`` of the video to measure.

    Returns:
        The number of directory entries in ``dataset_frames/vid_list[vid_num]``
        (every entry is counted, whatever its type).
    """
    frames_dir = os.path.join(dataset_frames, vid_list[vid_num])
    return len(os.listdir(frames_dir))

In [6]:
def gen_dataset_gt(dataset):
    """Build per-frame anomaly labels for every video of a .mat-annotated dataset.

    The file registered under ``NAMES_GT_MAPPING[dataset]`` is read with
    ``scipy.io.loadmat`` and its ``'gt'`` entry is used. Entry ``i`` is handled
    as a 2 x K table for video ``i``: row 0 holds the first and row 1 the last
    frame number of each of the K abnormal events. The pair is applied as a
    1-based inclusive range, i.e. indices ``start - 1 .. end - 1`` are set to 1.

    For each video an int8 vector (1 = abnormal, 0 = normal) is appended to the
    module-level list ``gt``, which must already exist when this is called.
    The vector length is the number of entries in the video's frame folder.

    Args:
        dataset: key present in both ``NAMES_GT_MAPPING`` and
            ``NAMES_FRAMES_MAPPING`` whose ground truth is a .mat file.

    Returns:
        None. The results are appended to ``gt``.
    """
    # Location of the ground-truth .mat file for this dataset.
    gt_loc = NAMES_GT_MAPPING[dataset]
    # squeeze_me=True drops unit dimensions, so a video with a single event
    # may come back as a 1-D array (handled by the reshape below).
    abnormal_events = scio.loadmat(gt_loc, squeeze_me=True)['gt']
    # First dimension of the stored array is the number of videos.
    num_video = abnormal_events.shape[0]
    dataset_frames = NAMES_FRAMES_MAPPING[dataset]
    # Video i of the annotation is matched to the i-th folder in name order.
    # Only directories are kept: a stray file (e.g. .DS_Store) would otherwise
    # shift that index or make get_video_length() fail.
    video_list = sorted(
        entry for entry in os.listdir(dataset_frames)
        if os.path.isdir(os.path.join(dataset_frames, entry))
    )
    for i in range(num_video):
        # Number of frames of the video = length of its label vector.
        length = get_video_length(dataset_frames, video_list, i)
        indiv_vid_gt = np.zeros((length,), dtype=np.int8)
        indiv_ab_eve = abnormal_events[i]
        # A single event arrives as shape (2,); turn it into (2, 1).
        if indiv_ab_eve.ndim == 1:
            indiv_ab_eve = indiv_ab_eve.reshape((indiv_ab_eve.shape[0], -1))
        _, num_abnormal = indiv_ab_eve.shape
        for j in range(num_abnormal):
            # Cast to Python int before the arithmetic: float-typed .mat data
            # cannot be used as a slice bound, and small unsigned integer
            # types could wrap around on "- 1".
            start = int(indiv_ab_eve[0, j]) - 1
            end = int(indiv_ab_eve[1, j])
            indiv_vid_gt[start:end] = 1
        gt.append(indiv_vid_gt)

In [7]:
def shangai_dataset_gt():
    """Append every array saved in the 'shangai' ground-truth folder to ``gt``.

    The folder is ``NAMES_GT_MAPPING['shangai']``; its entries are loaded with
    ``np.load`` in sorted name order and appended, one array per entry, to the
    module-level list ``gt`` (which must already exist).
    np.load reference: https://www.geeksforgeeks.org/numpy-load-in-python/
    """
    mask_dir = NAMES_GT_MAPPING['shangai']
    mask_files = np.sort(os.listdir(mask_dir))
    # extend() consumes the generator one item at a time, so arrays are added
    # in the same order as an explicit append loop.
    gt.extend(np.load(os.path.join(mask_dir, fname)) for fname in mask_files)

In [8]:
# Accumulator for the per-video label arrays; the functions above append to it.
gt = []
# The position of a video in gt follows the call order:
# avenue first, then ped2, then the 'shangai' masks.
for dataset_name in ('avenue', 'ped2'):
    gen_dataset_gt(dataset_name)
shangai_dataset_gt()
print(gt)

[array([0, 0, 0, ..., 0, 0, 0], dtype=int8), array([0, 0, 0, ..., 0, 0, 0], dtype=int8), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [20]:
# Sanity check: number of per-video label arrays currently held in gt.
print(len(gt))

140


In [17]:
# Persist the per-video label arrays to labels.npy.
# Reference: https://www.geeksforgeeks.org/numpy-save/
# The arrays have different lengths, so they are packed one by one into an
# explicit 1-D object array. Handing the ragged list straight to np.save relies
# on an implicit conversion that raises ValueError on NumPy >= 1.24.
# fix_imports is left out: it only concerned Python 2 pickles and is deprecated
# in recent NumPy releases.
gt_object_array = np.empty(len(gt), dtype=object)
for video_idx, video_labels in enumerate(gt):
    # Scalar-index assignment stores the array itself as one element.
    gt_object_array[video_idx] = video_labels
np.save('labels.npy', gt_object_array, allow_pickle=True)

In [18]:
# Read labels.npy back to check the round trip. allow_pickle=True is needed
# because the file stores an object array (one label array per video).
# Only load pickled .npy files that come from a trusted source.
result_gt = np.load('labels.npy', allow_pickle=True)
print(result_gt.shape)
print(result_gt)

(140,)
[array([0, 0, 0, ..., 0, 0, 0], dtype=int8)
 array([0, 0, 0, ..., 0, 0, 0], dtype=int8)
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 