In [None]:
import torch
import data_reading
import os
import cv2
import numpy as np
from scipy import signal
import pickle

In [None]:
# Pixel size used to scale the normalised box coordinates read from the label files.
image_width, image_height = 128, 96

# Dataset locations, relative to the notebook's working directory.
labels_folder, imgs_folder, video_folder = (
    '../data/labels',
    '../data/images',
    '../data/training_videos',
)

In [None]:
def read_video(file_name):
    """Decode a video and compute a per-pixel temporal magnitude spectrogram.

    Every frame is loaded into one uint8 buffer laid out as
    (frame, channel, height, width); the spectrogram is then taken along the
    frame axis with a 14-sample Hamming window and an overlap of 13 samples.

    Args:
        file_name: Path of the video file to open with OpenCV.

    Returns:
        (freqs, times, spectr) exactly as returned by
        ``scipy.signal.spectrogram``. With ``axis=0`` the frequency axis takes
        the place of the frame axis and segment times become the last axis.

    Raises:
        IOError: If OpenCV cannot open ``file_name``.

    Note:
        The whole clip and its spectrogram are held in memory at once, which
        can be very large for long or high-resolution videos.
    """
    cap = cv2.VideoCapture(file_name)
    try:
        if not cap.isOpened():
            raise IOError(f"could not open video: {file_name}")
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        SAMPLING_FREQUENCY = cap.get(cv2.CAP_PROP_FPS)
        # Clip duration in seconds, according to the container metadata.
        print(frameCount/SAMPLING_FREQUENCY)

        buf = np.empty((frameCount, 3, frameHeight, frameWidth), np.dtype('uint8'))
        fc = 0
        while fc < frameCount:
            ret, img = cap.read()
            if not ret:
                # The reported frame count is only metadata; stop at the first
                # failed read instead of transposing a missing frame.
                break
            buf[fc] = np.transpose(img, (2, 0, 1))  # HWC -> CHW
            fc += 1
    finally:
        # Release the decoder even when reading fails part-way through.
        cap.release()

    # Drop the never-written (uninitialised) tail when fewer frames were decoded.
    buf = buf[:fc]
    freqs, times, spectr = signal.spectrogram(buf, fs=SAMPLING_FREQUENCY, window=('hamming'), noverlap=13, nperseg=14, axis=0, mode='magnitude')
    return freqs, times, spectr

In [None]:
# Spectrogram outputs per video, keyed by the file name up to its first '.'.
all_freqs, all_times, all_spectr = {}, {}, {}
for video_path in os.listdir(video_folder):
    # Any name containing 'NW' also contains 'W', so one membership test covers both.
    if 'W' not in video_path:
        continue
    freqs, times, spectr = read_video(f'{video_folder}/{video_path}')
    name = video_path.split('.')[0]
    all_freqs[name], all_times[name], all_spectr[name] = freqs, times, spectr
    # Caveat: keeping every spectrogram in memory is not viable -- the
    # spectrogram turns out to take 24 gigabytes.

In [None]:
# Pickle each of the three per-video dictionaries to its own file.
for _path, _payload in (
    ('../data/all_freqs.data', all_freqs),
    ('../data/all_times.data', all_times),
    ('../data/all_spectr.data', all_spectr),
):
    with open(_path, 'wb') as f:
        pickle.dump(_payload, f)

In [None]:
# Frame count and duration (seconds) of every training video, keyed by the
# file name up to its first '.'.
frame_counts = {}
duration = {}
for video_path in os.listdir(video_folder):
    if 'W' in video_path or 'NW' in video_path:
        cap = cv2.VideoCapture(f'{video_folder}/{video_path}')
        try:
            frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
        finally:
            # Release the decoder handle; one capture per video would otherwise leak.
            cap.release()
        if fps <= 0:
            # Fail with the offending file instead of an opaque ZeroDivisionError.
            raise ValueError(f"video reports no frame rate: {video_folder}/{video_path}")
        dure = frameCount/fps
        name = video_path.split('.')[0]
        frame_counts[name] = frameCount
        duration[name] = dure

In [None]:
from bisect import bisect_left


def _read_boxes(label_path):
    """Parse one label file into box and class tensors.

    Every non-blank line is read as ``<class> <x> <y> <w> <h>``; the four
    floats are scaled to pixels with ``image_width`` / ``image_height``.

    Returns:
        (boxes, labels): ``boxes`` is a float tensor of shape (n, 4) holding
        ``[xmin, ymin, xmax, ymax]`` with the max corner clamped to the image;
        ``labels`` is a long tensor of shape (n,) with each class id plus one.
    """
    boxes = []
    labels = []
    with open(label_path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            label, rest = int(fields[0]), fields[1:]
            xmin, ymin, w, h = map(float, rest)
            # NOTE(review): the first two floats are used as the top-left
            # corner; standard YOLO files store the box centre -- confirm.
            xmin, ymin = int(xmin*image_width), int(ymin*image_height)
            w, h = int(w*image_width), int(h*image_height)
            boxes.append(torch.tensor([xmin, ymin, min(w+xmin, image_width-1), min(h+ymin, image_height-1)]))
            labels.append(label)
    if boxes:
        box_tensor = torch.stack(boxes).float()
    else:
        # torch.stack rejects an empty list; represent "no objects" explicitly.
        box_tensor = torch.zeros((0, 4))
    # +1 shift presumably reserves class 0 for background -- TODO confirm.
    return box_tensor, torch.tensor(labels, dtype=torch.long) + 1


def _spectrum_at(times, spectr, t):
    """Linearly interpolate ``spectr`` along its last (time) axis at time ``t``.

    ``times`` must be sorted ascending. Requests before the first or after the
    last segment time are clamped to the nearest available slice.
    """
    pos = bisect_left(times, t, 0, len(times))
    if pos == 0:
        # No left neighbour: index pos-1 would wrap around to the last slice.
        return spectr[..., 0]
    if pos == len(times):
        # No right neighbour: times[pos] would be out of range.
        return spectr[..., -1]
    left, right = times[pos-1], times[pos]
    leftW = (right-t)/(right-left)   # weight of the earlier slice
    rightW = (t-left)/(right-left)   # weight of the later slice
    return spectr[..., pos-1]*leftW + spectr[..., pos]*rightW


# Each entry is (image, spectrum, target):
#   image    -- RGB image as a double tensor, channels first
#   spectrum -- spectrogram slice interpolated at the image's timestamp
#   target   -- {'boxes': float tensor (n, 4), 'labels': long tensor (n,)}
data = []
for folder in os.listdir(labels_folder):
    if folder == '.DS_Store':
        continue
    # Drops a 7-character prefix so the remainder matches the video keys
    # (presumably a fixed folder-name prefix -- verify against the dataset).
    name = folder[7:]
    total_frame = frame_counts[name]
    dure = duration[name]
    for filename in os.listdir(f"{labels_folder}/{folder}"):
        if not filename.endswith("YOLO"):
            continue
        _file_names = sorted(os.listdir(f"{labels_folder}/{folder}/{filename}"))
        # i is the image count
        for i, _file in enumerate(_file_names):
            if i == 0 or i == len(_file_names)-1:
                continue  # drop the ends
            boxes, labels = _read_boxes(f"{labels_folder}/{folder}/{filename}/{_file}")

            # Timestamp of image i in seconds; assumes one labelled image
            # every 10 video frames -- TODO confirm.
            front_part = i*10/total_frame*dure
            # Given read_video's buffer layout this slice is (freq, channel, height, width).
            spectr = _spectrum_at(all_times[name], all_spectr[name], front_part)

            image_path = f"{imgs_folder}/{folder}/{_file[:-3]}jpg"
            bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if bgr is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise FileNotFoundError(f"could not read image: {image_path}")
            image = torch.tensor(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
            image = image.permute(2,0,1).double()

            # Append the finished record in one go: tuples cannot be filled in
            # after the fact, and a failure above leaves no half-built entry.
            data.append((image, torch.tensor(spectr).double(), {'boxes': boxes, 'labels': labels}))

In [None]:
# Writes the same three dictionaries to the same three files as the earlier
# dump cell; the dataset-building cell only reads them, so contents match.
_pickle_targets = {
    '../data/all_freqs.data': all_freqs,
    '../data/all_times.data': all_times,
    '../data/all_spectr.data': all_spectr,
}
for _target, _obj in _pickle_targets.items():
    with open(_target, 'wb') as f:
        pickle.dump(_obj, f)