In [1]:
import cv2
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import pickle

from pycochleagram.utils import wav_to_array
from pycochleagram.cochleagram import human_cochleagram
from tqdm import tqdm
from collections import defaultdict
from typing import List, Tuple

from VISDataPoint import VISDataPoint

import warnings
warnings.filterwarnings("ignore")

In [2]:
with open('../../data/test.txt', 'r') as f:
    file_names = [x.strip() for x in f.readlines()] 

root = '../../data/'

In [3]:
def createDatapointsFromFile(file_name, frame_size=(224, 224), window_duration=0.5):
    wav_file = os.path.join(root, f'{file_name}_denoised.wav')
    video_file = os.path.join(root, f'{file_name}_denoised.mp4')
    annotation_file = os.path.join(root, f'{file_name}_times.txt')

    annotations = pd.read_csv(annotation_file, sep=' ', names=['Time', 'Material', 'Contact Type', 'Motion Type'])
    wav, sample_rate = wav_to_array(wav_file)

    cap = cv2.VideoCapture(video_file)
    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    frames = []

    while True:
        ret, frame = cap.read()

        if not ret:
            break
        
        resized_frame = cv2.resize(frame, dsize=frame_size, interpolation=cv2.INTER_CUBIC)
        frames.append(resized_frame)
    
    cap.release()

    data_points = []
    for row in annotations.iterrows():
        peak_time = row[1]['Time']
        start_time = peak_time - window_duration/2

        start_frame = int(start_time * frame_rate)
        end_frame = start_frame + int(frame_rate * window_duration)
        window_frames = frames[start_frame-1:end_frame+2]

        start_sound = int(start_time * sample_rate)
        end_sound = start_sound + int(sample_rate * window_duration)
        window_sound = wav[start_sound:end_sound]

        coch = human_cochleagram(window_sound, sample_rate, n=40, low_lim=100, hi_lim=10000, sample_factor=1, downsample=90, nonlinearity='power')

        data_points.append(VISDataPoint(coch, window_frames, row[1]['Material']))

    return data_points

In [4]:
n_points = 0
n_file_fails = 0

material_stats = defaultdict(int)

for file_name in tqdm(file_names):
    try:
        data_points = createDatapointsFromFile(file_name)
        for data_point in data_points:
            material_stats[data_point.material] += 1
            with open(f'/scratch/kapur/test/{n_points}.pkl', 'wb') as f:
                pickle.dump(data_point, f)
            n_points += 1
    except:
        n_file_fails += 1
        pass

 11%|█         | 27/244 [02:02<12:34,  3.48s/it][mov,mp4,m4a,3gp,3g2,mj2 @ 0x3884c40] moov atom not found
 39%|███▊      | 94/244 [08:05<15:18,  6.12s/it][mov,mp4,m4a,3gp,3g2,mj2 @ 0x4058f80] moov atom not found
 96%|█████████▌| 234/244 [21:14<00:32,  3.23s/it][mov,mp4,m4a,3gp,3g2,mj2 @ 0x4058f80] moov atom not found
100%|██████████| 244/244 [22:18<00:00,  5.48s/it]


In [5]:
n_points

7036