# Extracting Frames

In [41]:
# Imports
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import torch
import torch.nn as nn
import pandas as pd
import re
import shutil
from torchvision import transforms
from transformers import ViTForImageClassification, ViTImageProcessor, ViTConfig
from torch.utils.data import DataLoader, Dataset, Subset
from tqdm.notebook import tqdm
from sklearn.metrics import RocCurveDisplay, roc_curve, ConfusionMatrixDisplay, confusion_matrix
from sklearn.mixture import GaussianMixture

## Moving Average Background Subtraction

In [42]:
# Helper Functions
def calc_median(frames):
    median_frame = np.median(frames, axis=0).astype(dtype=np.uint8)
    return median_frame

def doMovingAverageBGS(image, prev_frames):
    median_img = calc_median(prev_frames)
    image = cv2.absdiff(image, median_img)
    return image

In [43]:
dir_path = os.path.dirname(os.path.realpath("__file__"))

# get paths to training and testing data
data_dir = os.path.join(dir_path, 'data')
train_data_dir = os.path.join(data_dir, 'train')
test_data_dir = os.path.join(data_dir, 'test')

# get path to folder where extracted frames are stored
background_path = os.path.join(dir_path, 'background_sub_testing_movingavg2')
os.makedirs(background_path, exist_ok=True)

#path to where binary frames are stored
frame_data_dir = os.path.join(dir_path, "background_sub_movingavg_frames2")
frame_train_data_dir = os.path.join(frame_data_dir, 'train')
frame_test_data_dir = os.path.join(frame_data_dir, 'test')

frame_train_data_dir_nonleak = os.path.join(frame_train_data_dir, 'Nonleaks')
frame_train_data_dir_leak = os.path.join(frame_train_data_dir, 'Leaks')
frame_test_data_dir_nonleak = os.path.join(frame_test_data_dir, 'Nonleaks')
frame_test_data_dir_leak = os.path.join(frame_test_data_dir, 'Leaks')

os.makedirs(frame_data_dir, exist_ok=True)
os.makedirs(frame_train_data_dir, exist_ok=True)
os.makedirs(frame_test_data_dir, exist_ok=True)

os.makedirs(frame_train_data_dir_nonleak, exist_ok=True)
os.makedirs(frame_train_data_dir_leak, exist_ok=True)
os.makedirs(frame_test_data_dir_nonleak, exist_ok=True)
os.makedirs(frame_test_data_dir_leak, exist_ok=True)

# get folder to put 8 classes in
classes_folder = os.path.join(dir_path, 'background_sub_movingavg8_frames2')
os.makedirs(classes_folder, exist_ok=True)

classes_train_folder = os.path.join(classes_folder, 'train')
os.makedirs(classes_train_folder, exist_ok = True)

classes_test_folder = os.path.join(classes_folder, 'test')
os.makedirs(classes_test_folder, exist_ok = True)

for i in range(8):
    os.makedirs(os.path.join(classes_train_folder, 'C' + str(i)), exist_ok=True)
    os.makedirs(os.path.join(classes_test_folder, 'C' + str(i)), exist_ok=True)


In [44]:
def get_frames(vid_path, out_path, med_count):

    before_path = os.path.join(out_path, 'before')
    after_path = os.path.join(out_path, 'after')
    median_path = os.path.join(out_path, 'median')

    print("Before path" + before_path, flush = True)
    print("After path" + after_path, flush = True)
    print("Median path" + median_path, flush = True)

    os.makedirs(before_path, exist_ok=True)
    os.makedirs(after_path, exist_ok=True)
    os.makedirs(median_path, exist_ok=True)

    cap = cv2.VideoCapture(vid_path)

    cap.set(cv2.CAP_PROP_POS_MSEC, 0)
    success = True

    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS) 
    print("Num frames: %d" % num_frames, flush = True)
    print("Frames per second: %d" % fps, flush = True)

    prev_imgs = []
    times = []

    for i in range(med_count):
        success, image = cap.read()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        prev_imgs.append(image) 

    cap.set(cv2.CAP_PROP_POS_MSEC, 0)

    for i in range(num_frames):
        success, image = cap.read()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        time = cap.get(cv2.CAP_PROP_POS_MSEC)
        times.append(time)
        cv2.imwrite(os.path.join(before_path, 'test%d.jpg' % i), image)
        median_background = np.median(prev_imgs, axis = 0)
        cv2.imwrite(os.path.join(median_path, 'test%d.jpg' % i), median_background)
        removed_image = doMovingAverageBGS(image, prev_imgs)
        cv2.imwrite(os.path.join(after_path, 'test%d.jpg' % i), removed_image)
        prev_imgs.pop(0)
        prev_imgs.append(image)

    cap.release()
    cv2.destroyAllWindows()

    df_time = pd.DataFrame(times)
    df_time.to_csv(out_path + '/df_time.csv')

In [45]:
for file in os.listdir(train_data_dir):
    vid_path = os.path.join(train_data_dir, file)
    vid_id = int(re.findall("_(\d{4}).mp4",os.path.basename(vid_path))[0])
    print("Extracting vid_id: %d" % vid_id, flush = True)
    output_path = os.path.join(background_path, str(vid_id))
    os.makedirs(output_path, exist_ok=True)
    get_frames(vid_path, output_path, 210)

for file in os.listdir(test_data_dir):
    vid_path = os.path.join(test_data_dir, file)
    vid_id = int(re.findall("_(\d{4}).mp4",os.path.basename(vid_path))[0])
    print("Extracting vid_id: %d" % vid_id, flush = True)
    output_path = os.path.join(background_path, str(vid_id))
    os.makedirs(output_path, exist_ok=True)
    get_frames(vid_path, output_path, 210)

Extracting vid_id: 2580
Before path/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg2/2580/before
After path/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg2/2580/after
Median path/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg2/2580/median
Num frames: 22095
Frames per second: 14


## Moving frames to binary classes

In [None]:
ranges = pd.read_csv('/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/GasVid_Ranges_Seconds.csv')
ranges = ranges.set_index('Video No.')
ranges.shape

(31, 4)

In [None]:
def copy_frames_nonleak(vid_id, train_test_path):
    col_names = ["Index", "Times"]
    stat_times = pd.read_csv(os.path.join(background_path, '%d/df_time.csv' % vid_id), names = col_names, header = None)
    stat_times = stat_times.dropna()

    nonleak_start = ranges.loc[vid_id,'Nonleak Range Start (s)'] * 1000
    nonleak_end = ranges.loc[vid_id,'Nonleak Range End (s)'] * 1000

    start_bool = stat_times['Times'] >= nonleak_start
    end_bool = stat_times['Times'] <= nonleak_end

    valid_index = stat_times[start_bool & end_bool]
    valid_index = valid_index.astype({'Index': 'int32'})

    folder_before = '/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg/%d/after' %vid_id
    valid_index['Before Filename'] = valid_index["Index"].apply(lambda x: os.path.join(folder_before, 'test%d.jpg'%x))
    valid_index['After Filename'] = valid_index["Index"].apply(lambda x: os.path.join(train_test_path, 'vid%dtest%d.jpg'%(vid_id,x)))
    for i in range(valid_index.shape[0]):
        before = valid_index.iloc[i,]['Before Filename']
        after = valid_index.iloc[i,]['After Filename']
        shutil.copy(before, after)

def copy_frames_leak(vid_id, train_test_path):
    col_names = ["Index", "Times"]
    stat_times = pd.read_csv('/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg/%d/df_time.csv' % vid_id, names = col_names, header = None)
    stat_times = stat_times.dropna()

    leak_start = ranges.loc[vid_id,'Leak Range Start (s)'] * 1000
    leak_end = ranges.loc[vid_id,'Leak Range End (s)'] * 1000

    start_bool = stat_times['Times'] >= leak_start
    end_bool = stat_times['Times'] <= leak_end

    valid_index = stat_times[start_bool & end_bool]
    valid_index = valid_index.astype({'Index': 'int32'})

    folder_before = '/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg/%d/after' %vid_id
    valid_index['Before Filename'] = valid_index["Index"].apply(lambda x: os.path.join(folder_before, 'test%d.jpg'%x))
    valid_index['After Filename'] = valid_index["Index"].apply(lambda x: os.path.join(train_test_path, 'vid%dtest%d.jpg'%(vid_id,x)))
    for i in range(valid_index.shape[0]):
        before = valid_index.iloc[i,]['Before Filename']
        after = valid_index.iloc[i,]['After Filename']
        shutil.copy(before, after)

In [None]:
print("Begin moving training data", flush = True)
for file in os.listdir(train_data_dir):
    vid_path = os.path.join(train_data_dir, file)
    vid_id = int(re.findall("_(\d{4}).mp4",os.path.basename(vid_path))[0])
    print("Moving vid_id: %d" % vid_id, flush = True)
    copy_frames_nonleak(vid_id, frame_train_data_dir_nonleak)
    copy_frames_leak(vid_id, frame_train_data_dir_leak)

print("Begin moving testing data", flush = True)
for file in os.listdir(test_data_dir):
    vid_path = os.path.join(test_data_dir, file)
    vid_id = int(re.findall("_(\d{4}).mp4",os.path.basename(vid_path))[0])
    print("Moving vid_id: %d" % vid_id, flush = True)
    copy_frames_nonleak(vid_id, frame_test_data_dir_nonleak)
    copy_frames_leak(vid_id, frame_test_data_dir_leak)
print("Completed", flush = True)

Begin moving training data
Moving vid_id: 2580


FileNotFoundError: [Errno 2] No such file or directory: '/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg2/2580/df_time.csv'

## Moving frames to 8 classes

In [None]:
ranges = pd.read_csv("/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/GasVid_Ranges_all.csv", index_col = "Video No.")
ranges.head(5)

Unnamed: 0_level_0,C0(S),C0(E),C1(S),C1(E),C2(S),C2(E),C3(S),C3(E),C4(S),C4(E),C5(S),C5(E),C6(S),C6(E),C7(S),C7(E)
Video No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2564,31,191,211,371,391,551,571,731,751,911,931,1091,1111,1271,1291,1451
2559,44,204,224,384,404,564,584,744,764,924,944,1104,1124,1284,1304,1464
2560,32,192,212,372,392,552,572,732,752,912,932,1092,1112,1272,1292,1452
2561,34,194,214,374,394,554,574,734,754,914,934,1094,1114,1274,1294,1454
2562,39,199,219,379,399,559,579,739,759,919,939,1099,1119,1279,1299,1459


In [None]:
def extract_8_classes(vid_id, train_test_path):
    col_names = ["Index", "Times"]
    frame_times = pd.read_csv('/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg/%d/df_time.csv' % vid_id, names = col_names, header = None)
    frame_times = frame_times.dropna()
    folder_vid = '/home/bestlab/Desktop/Squishy-Methane-URAP-New/AngelineLee/MethaneModel/background_sub_testing_movingavg/%d/after' %vid_id

    for i in range(8):
        start_time = ranges.loc[vid_id, 'C%d(S)'%i] * 1000
        end_time = ranges.loc[vid_id, 'C%d(E)'%i] * 1000

        start_bool = frame_times['Times'] >= start_time
        end_bool = frame_times['Times'] <= end_time
        valid_index = frame_times[start_bool & end_bool]
        valid_index = valid_index.astype({'Index': 'int32'})
        
        valid_index['Before Filename'] = valid_index["Index"].apply(lambda x: os.path.join(folder_vid, 'test%d.jpg'%x))
        class_folder = os.path.join(train_test_path, 'C' + str(i))
        valid_index['After Filename'] = valid_index["Index"].apply(lambda x: os.path.join(class_folder, 'vid%dtest%d.jpg'%(vid_id,x)))
        for i in range(valid_index.shape[0]):
            before = valid_index.iloc[i,]['Before Filename']
            after = valid_index.iloc[i,]['After Filename']
            shutil.copy(before, after)
    print('Finished moving video %d'%vid_id)

In [None]:
for file in os.listdir(background_path):
    vid_path = os.path.join(background_path, file)
    vid_id = int(file)
    print("Moving vid_id: %d" % vid_id, flush = True)
    if file[0] == '1':
        extract_8_classes(vid_id, classes_test_folder)
    if file[0] == '2':
        extract_8_classes(vid_id, classes_train_folder)

Moving vid_id: 2580


KeyboardInterrupt: 