In [66]:
import warnings

warnings.filterwarnings("ignore")

import os
import shutil
import platform
import cv2
import random
import math
import uuid
import pandas as pd
import sys
import traceback
import utils

%load_ext autoreload
%autoreload 2

%reload_ext nb_black
%config IPCompleter.greedy=True

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [67]:
# ID of the mouse whose video is processed by the cells below.
CURRENT_MICE = "PrL-4"

# The videos/ and data/ folders are siblings of the current working directory.
parent_directory = os.path.dirname(os.getcwd())
path_to_videos, path_to_data = (
    os.path.join(parent_directory, folder) for folder in ("videos", "data")
)

# IDs of every known mouse; each video is looked up as <ID>.mp4 in path_to_videos.
mice = [f"PrL-{number}" for number in (2, 3, 4, 5, 8, 9, 10, 16, 19)]

# Class labels; they double as folder names under path_to_data.
labels = ["parent-mice", "child-mice"]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [68]:
# One row per mouse ID; video metadata columns are added to this frame further down.
mice_df = pd.DataFrame({"Mice": mice})

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [69]:
def get_video_data(mice: str):
    """Read the frame rate and frame count of one mouse's video.

    Args:
        mice: Mouse ID; the video is expected at ``<path_to_videos>/<mice>.mp4``.

    Returns:
        A two-element ``pd.Series``: ``[fps, total_frames]`` as reported by
        OpenCV's ``CAP_PROP_FPS`` and ``CAP_PROP_FRAME_COUNT``.

    Raises:
        FileNotFoundError: If the video file does not exist.
    """
    video_file_path = os.path.join(path_to_videos, f"{mice}.mp4")

    if not os.path.isfile(video_file_path):
        raise FileNotFoundError(f"{mice}.mp4 does not exist!")

    videoCaptureInstance = cv2.VideoCapture(video_file_path)
    try:
        fps = videoCaptureInstance.get(cv2.CAP_PROP_FPS)
        frames = videoCaptureInstance.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        # Always free the decoder handle; this function is applied once per
        # mouse, so unreleased captures would pile up.
        videoCaptureInstance.release()
    return pd.Series([fps, frames])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [70]:
# Look up FPS and frame count for every mouse and store them as two new columns.
mice_df[["video_fps", "total_video_frames"]] = mice_df["Mice"].apply(get_video_data)
mice_df.head()

Unnamed: 0,Mice,video_fps,total_video_frames
0,PrL-2,30.00003,83713.0
1,PrL-3,30.00003,40655.0
2,PrL-4,30.00003,48634.0
3,PrL-5,30.00003,46981.0
4,PrL-8,30.00003,48489.0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [71]:
# Metadata row (FPS, frame count) of the mouse selected by CURRENT_MICE.
mice_row = mice_df.loc[mice_df["Mice"] == CURRENT_MICE]
mice_row

Unnamed: 0,Mice,video_fps,total_video_frames
2,PrL-4,30.00003,48634.0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [72]:
def create_images_from_video(start_seconds: int, end_seconds: int, class_label: str):
    """Save one frame per second of CURRENT_MICE's video between two timestamps.

    Images are written as ``<CURRENT_MICE>-<uuid4>.jpg`` into
    ``<path_to_data>/<class_label>``, which is created when missing.

    Args:
        start_seconds: Video timestamp (seconds) of the first frame to save.
        end_seconds: Video timestamp (seconds) at which to stop, exclusive.
            Nothing is saved when it is not greater than ``start_seconds``.
        class_label: Destination class; must be one of ``labels``.

    Raises:
        ValueError: If ``class_label`` or ``CURRENT_MICE`` is not recognised.
        BufferError: If a frame cannot be read before ``end_seconds`` is reached.
        OSError: If an image cannot be written.

    ``cv2.error`` exceptions are reported on stdout and not re-raised.
    """
    if class_label not in labels:
        raise ValueError("Enter a valid class label!")
    if CURRENT_MICE not in mice:
        raise ValueError("Enter a valid mice!")

    output_dir = os.path.join(path_to_data, class_label)
    if not os.path.isdir(output_dir):
        # makedirs also creates path_to_data itself when it is missing.
        os.makedirs(output_dir, exist_ok=True)
        print(f"made directory... at {path_to_data}/{class_label}")

    frame_rate = math.floor(mice_row["video_fps"].item())

    video_file_path = os.path.join(path_to_videos, f"{CURRENT_MICE}.mp4")
    videoCaptureInstance = cv2.VideoCapture(video_file_path)

    current_frame = start_seconds * frame_rate
    seconds = start_seconds
    try:
        # Jump to the requested timestamp. Without this the capture starts
        # decoding at frame 0 and the saved images would not correspond to the
        # timestamps that are printed.
        # NOTE(review): frame-accurate seeking depends on the codec/backend.
        videoCaptureInstance.set(cv2.CAP_PROP_POS_FRAMES, current_frame)

        # `<` (not `!=`) so an empty or reversed window terminates immediately.
        while seconds < end_seconds:
            ret, frame = videoCaptureInstance.read()
            if not ret:
                raise BufferError(
                    "Unexpected error occurred with reading image due to bad video file!"
                )

            # Record frame every 1 second
            if current_frame % frame_rate == 0:
                filename = f"{CURRENT_MICE}-{uuid.uuid4()}"
                filepath = os.path.join(output_dir, f"{filename}.jpg")
                if not cv2.imwrite(filepath, frame):
                    raise OSError(f"Could not write image to {filepath}")

                print(
                    f"Saved image as {filename} at timestamp {utils.sec_to_time(seconds)}"
                )
                seconds += 1
            client_key_press = cv2.waitKey(1) & 0xFF

            # end video stream by escape key
            if client_key_press == 27:
                break
            current_frame += 1

    except cv2.error as err:
        # Read the code from the caught instance, not from the exception class.
        print(
            "Unexpected error occurred with reading image and error code is",
            err.code,
        )
    finally:
        # Release the capture on every exit path, including BufferError.
        videoCaptureInstance.release()

        if platform.system() == "Darwin":
            cv2.waitKey(1)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [73]:
def split_train_test(class_label: str, split_ratio: float):
    """Randomly move the staged images of one class into train/ and test/.

    Every file in ``<path_to_data>/<class_label>`` is moved to
    ``<path_to_data>/train/<class_label>`` or ``<path_to_data>/test/<class_label>``.
    After shuffling, files whose index is below ``len(files) * split_ratio`` go
    to train and the rest go to test. The staging folder is removed once empty.

    Args:
        class_label: Class to split; must be one of ``labels``.
        split_ratio: Fraction of the files that goes to the train folder.

    Raises:
        ValueError: If ``class_label`` is not recognised.
    """
    if class_label not in labels:
        raise ValueError("Enter a valid class label!")

    source_dir = os.path.join(path_to_data, class_label)
    train_dir = os.path.join(path_to_data, "train", class_label)
    test_dir = os.path.join(path_to_data, "test", class_label)

    # shutil.move only moves *into* a destination that is an existing
    # directory; otherwise it renames the file to that path. Create both
    # targets up front so no image is renamed or overwritten by accident.
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    image_files = os.listdir(source_dir)
    random.shuffle(image_files)
    train_size = len(image_files) * split_ratio

    for files_moved, image_file in enumerate(image_files):
        destination = train_dir if files_moved < train_size else test_dir
        shutil.move(os.path.join(source_dir, image_file), destination)

    if not os.listdir(source_dir):
        shutil.rmtree(source_dir)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [74]:
def create_dataset_pipeline(
    start_seconds, end_seconds, class_label: str, split=True, split_ratio=0.8
):
    """Extract frames for one class and optionally split them into train/test.

    Args:
        start_seconds: Passed to ``create_images_from_video``.
        end_seconds: Passed to ``create_images_from_video``.
        class_label: Class folder under ``path_to_data`` that receives the images.
        split: When true, call ``split_train_test`` after extraction.
        split_ratio: Train fraction forwarded to ``split_train_test``.

    Errors are best-effort: any ``Exception`` is printed as a traceback and not
    re-raised. ``KeyboardInterrupt`` and ``SystemExit`` are allowed to propagate
    so a long extraction can still be interrupted.
    """
    try:
        create_images_from_video(start_seconds, end_seconds, class_label)
        if split:
            class_dir = f"{path_to_data}/{class_label}"
            if not os.listdir(class_dir):
                # Raised (rather than sys.exit) so it is reported by the handler
                # below instead of trying to terminate the kernel.
                raise RuntimeError(
                    f"{class_dir} is empty and that means frames from videos were not captured successfully!"
                )
            split_train_test(class_label, split_ratio)
    except Exception:
        print(traceback.format_exc())

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [75]:
# latest start = floor(total frames / frame_rate) - 150 seconds
# (150 = last trial of 120 seconds that is ignored + 30 second threshold)
def create_parent_dataset(parent_images):
    """Capture ``parent_images`` one-second frames from a random window of the video.

    The window start is drawn uniformly from the seconds of the video that are
    at least 150 seconds before its end, and is additionally clamped so the
    whole window fits inside the video. The frames are saved under the
    "parent-mice" label and split into train/test by ``create_dataset_pipeline``.

    Args:
        parent_images: Number of seconds (one image each) to capture.

    Raises:
        ValueError: If the video is too short for the requested window.
    """
    frame_rate = math.floor(mice_row["video_fps"].item())
    # total_video_frames is stored as a float; random.randint needs ints
    # (non-integer arguments are rejected on recent Python versions).
    total_seconds = int(mice_row["total_video_frames"].item() // frame_rate)
    latest_start = min(total_seconds - 150, total_seconds - parent_images)
    if latest_start < 0:
        raise ValueError(
            f"Video of {total_seconds} seconds is too short to capture {parent_images} parent images!"
        )

    start_seconds = random.randint(0, latest_start)
    end_seconds = start_seconds + parent_images
    create_dataset_pipeline(start_seconds, end_seconds, "parent-mice")

    train_size = len(os.listdir(f"{path_to_data}/train/parent-mice"))
    test_size = len(os.listdir(f"{path_to_data}/test/parent-mice"))

    print(
        f"Saved all {parent_images} parent image files and train has {train_size} pictures and test has {test_size} pictures!"
    )

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [76]:
# Trial log; the filter cell below relies on its MouseID and Time_Latency_* columns.
experiment = pd.read_csv(
    filepath_or_buffer=f"{parent_directory}/experiment_new.csv",
)
experiment.head()

Unnamed: 0,Group,Cohort,MouseID,Virus,Trial,Laser,Start,Time_Latency_Contact_s,Latency_Contact_s,Time_Latency_Retrieve_s,Latency_Retrieve_s,Retrieved
0,Control,1,PrL-10,mCherry,1,OFF,0:13,0:27,14.0,NO,120,0
1,Control,1,PrL-10,mCherry,2,ON,2:38,2:57,19.0,NO,120,0
2,Control,1,PrL-10,mCherry,3,OFF,5:00,6:11,71.0,NO,120,0
3,Control,1,PrL-10,mCherry,4,ON,7:19,8:27,68.0,NO,120,0
4,Control,1,PrL-10,mCherry,5,OFF,9:37,NO,120.0,NO,120,0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [77]:
# Trials of CURRENT_MICE, dropping every row whose Time_Latency_Contact_s or
# Time_Latency_Retrieve_s column holds the placeholder "NO".
curr_mouse_df = experiment.loc[
    (experiment["MouseID"] == CURRENT_MICE)
    & (experiment["Time_Latency_Contact_s"] != "NO")
    & (experiment["Time_Latency_Retrieve_s"] != "NO")
]
curr_mouse_df

Unnamed: 0,Group,Cohort,MouseID,Virus,Trial,Laser,Start,Time_Latency_Contact_s,Latency_Contact_s,Time_Latency_Retrieve_s,Latency_Retrieve_s,Retrieved
26,PrL-1,1,PrL-4,ChR2,1,OFF,0:06,0:18,12.0,0:43,37,1
27,PrL-1,1,PrL-4,ChR2,2,ON,2:15,2:19,4.0,2:24,9,1
28,PrL-1,1,PrL-4,ChR2,3,OFF,4:25,4:35,10.0,4:41,16,1
29,PrL-1,1,PrL-4,ChR2,4,ON,6:33,6:41,8.0,6:46,13,1
30,PrL-1,1,PrL-4,ChR2,5,OFF,9:06,9:36,30.0,9:42,36,1
31,PrL-1,1,PrL-4,ChR2,6,ON,11:15,12:01,46.0,12:05,50,1
33,PrL-1,1,PrL-4,ChR2,8,ON,16:07,16:13,5.0,16:17,10,1
35,PrL-1,1,PrL-4,ChR2,10,ON,20:32,20:37,5.0,20:40,8,1


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [78]:
def create_pup_dataset(pup_images: int):
    """Capture up to ``pup_images`` "child-mice" frames from randomly ordered trials.

    Each trial in ``curr_mouse_df`` contributes the window between its ``Start``
    and ``Time_Latency_Contact_s`` timestamps, shrunk by one second on each side,
    at one image per second. Trials are visited once each, in random order,
    until the requested number of seconds has been scheduled; the last window
    is shortened so the total never exceeds ``pup_images``. The staged images
    are then split 80/20 into train/test.

    Args:
        pup_images: Total number of one-second frames to capture.
    """
    START_PUP_CAPTURE_THRESH = 1
    END_PUP_CAPTURE_THRESH = 1

    # Shuffle the trials once instead of repeatedly drawing random trial
    # numbers and rejecting the ones that are missing or already used.
    trial_windows = list(
        zip(curr_mouse_df["Start"], curr_mouse_df["Time_Latency_Contact_s"])
    )
    random.shuffle(trial_windows)

    images_recorded = 0
    for start_time, contact_time in trial_windows:
        if images_recorded >= pup_images:
            break

        # presumably utils.time_to_sec turns "m:ss" into whole seconds -- TODO confirm
        start_seconds = utils.time_to_sec(str(start_time)) + START_PUP_CAPTURE_THRESH
        end_seconds = utils.time_to_sec(str(contact_time)) - END_PUP_CAPTURE_THRESH

        # Stop recording excess number of images.
        end_seconds = min(end_seconds, start_seconds + (pup_images - images_recorded))
        diff_seconds = end_seconds - start_seconds

        # Contact within two seconds of the start leaves no usable window;
        # skipping it keeps the counter from going backwards.
        if diff_seconds <= 0:
            continue

        print(
            f"Starts at {start_seconds} seconds and end at {end_seconds} seconds of interval {diff_seconds} seconds"
        )
        create_dataset_pipeline(start_seconds, end_seconds, "child-mice", False)

        # NOTE: create_dataset_pipeline reports failures by printing, so this
        # counts scheduled seconds, not files verified on disk.
        images_recorded += diff_seconds

    if images_recorded < pup_images:
        print("Insufficient contact group images...")

    split_train_test("child-mice", 0.8)

    train_size = len(os.listdir(f"{path_to_data}/train/child-mice"))
    test_size = len(os.listdir(f"{path_to_data}/test/child-mice"))

    print(
        f"Saved all {images_recorded} child image files and train has {train_size} pictures and test has {test_size} pictures!"
    )

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [79]:
def create_final_parent_pup_dataset(images: int, create_parent: bool, create_child: bool):
    """Rebuild the child and/or parent image datasets for CURRENT_MICE.

    For each requested class the existing train/ and test/ folders are emptied
    with ``utils.remove_all_files`` and the class dataset is regenerated.

    Args:
        images: Number of images to capture per class.
        create_parent: Rebuild the "parent-mice" dataset.
        create_child: Rebuild the "child-mice" dataset.

    Errors are best-effort: any ``Exception`` is printed as a traceback and not
    re-raised. ``KeyboardInterrupt`` and ``SystemExit`` propagate so the run can
    be interrupted.
    """
    try:
        if create_child:
            for split_name in ("train", "test"):
                utils.remove_all_files(f"{path_to_data}/{split_name}/child-mice")
            create_pup_dataset(images)
        if create_parent:
            for split_name in ("train", "test"):
                utils.remove_all_files(f"{path_to_data}/{split_name}/parent-mice")
            create_parent_dataset(images)
    except Exception:
        print(traceback.format_exc())


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [82]:
# Make sure the child-mice staging folder exists. The path is derived from
# path_to_data so the cell works on any machine and can be re-run safely.
os.makedirs(os.path.join(path_to_data, "child-mice"), exist_ok=True)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [80]:
# Rebuild only the child-mice dataset for CURRENT_MICE with 40 images.
create_final_parent_pup_dataset(
    images=40,
    create_parent=False,
    create_child=True,
)

Starts at 394 seconds and end at 400 seconds of interval 6 seconds
made directory... at /Users/user/Applications/machine-learning/deeproute/data/child-mice
Saved image as PrL-4-c36eb4b5-4b66-43ce-84e4-94f6a844020d at timestamp 06:34
Saved image as PrL-4-2cfab2ad-3c04-4f9c-834c-509d28a3bab4 at timestamp 06:35
Saved image as PrL-4-dc19a270-5aa4-4262-9c19-0e2e592c70a4 at timestamp 06:36
Saved image as PrL-4-bb2493ed-0f0d-440f-8ce2-49d79a798e36 at timestamp 06:37
Saved image as PrL-4-e5b44e44-a418-4a8c-a162-a620e502ab97 at timestamp 06:38
Saved image as PrL-4-4cfb9b4c-af8d-4975-985d-e9d453714df7 at timestamp 06:39
Starts at 676 seconds and end at 710 seconds of interval 34 seconds
Saved image as PrL-4-fc57c53f-5c53-433f-aef6-336dbfea789c at timestamp 11:16
Saved image as PrL-4-42a9e9f4-daba-4523-bc69-075f9a5a4f80 at timestamp 11:17
Saved image as PrL-4-6772802b-4d1f-4ee4-8b10-85e33525e577 at timestamp 11:18
Saved image as PrL-4-edee2953-a157-49cd-a77b-e2556d9053b3 at timestamp 11:19
Saved 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>