# MED-PC Extracting the Recording Data and Metadata

## Importing the Python Libraries

In [1]:
import sys
import glob
from collections import defaultdict
import os
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from moviepy.editor import *
from datetime import datetime
from IPython.display import Video

In [2]:
from medpc2excel.medpc_read import medpc_read

In [3]:
# setting path
sys.path.append('../../src')

In [4]:
# All the libraries that were created for this repository
import extract.dataframe
import processing.tone
import extract.metadata

## Getting the Metadata from all the MED-PC Recording Files

In [5]:
# Leftover `VideoFileClip??` source lookup removed: it is an IPython-only debugging aid whose output nothing in this notebook uses.

- Getting all the file paths of the recording files (which all happen to end in `.txt`)

# NOTE: The following path must be changed to the directory where your MED-PC recording files are located, if they are not in the specified folder

In [6]:
# Pattern matching every MED-PC recording file (`.txt`) in the data folder.
# NOTE: glob does not guarantee any particular ordering of the returned paths.
med_pc_file_pattern = "./data/*.txt"
all_med_pc_file = glob.glob(med_pc_file_pattern)

- Update the path in the cell above if you're using your own data

In [7]:
all_med_pc_file[:10]

['./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt',
 './data/2022-05-19_09h09m_Subject 2.3v1.4.txt']

- Example of what the MED-PC Recording file looks like

In [8]:
# Read the first recording file, then print its first 20 raw lines
# to show what the MED-PC output layout looks like
with open(all_med_pc_file[0]) as f:
    lines = f.readlines()
for line in lines[:20]:
    print(line)

File: C:\MED-PC\Data\2022-05-20_10h24m_Subject 2.1 v 1.3.txt







Start Date: 05/20/22

End Date: 05/20/22

Subject: 2.1 v 1.3

Experiment: Pilot of Pilot

Group: Cage 2

Box: 4

Start Time: 10:24:10

End Time: 11:55:14

MSN: C57_reward

A:    1599.000

D:    8000.000

F:    2000.000

G:       0.000

H:       0.000

I:       0.000

L:       0.000



- We will be extracting the first 10 or so lines that look like:

```
File: C:\MED-PC\Data\2022-05-06_12h59m_Subject 3.4 (2).txt

Start Date: 05/06/22

End Date: 05/06/22

Subject: 3.4 (2)

Experiment: Pilot of Pilot

Group: Cage 4

Box: 1

Start Time: 12:59:58

End Time: 14:02:38

MSN: levelNP_CS_reward_laserepochON1st_noshock
```
    
- We will just find all the lines that start with "File", "Start Date", "End Date", "Subject", "Experiment", "Group", "Box", "Start Time", "End Time", or "MSN". And then stop once all the metadata types have been collected

In [9]:
# This makes a nested dictionary of file paths to each individual metadata type
file_path_to_meta_data = extract.metadata.get_all_med_pc_meta_data_from_files(list_of_files=all_med_pc_file)

In [10]:
# Show only the first file's metadata as an example (the loop stops after one entry)
for file_path in file_path_to_meta_data:
    print(f"File path: {file_path}")
    print(f"Metadata types and associated values: {file_path_to_meta_data[file_path]}")
    break

File path: ./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
Metadata types and associated values: {'File': 'C:\\MED-PC\\Data\\2022-05-20_10h24m_Subject 2.1 v 1.3.txt', 'Start Date': '05/20/22', 'End Date': '05/20/22', 'Subject': '2.1 v 1.3', 'Experiment': 'Pilot of Pilot', 'Group': 'Cage 2', 'Box': '4', 'Start Time': '10:24:10', 'End Time': '11:55:14', 'MSN': 'C57_reward'}


## Making a Dataframe out of the Metadata

In [11]:
# Build the metadata table with one row per recording file. The file path
# starts out as the index, so it is moved into a regular "index" column.
metadata_df = (
    pd.DataFrame
    .from_dict(file_path_to_meta_data, orient="index")
    .reset_index()
)

In [12]:
metadata_df.head()

Unnamed: 0,index,File,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN
0,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt,C:\MED-PC\Data\2022-05-20_10h24m_Subject 2.1 v...,05/20/22,05/20/22,2.1 v 1.3,Pilot of Pilot,Cage 2,4,10:24:10,11:55:14,C57_reward
1,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,C:\MED-PC\Data\2022-05-19_09h09m_Subject 2.3v1...,05/19/22,05/19/22,2.3v1.4,Pilot of Pilot,Cage 1,2,09:09:40,09:51:34,C57_reward


In [13]:
metadata_df.tail()

Unnamed: 0,index,File,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN
0,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt,C:\MED-PC\Data\2022-05-20_10h24m_Subject 2.1 v...,05/20/22,05/20/22,2.1 v 1.3,Pilot of Pilot,Cage 2,4,10:24:10,11:55:14,C57_reward
1,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,C:\MED-PC\Data\2022-05-19_09h09m_Subject 2.3v1...,05/19/22,05/19/22,2.3v1.4,Pilot of Pilot,Cage 1,2,09:09:40,09:51:34,C57_reward


- Just getting the numbers out of the column that contains the cage information

## Inputting all the MED-PC log files

- Example of what the MED-PC script that was run when recording the behaviors looks like

In [15]:
# Read the MED-PC script, then print its first 100 raw lines
with open("./data/levelNP_CS_reward_laserepochON1st_noshock.MPC") as f:
    lines = f.readlines()
for line in lines[:100]:
    print(line)

\v3 stop tone with poke

\v3.2 monitor port entries AND exits



\INPUTS

^port = 8



\OUTPUTS

^fan = 16

^houselight = 11

^tone1 = 2

^tone2 = 3

^tone3 = 4

^tone4 = 5

^pump = 9

^whitenoise = 1

^laser=10

^csout = 5

^peout = 15

^cs1out = 12

^cs2out = 13

^cs3out = 14



\EXP SETTINGS

^ncsNoShock = 0

^initCS1trials = 3



\ARRAYS

DIM P = 20000 \Port entry time stamp array

DIM Q = 2500 \US delivery time stamp array (absolute)

DIM R = 2500 \US time stamp array (relative to last CS)

DIM W = 2500 \ITI values used for CS

DIM S = 2500 \CS presentation values (absolute - every time light turns on)

DIM N = 20000 \Port exit time stamp array

DIM K = 2500 \CS type

DIM B = 2500 \shock intensity

DIM G = 2500 \

DIM H = 2500 \

DIM I = 2500 \



LIST V = 30", 30", 30", 30", 100", 95", 90", 80", 90", 100", 90", 120", 90", 85", 90", 95", 120", 80", 95", 80", 90", 80", 75", 100", 95", 90", 80", 90", 100", 90", 120", 90", 85", 90", 95", 90", 80", 90", 100", 90", 90", 90", 90", 90", 

- We will be using the comments in the MED-PC script(which starts with the `\`) to create a name for the variables. By default, MED-PC uses a single letter as the name of the variable.
    - This will use the medpc2excel library found in https://github.com/cyf203/medpc2excel
- Example of the comments in the MED-PC script that we will use the names from:

```
\Variables

\A - Time since last CS

\B - Shock intensity

\C - Counter array

\D - Current ITI value

\E - CS ITI values for first few trials

\F - Shock duration

\G -

\H -

\I -

\J - Shock intensity repo

\K - CS type

\L -

\M - CS type repo

\N - Port exit time stamp array

\O -

\P - Port entry time stamp array

\Q - Sucrose delivery time stamp array (absolute)

\R - Sucrose delivery time stamp array (relative to last CS)

\S - CS presentation time stamp array

\T - Session timer

\U - Time since last CS presentation

\V - List of CS ITI values (tone + houselight)

\W - ITI values used for CS one each trial

\X -

\Y - Beam break monitor variable

\Z -
```

In [16]:
# Read the first recording file, then print its first 20 raw lines
with open(all_med_pc_file[0]) as f:
    lines = f.readlines()
for line in lines[:20]:
    print(line)

File: C:\MED-PC\Data\2022-05-20_10h24m_Subject 2.1 v 1.3.txt







Start Date: 05/20/22

End Date: 05/20/22

Subject: 2.1 v 1.3

Experiment: Pilot of Pilot

Group: Cage 2

Box: 4

Start Time: 10:24:10

End Time: 11:55:14

MSN: C57_reward

A:    1599.000

D:    8000.000

F:    2000.000

G:       0.000

H:       0.000

I:       0.000

L:       0.000



- **Please make sure that the corresponding `.mpc` file (aka the MED-PC script) that was run to create the log file is also in the same folder**

In [18]:
# Parse every recording file into one combined frame and renumber the rows 0..n-1
concatted_medpc_df = (
    extract.dataframe
    .get_medpc_dataframe_from_list_of_files(medpc_files=all_med_pc_file)
    .reset_index(drop=True)
)

In [19]:
concatted_medpc_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,file_path
0,1.89,64.0,399.0,0.0,60.01,3.07,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
1,3.36,144.0,399.0,0.0,140.01,3.5,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
2,4.99,234.0,399.0,0.0,230.01,6.8,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
3,7.57,314.0,399.0,0.0,310.01,7.61,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
4,8.0,389.0,399.0,0.0,385.01,8.51,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt


In [20]:
concatted_medpc_df.tail()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,file_path
5077,,,,,,,1.0,,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
5078,,,,,,,1.0,,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
5079,,,,,,,1.0,,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
5080,,,,,,,1.0,,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
5081,,,,,,,1.0,,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt


- Dropping all rows that don't have a port entry time (rows where `(P)Portentry` is missing or 0.0)

In [21]:
# Keep only rows with a usable port entry timestamp: rows where "(P)Portentry"
# is 0.0 or missing are dropped.
# `.copy()` makes the result an independent frame, so columns added to it later
# do not trigger pandas' SettingWithCopyWarning. `subset` is given as a list,
# which every pandas version accepts.
port_entry_column = "(P)Portentry"
cs_time_df = (
    concatted_medpc_df[concatted_medpc_df[port_entry_column] != 0.0]
    .dropna(subset=[port_entry_column])
    .copy()
)

In [22]:
cs_time_df["file_path"].unique()

array(['./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt',
       './data/2022-05-19_09h09m_Subject 2.3v1.4.txt'], dtype=object)

# Adding the time of the start of the video

In [24]:
from datetime import datetime, timedelta


In [26]:

# Start time of each cage's video.
# NOTE(review): the MED-PC metadata shown earlier lists "Start Time" 09:09:40
# (Cage 1) and 10:24:10 (Cage 2); confirm the values below are the intended ones.
cage_1_video_start_time = datetime(year=2022, month=5, day=19, hour=9, minute=20, second=28)
cage_2_video_start_time = datetime(year=2022, month=5, day=20, hour=11, minute=24, second=29)

In [27]:
cage_1_video_start_time

datetime.datetime(2022, 5, 19, 9, 20, 28)

In [28]:
cage_2_video_start_time

datetime.datetime(2022, 5, 20, 11, 24, 29)

In [31]:
cage_1_video_start_time + timedelta(seconds=60)

datetime.datetime(2022, 5, 19, 9, 21, 28)

## Correcting for the delay in recording time

- In cage 1 video, tone 2(140 seconds) happened at 2 minutes 31 seconds(151 seconds)
    - In theory 11 second delay between the video time suggested by MED-PC versus the actual time in the video. 

- In cage 2 video, tone 2(140 seconds) happened at 2 minutes 29 seconds(149 seconds)
    - In theory 9 second delay between the video time suggested by MED-PC versus the actual time in the video. 

In [44]:
# Delay (seconds) = time tone 2 is observed in the video - MED-PC time of tone 2 (140 s)
cage_1_video_delay_duration = 151 - 140
cage_2_video_delay_duration = 149 - 140


## Adding a column for the tone/port entry time in the video

In [37]:
concatted_medpc_df["file_path"].unique()

array(['./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt',
       './data/2022-05-19_09h09m_Subject 2.3v1.4.txt'], dtype=object)

In [39]:
# Pick the Cage 2 recording by its "Group" metadata instead of by position in
# `unique()`: that position follows the order of the glob result, which is not
# guaranteed. `.copy()` gives an independent frame that is safe to add columns to.
cage_2_file_path = metadata_df.loc[metadata_df["Group"] == "Cage 2", "index"].iloc[0]
cage_2_medpc_df = concatted_medpc_df[concatted_medpc_df["file_path"] == cage_2_file_path].copy()

In [40]:
# Pick the Cage 1 recording by its "Group" metadata instead of by position in
# `unique()`: that position follows the order of the glob result, which is not
# guaranteed. `.copy()` gives an independent frame that is safe to add columns to.
cage_1_file_path = metadata_df.loc[metadata_df["Group"] == "Cage 1", "index"].iloc[0]
cage_1_medpc_df = concatted_medpc_df[concatted_medpc_df["file_path"] == cage_1_file_path].copy()

In [42]:
cage_1_medpc_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,file_path
2541,329.87,64.0,399.0,0.0,60.01,332.03,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
2542,356.64,389.0,399.0,0.0,140.01,356.68,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
2543,382.57,489.0,399.0,0.0,230.01,383.32,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
2544,389.84,584.0,399.0,0.0,310.01,410.48,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt
2545,410.75,674.0,399.0,0.0,385.01,411.59,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt


In [43]:
cage_2_medpc_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,file_path
0,1.89,64.0,399.0,0.0,60.01,3.07,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
1,3.36,144.0,399.0,0.0,140.01,3.5,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
2,4.99,234.0,399.0,0.0,230.01,6.8,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
3,7.57,314.0,399.0,0.0,310.01,7.61,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt
4,8.0,389.0,399.0,0.0,385.01,8.51,1.0,0.0,20220520,2.1 v 1.3,./data/2022-05-20_10h24m_Subject 2.1 v 1.3.txt


In [45]:
# Shift the MED-PC tone and port entry timestamps by the measured recording
# delay to get the matching times in the Cage 1 video.
# `.assign` returns a new frame instead of writing into a slice of
# `concatted_medpc_df`, which avoids pandas' SettingWithCopyWarning and keeps
# the cell safe to re-run.
cage_1_medpc_df = cage_1_medpc_df.assign(
    tone_playing_time_in_video=cage_1_medpc_df["(S)CSpresentation"] + cage_1_video_delay_duration,
    port_entry_time_in_video=cage_1_medpc_df["(P)Portentry"] + cage_1_video_delay_duration,
)


In [46]:
# Shift the MED-PC tone and port entry timestamps by the measured recording
# delay to get the matching times in the Cage 2 video.
# `.assign` returns a new frame instead of writing into a slice of
# `concatted_medpc_df`, which avoids pandas' SettingWithCopyWarning and keeps
# the cell safe to re-run.
cage_2_medpc_df = cage_2_medpc_df.assign(
    tone_playing_time_in_video=cage_2_medpc_df["(S)CSpresentation"] + cage_2_video_delay_duration,
    port_entry_time_in_video=cage_2_medpc_df["(P)Portentry"] + cage_2_video_delay_duration,
)


In [48]:
cage_1_medpc_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,file_path,tone_playing_time_in_video,port_entry_time_in_video
2541,329.87,64.0,399.0,0.0,60.01,332.03,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,71.01,340.87
2542,356.64,389.0,399.0,0.0,140.01,356.68,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,151.01,367.64
2543,382.57,489.0,399.0,0.0,230.01,383.32,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,241.01,393.57
2544,389.84,584.0,399.0,0.0,310.01,410.48,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,321.01,400.84
2545,410.75,674.0,399.0,0.0,385.01,411.59,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,396.01,421.75


In [51]:
cage_1_medpc_df.head(n=50)

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,file_path,tone_playing_time_in_video,port_entry_time_in_video
2541,329.87,64.0,399.0,0.0,60.01,332.03,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,71.01,340.87
2542,356.64,389.0,399.0,0.0,140.01,356.68,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,151.01,367.64
2543,382.57,489.0,399.0,0.0,230.01,383.32,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,241.01,393.57
2544,389.84,584.0,399.0,0.0,310.01,410.48,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,321.01,400.84
2545,410.75,674.0,399.0,0.0,385.01,411.59,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,396.01,421.75
2546,411.71,754.0,399.0,0.0,485.01,412.8,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,496.01,422.71
2547,415.91,844.0,399.0,0.0,580.01,416.03,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,591.01,426.91
2548,416.08,944.0,399.0,0.0,670.01,421.43,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,681.01,427.08
2549,422.09,1034.0,399.0,0.0,750.01,422.21,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,761.01,433.09
2550,422.41,1154.0,399.0,0.0,840.01,423.68,1.0,0.0,20220519,2.3v1.4,./data/2022-05-19_09h09m_Subject 2.3v1.4.txt,851.01,433.41


In [56]:
# Save the Cage 1 table, including the new video-time columns, next to the raw data
cage_1_medpc_df.to_csv(path_or_buf="./data/cage_1_medpc.csv")

In [57]:
# Save the Cage 2 table, including the new video-time columns, next to the raw data
cage_2_medpc_df.to_csv(path_or_buf="./data/cage_2_medpc.csv")

In [58]:
# Deliberate stop: raising here halts a "Run All" before the cells below,
# which are kept from previous work.
raise ValueError("Rest of the notebook is previous work")

ValueError: Rest of the notebook is previous work

# Stuff Below is previous work

# Making a column for the Controlled Stimulus time

- Because the hour, the minute, and the seconds are all in different columns we will combine them into one

In [23]:
cs_time_df.columns

Index(['(P)Portentry', '(Q)USdelivery', '(R)UStime', '(W)ITIvalues',
       '(S)CSpresentation', '(N)Portexit', '(K)CStype', '(B)shockintensity',
       'date', 'subject', 'file_path'],
      dtype='object')

- Making the numbers for hours, minutes, and seconds into whole numbers

In [22]:
# Cast each CS time component to a whole number and then to text,
# in the order second, minute, hour
cs_component_columns = {
    "cs_second": "(G)controlled_stimulus_secondscomputer",
    "cs_minute": "(H)controlled_stimulus_minutescomputer",
    "cs_hour": "(I)controlled_stimulus_hourscomputer",
}
for new_column, source_column in cs_component_columns.items():
    cs_time_df[new_column] = cs_time_df[source_column].astype(int).astype(str)


- Combining the hours, minutes, and seconds into one column as a string

In [23]:
# Join hour, minute and second into one "H:M:S" string per row.
# `Series.str.cat` does this column-wise, replacing the slower row-by-row `apply`.
cs_time_df["cs_time_str"] = cs_time_df["cs_hour"].str.cat(
    [cs_time_df["cs_minute"], cs_time_df["cs_second"]], sep=":"
)

- Converting the time into a Python datetime object
    - This will allow us to make calculations

In [24]:
# Parse the "H:M:S" strings into timestamps in one vectorized call instead of
# calling `datetime.strptime` per element. No date is given, so the parsed
# values fall on 1900-01-01; only the time-of-day part is meaningful.
cs_time_df["cs_time_object"] = pd.to_datetime(cs_time_df["cs_time_str"], format="%H:%M:%S")


In [25]:
cs_time_df

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,...,(F)port_entry_minutescomputer,(J)port_entry_hourscomputer,date,subject,file_path,cs_second,cs_minute,cs_hour,cs_time_str,cs_time_object
0,75.47,64.0,399.0,0.0,60.01,76.09,1.0,13.0,4.0,20.0,...,4.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,13,4,20,20:4:13,1900-01-01 20:04:13
1,110.92,144.0,399.0,0.0,140.01,111.35,1.0,33.0,5.0,20.0,...,5.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,33,5,20,20:5:33,1900-01-01 20:05:33
2,185.83,234.0,399.0,0.0,230.01,186.37,1.0,3.0,7.0,20.0,...,6.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,3,7,20,20:7:3,1900-01-01 20:07:03
3,257.49,314.0,399.0,0.0,310.01,258.08,1.0,23.0,8.0,20.0,...,7.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,23,8,20,20:8:23,1900-01-01 20:08:23
4,270.67,,,0.0,8000.0,271.5,0.0,0.0,0.0,0.0,...,7.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,0,0,0,0:0:0,1900-01-01 00:00:00
5,307.1,,,0.0,9000.0,307.91,0.0,0.0,0.0,0.0,...,8.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,0,0,0,0:0:0,1900-01-01 00:00:00
6,329.32,,,0.0,8000.0,330.08,0.0,0.0,0.0,0.0,...,8.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,0,0,0,0:0:0,1900-01-01 00:00:00
7,332.74,,,0.0,7500.0,333.61,0.0,0.0,0.0,0.0,...,8.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,0,0,0,0:0:0,1900-01-01 00:00:00
8,337.1,,,0.0,10000.0,337.62,0.0,0.0,0.0,0.0,...,8.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,0,0,0,0:0:0,1900-01-01 00:00:00
9,341.29,,,0.0,9500.0,341.97,0.0,0.0,0.0,0.0,...,8.0,20.0,20220713,,./data/automatic_30_min_end_test/2022-07-13_20...,0,0,0,0:0:0,1900-01-01 00:00:00


# Making a column for the Port Entry time

- Because the hour, the minute, and the seconds are all in different columns we will combine them into one

In [26]:
cs_time_df.columns

Index(['(P)Portentry', '(Q)USdelivery', '(R)UStime', '(W)ITIvalues',
       '(S)CSpresentation', '(N)Portexit', '(K)CStype',
       '(G)controlled_stimulus_secondscomputer',
       '(H)controlled_stimulus_minutescomputer',
       '(I)controlled_stimulus_hourscomputer', '(B)port_entry_secondscomputer',
       '(F)port_entry_minutescomputer', '(J)port_entry_hourscomputer', 'date',
       'subject', 'file_path', 'cs_second', 'cs_minute', 'cs_hour',
       'cs_time_str', 'cs_time_object'],
      dtype='object')

- Making the numbers for hours, minutes, and seconds into whole numbers

In [27]:
# Cast each port entry time component to a whole number and then to text,
# in the order second, minute, hour
pe_component_columns = {
    "pe_second": "(B)port_entry_secondscomputer",
    "pe_minute": "(F)port_entry_minutescomputer",
    "pe_hour": "(J)port_entry_hourscomputer",
}
for new_column, source_column in pe_component_columns.items():
    cs_time_df[new_column] = cs_time_df[source_column].astype(int).astype(str)


- Combining the hours, minutes, and seconds into one column as a string

In [28]:
# Join hour, minute and second into one "H:M:S" string per row.
# `Series.str.cat` does this column-wise, replacing the slower row-by-row `apply`.
cs_time_df["pe_time_str"] = cs_time_df["pe_hour"].str.cat(
    [cs_time_df["pe_minute"], cs_time_df["pe_second"]], sep=":"
)

- Converting the time into a Python datetime object
    - This will allow us to make calculations

In [29]:
# Parse the "H:M:S" strings into timestamps in one vectorized call instead of
# calling `datetime.strptime` per element. No date is given, so the parsed
# values fall on 1900-01-01; only the time-of-day part is meaningful.
cs_time_df["pe_time_object"] = pd.to_datetime(cs_time_df["pe_time_str"], format="%H:%M:%S")


In [74]:
cs_time_df["pe_time_object"]

0   1900-01-01 20:04:28
1   1900-01-01 20:05:04
2   1900-01-01 20:06:19
3   1900-01-01 20:07:31
4   1900-01-01 20:07:44
5   1900-01-01 20:08:20
6   1900-01-01 20:08:42
7   1900-01-01 20:08:46
8   1900-01-01 20:08:50
9   1900-01-01 20:08:54
Name: pe_time_object, dtype: datetime64[ns]

In [None]:
cs_time_df["pe_time_object"]

In [30]:
cs_time_df

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,...,cs_second,cs_minute,cs_hour,cs_time_str,cs_time_object,pe_second,pe_minute,pe_hour,pe_time_str,pe_time_object
0,75.47,64.0,399.0,0.0,60.01,76.09,1.0,13.0,4.0,20.0,...,13,4,20,20:4:13,1900-01-01 20:04:13,28,4,20,20:4:28,1900-01-01 20:04:28
1,110.92,144.0,399.0,0.0,140.01,111.35,1.0,33.0,5.0,20.0,...,33,5,20,20:5:33,1900-01-01 20:05:33,4,5,20,20:5:4,1900-01-01 20:05:04
2,185.83,234.0,399.0,0.0,230.01,186.37,1.0,3.0,7.0,20.0,...,3,7,20,20:7:3,1900-01-01 20:07:03,19,6,20,20:6:19,1900-01-01 20:06:19
3,257.49,314.0,399.0,0.0,310.01,258.08,1.0,23.0,8.0,20.0,...,23,8,20,20:8:23,1900-01-01 20:08:23,31,7,20,20:7:31,1900-01-01 20:07:31
4,270.67,,,0.0,8000.0,271.5,0.0,0.0,0.0,0.0,...,0,0,0,0:0:0,1900-01-01 00:00:00,44,7,20,20:7:44,1900-01-01 20:07:44
5,307.1,,,0.0,9000.0,307.91,0.0,0.0,0.0,0.0,...,0,0,0,0:0:0,1900-01-01 00:00:00,20,8,20,20:8:20,1900-01-01 20:08:20
6,329.32,,,0.0,8000.0,330.08,0.0,0.0,0.0,0.0,...,0,0,0,0:0:0,1900-01-01 00:00:00,42,8,20,20:8:42,1900-01-01 20:08:42
7,332.74,,,0.0,7500.0,333.61,0.0,0.0,0.0,0.0,...,0,0,0,0:0:0,1900-01-01 00:00:00,46,8,20,20:8:46,1900-01-01 20:08:46
8,337.1,,,0.0,10000.0,337.62,0.0,0.0,0.0,0.0,...,0,0,0,0:0:0,1900-01-01 00:00:00,50,8,20,20:8:50,1900-01-01 20:08:50
9,341.29,,,0.0,9500.0,341.97,0.0,0.0,0.0,0.0,...,0,0,0,0:0:0,1900-01-01 00:00:00,54,8,20,20:8:54,1900-01-01 20:08:54


# Importing Video Data

- These CSVs are created in Bonsai when the video is being recorded. They give a time stamp of when the video is first being recorded.

In [31]:
video_recording_time_stamps = pd.read_csv("./data/automatic_30_min_end_test/clock_test_time_2022-07-13T20_02_37.csv", header=None)

In [32]:
video_recording_time_stamps.head()

Unnamed: 0,0,1
0,(7/14/2022 12:02:39 AM,693); 2022-07-13 20:02:39.361000
1,(7/14/2022 12:02:39 AM,740); 2022-07-13 20:02:39.408000
2,(7/14/2022 12:02:39 AM,789); 2022-07-13 20:02:39.456000
3,(7/14/2022 12:02:39 AM,853); 2022-07-13 20:02:39.520000
4,(7/14/2022 12:02:39 AM,900); 2022-07-13 20:02:39.567000


- The video starts being recorded at the time given by the cell in the first row of the second column
    - We will try to get only the hour, minute, and seconds from this
        - By getting everything after the space, and then removing everything after the period

In [33]:
video_recording_time_stamps["time"] = video_recording_time_stamps[1].apply(lambda x: x.split()[-1].split(".")[0])


In [34]:
video_recording_time_stamps.head()

Unnamed: 0,0,1,time
0,(7/14/2022 12:02:39 AM,693); 2022-07-13 20:02:39.361000,20:02:39
1,(7/14/2022 12:02:39 AM,740); 2022-07-13 20:02:39.408000,20:02:39
2,(7/14/2022 12:02:39 AM,789); 2022-07-13 20:02:39.456000,20:02:39
3,(7/14/2022 12:02:39 AM,853); 2022-07-13 20:02:39.520000,20:02:39
4,(7/14/2022 12:02:39 AM,900); 2022-07-13 20:02:39.567000,20:02:39


- Getting the first time that's listed, which is when the video starts being recorded

In [35]:
# Take the first row's time, i.e. when the video starts being recorded.
# `.iloc[0]` selects by position, so it still returns the first row if the
# index labels ever stop starting at 0.
video_recording_start_time_str = video_recording_time_stamps["time"].iloc[0]


In [36]:
video_recording_start_time_str

'20:02:39'

- Converting this into a datetime object so that we can make calculations

In [37]:
video_recording_start_time_object = datetime.strptime(video_recording_start_time_str, '%H:%M:%S')


In [38]:
video_recording_start_time_object

datetime.datetime(1900, 1, 1, 20, 2, 39)

# Syncing the MED-PC timestamps with the time in the video

- To get when in the video something happens (e.g. 10 seconds in), we will calculate the difference between the time of the event and the time that the video first starts being recorded
- NOTE: Make sure that you are using the associated files for the MED-PC recording file and the video file

In [39]:
first_medpc_recording_file = cs_time_df["file_path"].unique()[0]

first_cs_time_df = cs_time_df[cs_time_df["file_path"] == first_medpc_recording_file]

In [40]:
# The time of the first controlled stimulus 
first_cs_time_df["cs_time_object"].iloc[0]

Timestamp('1900-01-01 20:04:13')

In [41]:
# The time of the second controlled stimulus
first_cs_time_df["cs_time_object"].iloc[1]

Timestamp('1900-01-01 20:05:33')

In [42]:
# The time that the video started being recorded
video_recording_start_time_object

datetime.datetime(1900, 1, 1, 20, 2, 39)

In [43]:
# Offset into the video of the first three controlled stimuli:
# each event time minus the time the video started being recorded
cs_01_time_object, cs_02_time_object, cs_03_time_object = (
    first_cs_time_df["cs_time_object"].iloc[row_position] - video_recording_start_time_object
    for row_position in range(3)
)

In [44]:
cs_01_time_object.seconds

94

In [45]:
cs_02_time_object.seconds

174

In [46]:
cs_03_time_object.seconds

264

# Syncing the MED-PC Port Entries with the time in the video

- To get when in the video something happens (e.g. 10 seconds in), we will calculate the difference between the time of the event and the time that the video first starts being recorded
- NOTE: Make sure that you are using the associated files for the MED-PC recording file and the video file

In [47]:
first_cs_time_df["(F)port_entry_minutescomputer"].head(n=50)

0    4.0
1    5.0
2    6.0
3    7.0
4    7.0
5    8.0
6    8.0
7    8.0
8    8.0
9    8.0
Name: (F)port_entry_minutescomputer, dtype: float64

In [48]:
first_cs_time_df["(F)port_entry_minutescomputer"].tail(n=50)

0    4.0
1    5.0
2    6.0
3    7.0
4    7.0
5    8.0
6    8.0
7    8.0
8    8.0
9    8.0
Name: (F)port_entry_minutescomputer, dtype: float64

In [49]:
first_cs_time_df["(B)port_entry_secondscomputer"].head(n=50)

0    28.0
1     4.0
2    19.0
3    31.0
4    44.0
5    20.0
6    42.0
7    46.0
8    50.0
9    54.0
Name: (B)port_entry_secondscomputer, dtype: float64

In [50]:
first_cs_time_df["(B)port_entry_secondscomputer"].tail(n=50)

0    28.0
1     4.0
2    19.0
3    31.0
4    44.0
5    20.0
6    42.0
7    46.0
8    50.0
9    54.0
Name: (B)port_entry_secondscomputer, dtype: float64

In [51]:
# Offset into the video of three example port entries (rows 0, 5 and 8):
# each port entry time minus the time the video started being recorded
port_entry_01_time_object, port_entry_02_time_object, port_entry_03_time_object = (
    first_cs_time_df["pe_time_object"].iloc[row_position] - video_recording_start_time_object
    for row_position in (0, 5, 8)
)

In [52]:
first_cs_time_df["pe_time_object"].iloc[8]

Timestamp('1900-01-01 20:08:50')

In [53]:
port_entry_01_time_object.seconds

109

In [54]:
port_entry_02_time_object.seconds

341

In [55]:
port_entry_03_time_object.seconds

371

# Trimming the video

In [56]:
video_object = VideoFileClip("./data/automatic_30_min_end_test/2clock_test_recording_2022-07-13T20_02_39.avi")

In [57]:
video_object.duration

385.57

fixed func issue with: https://github.com/Zulko/moviepy/issues/1765


In [58]:
# Clip from the first CS until 20 seconds after the second CS
cs1_clip_start = cs_01_time_object.seconds
cs1_clip_end = cs_02_time_object.seconds + 20
cs1_trimmed_video = video_object.subclip(cs1_clip_start, cs1_clip_end)

In [59]:
# Clip from the second CS until 20 seconds after the third CS
cs2_clip_start = cs_02_time_object.seconds
cs2_clip_end = cs_03_time_object.seconds + 20
cs2_trimmed_video = video_object.subclip(cs2_clip_start, cs2_clip_end)

In [60]:
# Clip from the third CS until the end of the video
cs3_clip_start = cs_03_time_object.seconds
cs3_clip_end = video_object.duration
cs3_trimmed_video = video_object.subclip(cs3_clip_start, cs3_clip_end)

In [61]:
# Clip from the first selected port entry until 20 seconds after the second one
pe1_clip_start = port_entry_01_time_object.seconds
pe1_clip_end = port_entry_02_time_object.seconds + 20
pe1_trimmed_video = video_object.subclip(pe1_clip_start, pe1_clip_end)

In [62]:
# Clip from the second selected port entry until 20 seconds after the third one
pe2_clip_start = port_entry_02_time_object.seconds
pe2_clip_end = port_entry_03_time_object.seconds + 20
pe2_trimmed_video = video_object.subclip(pe2_clip_start, pe2_clip_end)

In [63]:
# Clip from the third selected port entry until the end of the video
pe3_clip_start = port_entry_03_time_object.seconds
pe3_clip_end = video_object.duration
pe3_trimmed_video = video_object.subclip(pe3_clip_start, pe3_clip_end)

In [64]:
# Write the first CS clip to disk. `write_videofile` is the current moviepy
# method; `to_videofile` is only its deprecated alias.
cs1_trimmed_video.write_videofile("./data/automatic_30_min_end_test/trimmed_cs1.mp4")

Moviepy - Building video ./data/automatic_30_min_end_test/trimmed_cs1.mp4.
Moviepy - Writing video ./data/automatic_30_min_end_test/trimmed_cs1.mp4



                                                                                                                                       

Moviepy - Done !
Moviepy - video ready ./data/automatic_30_min_end_test/trimmed_cs1.mp4




In [65]:
# Write the second CS clip to disk. `write_videofile` is the current moviepy
# method; `to_videofile` is only its deprecated alias.
cs2_trimmed_video.write_videofile("./data/automatic_30_min_end_test/trimmed_cs2.mp4")

Moviepy - Building video ./data/automatic_30_min_end_test/trimmed_cs2.mp4.
Moviepy - Writing video ./data/automatic_30_min_end_test/trimmed_cs2.mp4



                                                                                                                                       

Moviepy - Done !
Moviepy - video ready ./data/automatic_30_min_end_test/trimmed_cs2.mp4


In [66]:
# Write the third CS clip to disk. `write_videofile` is the current moviepy
# method; `to_videofile` is only its deprecated alias.
cs3_trimmed_video.write_videofile("./data/automatic_30_min_end_test//trimmed_cs3.mp4")

Moviepy - Building video ./data/automatic_30_min_end_test//trimmed_cs3.mp4.
Moviepy - Writing video ./data/automatic_30_min_end_test//trimmed_cs3.mp4



                                                                                                                                       

Moviepy - Done !
Moviepy - video ready ./data/automatic_30_min_end_test//trimmed_cs3.mp4




In [67]:
# Rebuilds the same clip (same start and end) as the earlier cell that assigns `pe1_trimmed_video`
pe1_trimmed_video = video_object.subclip(port_entry_01_time_object.seconds, port_entry_02_time_object.seconds + 20)

In [68]:
# Rebuilds the same clip (same start and end) as the earlier cell that assigns `pe2_trimmed_video`
pe2_trimmed_video = video_object.subclip(port_entry_02_time_object.seconds, port_entry_03_time_object.seconds + 20)

In [69]:
# Rebuilds the same clip (same start and end) as the earlier cell that assigns `pe3_trimmed_video`
pe3_trimmed_video = video_object.subclip(port_entry_03_time_object.seconds, video_object.duration)

In [70]:
# Write the first port entry clip to disk. `write_videofile` is the current
# moviepy method; `to_videofile` is only its deprecated alias.
pe1_trimmed_video.write_videofile("./data/automatic_30_min_end_test/trimmed_pe1.mp4")

Moviepy - Building video ./data/automatic_30_min_end_test/trimmed_pe1.mp4.
Moviepy - Writing video ./data/automatic_30_min_end_test/trimmed_pe1.mp4



                                                                                                                                       

Moviepy - Done !
Moviepy - video ready ./data/automatic_30_min_end_test/trimmed_pe1.mp4


In [71]:
# Write the second port entry clip to disk. `write_videofile` is the current
# moviepy method; `to_videofile` is only its deprecated alias.
pe2_trimmed_video.write_videofile("./data/automatic_30_min_end_test/trimmed_pe2.mp4")

Moviepy - Building video ./data/automatic_30_min_end_test/trimmed_pe2.mp4.
Moviepy - Writing video ./data/automatic_30_min_end_test/trimmed_pe2.mp4



                                                                                                                                       

Moviepy - Done !
Moviepy - video ready ./data/automatic_30_min_end_test/trimmed_pe2.mp4




In [72]:
# Write the third port entry clip to disk. `write_videofile` is the current
# moviepy method; `to_videofile` is only its deprecated alias.
pe3_trimmed_video.write_videofile("./data/automatic_30_min_end_test//trimmed_pe3.mp4")

Moviepy - Building video ./data/automatic_30_min_end_test//trimmed_pe3.mp4.
Moviepy - Writing video ./data/automatic_30_min_end_test//trimmed_pe3.mp4



                                                                                                                                       

Moviepy - Done !
Moviepy - video ready ./data/automatic_30_min_end_test//trimmed_pe3.mp4


In [73]:
Video("./data/automatic_30_min_end_test/trimmed_cs1.mp4")