# Test Playground: A place to try out our ideas in a notebook!

This notebook is a scratch space for trying out new ideas and test scripts for analyzing FED-related data.

### Import libraries of interest

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import os
pd.set_option('display.max_rows', None)


In [2]:
#############################################################################   
#####################################################
# FUNCTION TO READ ALL FED CSV FILES IN A FOLDER,
# AND TO COMBINE AND SORT THEM BY TIMESTAMP (MOUSE IDS ARE NOT EXTRACTED HERE)

def read_csv_files(file_path):
    """Load every FED CSV file in a folder into one chronologically sorted DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Folder to scan. Only entries whose names start with "FED" and end
        with ".CSV" (case-sensitive) are read.

    Returns
    -------
    pandas.DataFrame
        The rows of all matching files concatenated (each file's own index
        labels are kept) and ordered by the 'MM:DD:YYYY hh:mm:ss' column.
        The column itself is returned as read from the files; it is only
        parsed to datetimes for the purpose of ordering.

    Raises
    ------
    ValueError
        If the folder contains no matching file, or if a timestamp cannot
        be parsed by ``pd.to_datetime`` while sorting.
    """
    timestamp_col = 'MM:DD:YYYY hh:mm:ss'

    # Sort the names so the read order does not depend on the file system.
    csv_names = sorted(
        name for name in os.listdir(file_path)
        if name.startswith("FED") and name.endswith(".CSV")
    )
    if not csv_names:
        # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
        raise ValueError(f"No FED*.CSV files found in {file_path!r}")

    all_dataframes = [
        pd.read_csv(os.path.join(file_path, name)) for name in csv_names
    ]

    # Order by the parsed timestamp, not the raw text: a plain string sort is
    # lexical and misorders rows across a year boundary or with unpadded
    # month/day values. A stable sort keeps the within-file order of events
    # that share the same timestamp.
    singular_df = pd.concat(all_dataframes).sort_values(
        by=[timestamp_col], key=pd.to_datetime, kind='mergesort'
    )
    return singular_df

In [3]:
# Folder of FED CSV files to load.
# NOTE(review): this is an absolute path on one machine; consider making it configurable.
FED_DATA_DIR = '/Users/kevinmcpherson/github-projects/fed-scripts/data/FED001'
concat_df = read_csv_files(FED_DATA_DIR)

In [4]:
# Prompt for the reference start moment and parse it into a datetime;
# 'Time Delta' is later computed relative to this value.
start_time = dt.strptime(
    input("Enter the date and start time (YYYY-MM-DD hh:mm:ss, e.g., 2023-05-01 10:00:00): "),
    '%Y-%m-%d %H:%M:%S',
)
print(start_time)

2022-06-16 12:00:00


### Import a sample piece of data

In [5]:
# # Import the data, or read from Dropbox
# df = pd.read_csv('~/github-projects/fed-scripts/data/FED001_061622_00.CSV')
# # df = df.concat('','','')
# df

### Convert the time column to datetime objects

In [6]:
# Parse the timestamp text into datetimes, then express every event as an
# offset from the user-supplied start_time.
time_col = 'MM:DD:YYYY hh:mm:ss'
parsed_times = pd.to_datetime(concat_df[time_col])
concat_df[time_col] = parsed_times
concat_df['Time Delta'] = parsed_times - start_time
concat_df.head()

Unnamed: 0,MM:DD:YYYY hh:mm:ss,Library_Version,Session_type,Device_Number,Battery_Voltage,Motor_Turns,FR,Event,Active_Poke,Left_Poke_Count,Right_Poke_Count,Pellet_Count,Block_Pellet_Count,Retrieval_Time,InterPelletInterval,Poke_Time,Time Delta
0,2022-06-16 14:14:23,1.12.0,ClosedEcon,13,4.01,,1,Right,Left,0,1,0,0,,,0.22,0 days 02:14:23
1,2022-06-16 14:14:41,1.12.0,ClosedEcon,13,4.01,,1,Left,Left,1,1,0,0,,,0.1,0 days 02:14:41
2,2022-06-16 14:14:49,1.12.0,ClosedEcon,13,4.01,1.0,1,Pellet,Left,1,1,1,1,6.91,,,0 days 02:14:49
3,2022-06-16 15:27:35,1.12.0,ClosedEcon,13,4.0,,1,Left,Left,2,1,1,0,,,0.4,0 days 03:27:35
4,2022-06-16 15:27:36,1.12.0,ClosedEcon,13,4.0,1.0,1,Pellet,Left,2,1,2,1,0.52,4367.0,,0 days 03:27:36


### For now, find out where the file starts and ends

In [7]:

# Report the first and last rows, once as absolute timestamps and once as
# offsets from start_time.
for column in ('MM:DD:YYYY hh:mm:ss', 'Time Delta'):
    print('The file starts at', concat_df[column].iloc[0])
    print('The file ends at', concat_df[column].iloc[-1])

The file starts at 2022-06-16 14:14:23
The file ends at 2022-07-06 09:49:18
The file starts at 0 days 02:14:23
The file ends at 19 days 21:49:18


In [8]:
# Elapsed-time edges spaced 24 hours apart, from 00:12:00 up to 20 days.
# The lowercase '24h' alias replaces '24H': the uppercase hour alias is
# deprecated as of pandas 2.2, and the generated values are the same.
# NOTE(review): the first edge is 12 *minutes* after zero ('0 days 00:12:00');
# if 12 hours was intended it should read '0 days 12:00:00' — confirm.
# NOTE(review): neither name is referenced again in the visible cells.
bin_ends = list(pd.timedelta_range(start='0 days 00:12:00', end='20 days 00:00:00', freq='24h'))
bin_ends_series = pd.Series(bin_ends)

In [9]:
# Show the first rows of concat_df again as a quick sanity check.
concat_df.head()

Unnamed: 0,MM:DD:YYYY hh:mm:ss,Library_Version,Session_type,Device_Number,Battery_Voltage,Motor_Turns,FR,Event,Active_Poke,Left_Poke_Count,Right_Poke_Count,Pellet_Count,Block_Pellet_Count,Retrieval_Time,InterPelletInterval,Poke_Time,Time Delta
0,2022-06-16 14:14:23,1.12.0,ClosedEcon,13,4.01,,1,Right,Left,0,1,0,0,,,0.22,0 days 02:14:23
1,2022-06-16 14:14:41,1.12.0,ClosedEcon,13,4.01,,1,Left,Left,1,1,0,0,,,0.1,0 days 02:14:41
2,2022-06-16 14:14:49,1.12.0,ClosedEcon,13,4.01,1.0,1,Pellet,Left,1,1,1,1,6.91,,,0 days 02:14:49
3,2022-06-16 15:27:35,1.12.0,ClosedEcon,13,4.0,,1,Left,Left,2,1,1,0,,,0.4,0 days 03:27:35
4,2022-06-16 15:27:36,1.12.0,ClosedEcon,13,4.0,1.0,1,Pellet,Left,2,1,2,1,0.52,4367.0,,0 days 03:27:36


In [48]:
def convert_to_hours(delta):
    """Format a time difference as an "HH:MM:SS" string.

    ``delta`` must provide ``total_seconds()`` (e.g. a timedelta). Hours are
    not wrapped at 24, so multi-day differences produce hour values above 23.
    Each field is zero-padded to at least two characters.
    """
    elapsed = delta.total_seconds()
    hh = int(elapsed // 3600)
    mm = int((elapsed % 3600) // 60)
    ss = int(elapsed % 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d}"

def extract_hours(time_delta):
    """Return the hour field of an "HH:MM:SS" string as an integer."""
    hours_text, _, _ = time_delta.partition(':')
    return int(hours_text)

In [49]:
# Render each elapsed time as "HH:MM:SS" text, then keep the whole-hour part
# of that text as an integer column.
elapsed_text = concat_df['Time Delta'].apply(convert_to_hours)
concat_df['Time Delta Reformatted'] = elapsed_text
concat_df['Hours Since Start'] = elapsed_text.apply(extract_hours)
concat_df.head()

Unnamed: 0,MM:DD:YYYY hh:mm:ss,Library_Version,Session_type,Device_Number,Battery_Voltage,Motor_Turns,FR,Event,Active_Poke,Left_Poke_Count,Right_Poke_Count,Pellet_Count,Block_Pellet_Count,Retrieval_Time,InterPelletInterval,Poke_Time,Time Delta,Time Delta Reformatted,Hours Since Start
0,2022-06-16 14:14:23,1.12.0,ClosedEcon,13,4.01,,1,Right,Left,0,1,0,0,,,0.22,0 days 02:14:23,02:14:23,2
1,2022-06-16 14:14:41,1.12.0,ClosedEcon,13,4.01,,1,Left,Left,1,1,0,0,,,0.1,0 days 02:14:41,02:14:41,2
2,2022-06-16 14:14:49,1.12.0,ClosedEcon,13,4.01,1.0,1,Pellet,Left,1,1,1,1,6.91,,,0 days 02:14:49,02:14:49,2
3,2022-06-16 15:27:35,1.12.0,ClosedEcon,13,4.0,,1,Left,Left,2,1,1,0,,,0.4,0 days 03:27:35,03:27:35,3
4,2022-06-16 15:27:36,1.12.0,ClosedEcon,13,4.0,1.0,1,Pellet,Left,2,1,2,1,0.52,4367.0,,0 days 03:27:36,03:27:36,3


In [71]:
# def create_bins(time_period_var, time_delta_series):
#     
#     hour_end = int(time_delta_series.loc[-1].split(':')[0])
#     for i in range(0, hour_end, time_period_var):


# Width of each time bin, in hours.
BIN_WIDTH_HOURS = 24

# Use the largest elapsed hour in the data rather than the last row, so the
# edges do not depend on row order. Clamp at 0 so at least one bin exists.
last_hour = max(int(concat_df['Hours Since Start'].max()), 0)

# Bins are half-open [lo, hi) because right=False below, so the final edge
# must be strictly greater than last_hour. Deriving the bin count with
# floor-division + 1 guarantees that even when last_hour is an exact multiple
# of the bin width (including 0); otherwise rows in that final hour would fall
# outside every bin.
n_bins = last_hour // BIN_WIDTH_HOURS + 1

bins = [i * BIN_WIDTH_HOURS for i in range(n_bins + 1)]
labels = [f'{lo}-{hi}hrs' for lo, hi in zip(bins[:-1], bins[1:])]

concat_df['Bin'] = pd.cut(concat_df['Hours Since Start'], bins, labels=labels, right=False)

In [72]:
# Show the first rows again to check the newly added 'Bin' column.
concat_df.head()

Unnamed: 0,MM:DD:YYYY hh:mm:ss,Library_Version,Session_type,Device_Number,Battery_Voltage,Motor_Turns,FR,Event,Active_Poke,Left_Poke_Count,Right_Poke_Count,Pellet_Count,Block_Pellet_Count,Retrieval_Time,InterPelletInterval,Poke_Time,Time Delta,Time Delta Reformatted,Hours Since Start,Bin
0,2022-06-16 14:14:23,1.12.0,ClosedEcon,13,4.01,,1,Right,Left,0,1,0,0,,,0.22,0 days 02:14:23,02:14:23,2,0-24hrs
1,2022-06-16 14:14:41,1.12.0,ClosedEcon,13,4.01,,1,Left,Left,1,1,0,0,,,0.1,0 days 02:14:41,02:14:41,2,0-24hrs
2,2022-06-16 14:14:49,1.12.0,ClosedEcon,13,4.01,1.0,1,Pellet,Left,1,1,1,1,6.91,,,0 days 02:14:49,02:14:49,2,0-24hrs
3,2022-06-16 15:27:35,1.12.0,ClosedEcon,13,4.0,,1,Left,Left,2,1,1,0,,,0.4,0 days 03:27:35,03:27:35,3,0-24hrs
4,2022-06-16 15:27:36,1.12.0,ClosedEcon,13,4.0,1.0,1,Pellet,Left,2,1,2,1,0.52,4367.0,,0 days 03:27:36,03:27:36,3,0-24hrs


### Make bins based on those times

In [26]:
# bins = [10, 11, 12, 13, 14]
# labels = ['hour 1', 'hour 2', 'hour 3', 'hour 4']
# events = ['Left', 'Right', 'Pellet', 'LeftWithPellet', 'RightWithPellet']



### Aggregate based on the results

In [73]:
# Mark every row whose Event is one of the five listed event types with 1.
# Rows with any other Event value are not assigned by this statement.
concat_df.loc[
    concat_df['Event'].isin(['Right', 'Left', 'Pellet', 'LeftWithPellet', 'RightWithPellet']),
    'Event Binary',
] = 1

In [75]:
# Total the event flags for every (Bin, Event) pair.
# 'Bin' is categorical (it comes from pd.cut), so observed=False is passed
# explicitly: it keeps bins that contain no rows in the result (as zero sums)
# and avoids relying on the implicit default, which newer pandas releases
# deprecate and change.
# groupby(...).sum() already returns a DataFrame, so no extra wrapping is needed.
nt_df = (
    concat_df[['Bin', 'Event', 'Event Binary']]
    .groupby(['Bin', 'Event'], observed=False)
    .sum()
)
nt_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Event Binary
Bin,Event,Unnamed: 2_level_1
0-24hrs,Left,1642.0
0-24hrs,LeftWithPellet,1.0
0-24hrs,Pellet,134.0
0-24hrs,Right,55.0
0-24hrs,RightWithPellet,0.0
24-48hrs,Left,2397.0
24-48hrs,LeftWithPellet,0.0
24-48hrs,Pellet,176.0
24-48hrs,Right,8.0
24-48hrs,RightWithPellet,0.0


In [77]:
# Pivot the per-(Bin, Event) totals into one row per bin and one column per
# event type.
# NOTE(review): (Bin, Event) combinations missing from nt_df are filled with 1,
# not 0 — confirm this is intended.
tdf = (
    nt_df
    .groupby(['Bin', 'Event'])['Event Binary']
    .first()
    .unstack(fill_value=1)
    .rename_axis(None)
)

# Ratio of 'Left' events to 'Pellet' events in each bin.
tdf['Pokes Per Pellet'] = tdf['Left'] / tdf['Pellet']
# NOTE(review): 0.066 is presumably the energy per pellet in kcal — confirm.
tdf['Pellet (kcal)'] = tdf['Pellet'] * 0.066

tdf

Event,Left,LeftWithPellet,Pellet,Right,RightWithPellet,Pokes Per Pellet,Pellet (kcal)
0-24hrs,1642.0,1.0,134.0,55.0,0.0,12.253731,8.844
24-48hrs,2397.0,0.0,176.0,8.0,0.0,13.619318,11.616
48-72hrs,1214.0,1.0,176.0,14.0,0.0,6.897727,11.616
72-96hrs,1343.0,0.0,152.0,22.0,0.0,8.835526,10.032
96-120hrs,982.0,0.0,155.0,11.0,1.0,6.335484,10.23
120-144hrs,1098.0,0.0,149.0,5.0,0.0,7.369128,9.834
144-168hrs,1085.0,1.0,131.0,10.0,0.0,8.282443,8.646
168-192hrs,711.0,340.0,129.0,10.0,0.0,5.511628,8.514
192-216hrs,44.0,4.0,15.0,5.0,2.0,2.933333,0.99
216-240hrs,1.0,0.0,1.0,1.0,0.0,1.0,0.066


In [32]:
# Plus 1 of whatever the start time is as a label
# Left, right, pellet, leftwithpellet, rightwithpellet