# Test Playground: A place to try out our ideas in a notebook!

This notebook is a scratch space for trying out new ideas and test scripts for analyzing FED-related data.

### Import libraries of interest

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt

In [2]:
# Ask the user when the session started and parse the answer into a datetime.
# The reply must match '%Y-%m-%d %H:%M:%S' exactly, otherwise strptime raises ValueError.
start_prompt = "Enter the date and start time (YYYY-MM-DD hh:mm:ss, e.g., 2023-05-01 10:00:00): "
start_time = dt.strptime(input(start_prompt), '%Y-%m-%d %H:%M:%S')
print(start_time)

2022-06-16 10:00:00


### Import a sample piece of data

In [3]:
# Load one sample FED log from the local checkout (it could be read from Dropbox instead).
sample_csv = '~/github-projects/fed-scripts/data/FED001_061622_00.CSV'
df = pd.read_csv(sample_csv)
df

Unnamed: 0,MM:DD:YYYY hh:mm:ss,Library_Version,Session_type,Device_Number,Battery_Voltage,Motor_Turns,FR,Event,Active_Poke,Left_Poke_Count,Right_Poke_Count,Pellet_Count,Block_Pellet_Count,Retrieval_Time,InterPelletInterval,Poke_Time
0,6/16/2022 10:38:41,1.12.0,FR1_2s_delay,13,4.02,,1,Right,Left,0,1,0,0,,,0.37
1,6/16/2022 10:38:43,1.12.0,FR1_2s_delay,13,4.02,,1,Right,Left,0,2,0,0,,,0.26
2,6/16/2022 10:40:22,1.12.0,FR1_2s_delay,13,4.01,,1,Left,Left,1,2,0,0,,,0.14
3,6/16/2022 10:40:31,1.12.0,FR1_2s_delay,13,4.02,3.0,1,Pellet,Left,1,2,1,0,1.56,,
4,6/16/2022 10:40:56,1.12.0,FR1_2s_delay,13,4.02,,1,Left,Left,2,2,1,0,,,0.1
5,6/16/2022 10:41:01,1.12.0,FR1_2s_delay,13,4.02,1.0,1,Pellet,Left,2,2,2,0,2.06,30.0,
6,6/16/2022 10:50:02,1.12.0,FR1_2s_delay,13,4.02,,1,Left,Left,3,2,2,0,,,0.48
7,6/16/2022 10:50:05,1.12.0,FR1_2s_delay,13,4.02,1.0,1,Pellet,Left,3,2,3,0,0.52,544.0,
8,6/16/2022 12:00:39,1.12.0,FR1_2s_delay,13,4.02,,1,Left,Left,4,2,3,0,,,0.15
9,6/16/2022 12:00:42,1.12.0,FR1_2s_delay,13,4.01,1.0,1,Pellet,Left,4,2,4,0,0.58,4237.0,


### Convert the time column to datetime objects

In [77]:
# Parse the raw timestamp strings into pandas datetimes, store them back in the
# same column, then record how long after the user-supplied start each row occurred.
timestamps = pd.to_datetime(df['MM:DD:YYYY hh:mm:ss'])
df['MM:DD:YYYY hh:mm:ss'] = timestamps
df['Time Delta'] = timestamps - start_time
df.head()

Unnamed: 0,MM:DD:YYYY hh:mm:ss,Library_Version,Session_type,Device_Number,Battery_Voltage,Motor_Turns,FR,Event,Active_Poke,Left_Poke_Count,Right_Poke_Count,Pellet_Count,Block_Pellet_Count,Retrieval_Time,InterPelletInterval,Poke_Time,Time Delta
0,2022-06-16 10:38:41,1.12.0,FR1_2s_delay,13,4.02,,1,Right,Left,0,1,0,0,,,0.37,0 days 00:38:41
1,2022-06-16 10:38:43,1.12.0,FR1_2s_delay,13,4.02,,1,Right,Left,0,2,0,0,,,0.26,0 days 00:38:43
2,2022-06-16 10:40:22,1.12.0,FR1_2s_delay,13,4.01,,1,Left,Left,1,2,0,0,,,0.14,0 days 00:40:22
3,2022-06-16 10:40:31,1.12.0,FR1_2s_delay,13,4.02,3.0,1,Pellet,Left,1,2,1,0,1.56,,,0 days 00:40:31
4,2022-06-16 10:40:56,1.12.0,FR1_2s_delay,13,4.02,,1,Left,Left,2,2,1,0,,,0.1,0 days 00:40:56


### For now, find out where the file starts and ends

In [15]:

# Report the timestamps held in the first and last rows of the file.
# Both lookups are positional (.iloc) so they keep working even if the frame's
# index is no longer the default 0..n-1 labels (e.g. after filtering or sorting).
print('The file starts at', df['MM:DD:YYYY hh:mm:ss'].iloc[0])
print('The file ends at', df['MM:DD:YYYY hh:mm:ss'].iloc[-1])

The file starts at 2022-06-16 10:38:41
The file ends at 2022-06-16 13:56:04


### Make bins based on those times

In [16]:
# Hour-of-day bin edges (10 through 14) and one label per interval between
# consecutive edges; both are consumed by pd.cut in the next cell.
bins = list(range(10, 15))
labels = [f'hour {k}' for k in range(1, len(bins))]

In [17]:
# Label each row with the bin its clock hour falls into. right=False makes each
# interval closed on the left, so an hour value of 10 lands in the first bin.
event_hours = df['MM:DD:YYYY hh:mm:ss'].dt.hour
df['Time Bin'] = pd.cut(event_hours, bins=bins, labels=labels, right=False)

In [18]:
# Preview the first rows to confirm the new 'Time Bin' column is present.
df.head()

Unnamed: 0,MM:DD:YYYY hh:mm:ss,Library_Version,Session_type,Device_Number,Battery_Voltage,Motor_Turns,FR,Event,Active_Poke,Left_Poke_Count,Right_Poke_Count,Pellet_Count,Block_Pellet_Count,Retrieval_Time,InterPelletInterval,Poke_Time,Time Bin
0,2022-06-16 10:38:41,1.12.0,FR1_2s_delay,13,4.02,,1,Right,Left,0,1,0,0,,,0.37,hour 1
1,2022-06-16 10:38:43,1.12.0,FR1_2s_delay,13,4.02,,1,Right,Left,0,2,0,0,,,0.26,hour 1
2,2022-06-16 10:40:22,1.12.0,FR1_2s_delay,13,4.01,,1,Left,Left,1,2,0,0,,,0.14,hour 1
3,2022-06-16 10:40:31,1.12.0,FR1_2s_delay,13,4.02,3.0,1,Pellet,Left,1,2,1,0,1.56,,,hour 1
4,2022-06-16 10:40:56,1.12.0,FR1_2s_delay,13,4.02,,1,Left,Left,2,2,1,0,,,0.1,hour 1


### Aggregate event counts by time bin

In [19]:
# Flag every poke or pellet row with a 1 so that events can be counted by summing.
# Rows whose Event is anything else are not assigned a value here.
is_counted_event = df['Event'].isin(['Right', 'Left', 'Pellet'])
df.loc[is_counted_event, 'Event Binary'] = 1

In [44]:
# Sum the per-row event flags for every (time bin, event type) pair.
# 'Time Bin' is categorical (it comes from pd.cut), so observed=False is spelled
# out: it keeps bins that contain no events in the result (summing to 0) and
# pins that behaviour instead of relying on pandas' implicit default, which is
# deprecated and scheduled to change.
nt_df = pd.DataFrame(
    df[['Time Bin', 'Event', 'Event Binary']]
    .groupby(['Time Bin', 'Event'], observed=False)
    .sum()
)
nt_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Event Binary
Time Bin,Event,Unnamed: 2_level_1
hour 1,Left,3.0
hour 1,Pellet,3.0
hour 1,Right,2.0
hour 2,Left,0.0
hour 2,Pellet,0.0
hour 2,Right,0.0
hour 3,Left,9.0
hour 3,Pellet,9.0
hour 3,Right,0.0
hour 4,Left,5.0


In [46]:
# Pivot the long (time bin, event) counts into one row per time bin with one
# column per event type, then derive per-bin summary columns.
# NOTE(review): fill_value=1 makes any missing (bin, event) pair count as 1
# rather than 0 - confirm this is intended.
tdf = (
    nt_df.groupby(['Time Bin', 'Event'])['Event Binary']
    .first()
    .unstack(fill_value=1)
    .rename_axis(None)
)
# Left pokes divided by pellets; bins with zero pellets yield NaN or inf.
tdf['Pokes Per Pellet'] = tdf['Left'].div(tdf['Pellet'])
# NOTE(review): 0.066 is presumably a per-pellet amount - confirm value and units.
tdf['Chow'] = tdf['Pellet'].mul(0.066)
tdf

Event,Left,Pellet,Right,Pokes Per Pellet,Chow
hour 1,3.0,3.0,2.0,1.0,0.198
hour 2,0.0,0.0,0.0,,0.0
hour 3,9.0,9.0,0.0,1.0,0.594
hour 4,5.0,5.0,2.0,1.0,0.33


In [None]:
        # create separate dataframe for each mouse
        # (all original columns)
        by_mouse_df_list = []
        
        # find unique mouse indexes
        mice_indexes = pd.unique(self.main_df[' Mouse'])
        # split main dataframe into single dataframe per mouse
        for index in mice_indexes:
            single_mouse_df = self.main_df[self.main_df[' Mouse']==index]
            by_mouse_df_list.append(single_mouse_df)
            
########################################################################        
        # list of dataframes by mouse (only given dates)
        # (only sorted timestamps, mouse index, pellet count, motorturn count)
        self.mouse_df_list = []
########################################################################
            
        # make sure all dates are sorted:
        for i in range(len(by_mouse_df_list)):
            # count how many rows are there
            # that is equal to the total pellet count
            total_pellet_count = by_mouse_df_list[i].shape[0]
            # create consecutive pellet count values
            total_pellet_count_list = [i+1 for i in range(total_pellet_count)]
            # convert dates to pandas datetime
            ts_list = pd.to_datetime(by_mouse_df_list[i]['MM:DD:YYYY hh:mm:ss']).tolist()
            # create new dataframe
            new_df = pd.DataFrame({"MM:DD:YYYY hh:mm:ss" :ts_list,
                                   "Mouse" : by_mouse_df_list[i][' Mouse'].tolist(),
                                   "PelletCount" : total_pellet_count_list,
                                   "MotorTurns" : by_mouse_df_list[i][' MotorTurns'].tolist()})
            # make timestamps indexes
            new_df.index = new_df['MM:DD:YYYY hh:mm:ss']
            # remove old column
            del new_df['MM:DD:YYYY hh:mm:ss']
            # sort dates
            new_df = new_df.sort_index()
            
            # select only user defined timeframe
            # https://pandas.pydata.org/pandas-docs/stable/timeseries.html
            new_df = new_df[self.my_start_date:self.my_end_date]
            # replace pellet count with new consecutive pellet count for that dates
            new_df['PelletCount'] = [i+1 for i in range(new_df.shape[0])]
            if new_df.shape[0] != 0:
                self.mouse_df_list.append(new_df)
            else:
                # if for a mouse, there is no data within given dates
#                my_start_year,my_start_month,my_start_day = self.my_start_date.split('-')
#                my_end_year,my_end_month,my_end_day = self.my_end_date.split('-')
                # create dataframe with all zero values
                start = datetime.datetime.strptime(self.my_start_date, "%Y-%m-%d %H:%M:%S")
                end = datetime.datetime.strptime(self.my_end_date, "%Y-%m-%d %H:%M:%S")
                new_df = pd.DataFrame({"MM:DD:YYYY hh:mm:ss" :[start,end],
                                       "Mouse" : [by_mouse_df_list[i][' Mouse'].iloc[0], by_mouse_df_list[i][' Mouse'].iloc[0]],
                                       "PelletCount" : [0,0],
                                       "MotorTurns" : [0,0]})
                new_df.index = new_df['MM:DD:YYYY hh:mm:ss']
                del new_df['MM:DD:YYYY hh:mm:ss']
                new_df = new_df.sort_index()
                self.mouse_df_list.append(new_df)

        # check if there was any data    
        if len(self.mouse_df_list) == 0:
            messagebox.showwarning(
                "Warning!",
                "No data for given dates!"
                )
            return 0

            
        # get all mice ids from dataframes
        self.mice_ids_list = []
        for i in range(len(self.mouse_df_list)):
            mouse_id = self.mouse_df_list[i]['Mouse'].iloc[0]
            if mouse_id not in self.mice_ids_list:
                self.mice_ids_list.append(mouse_id)
                
                     
        return 1   
    
    def include_selected_mice(self):
        """Restrict ``self.mouse_df_list`` to the mice selected by the user.

        The mouse id of each dataframe is read from the first row of its
        ``'Mouse'`` column. Only dataframes whose id appears in
        ``self.retrieved_id_ints`` are kept, in their original order, and the
        filtered list replaces ``self.mouse_df_list`` as the main data source.
        """
        # keep a dataframe only if its mouse id was selected by the user
        self.mouse_df_list = [
            mouse_df
            for mouse_df in self.mouse_df_list
            if mouse_df['Mouse'].iloc[0] in self.retrieved_id_ints
        ]