In [1]:
#%%
# This is where I might put helper functions and scripts
import csv
from datetime import datetime
import numpy as np
import trompy as tp

def get_FEDevents(filename, eventname):
    """
    reads a FED csv file and returns the times of one type of event

    parameters
    ----------
    filename : str
        path to the FED csv file; the first line is skipped as a header
    eventname : str
        only rows whose column 9 equals this string are kept (e.g. "Pellet")

    returns
    --------
    pellettimes : list of floats
        event times in hours relative to 07:00:00 on the date of the first
        data row (events before 07:00 on that date come out negative);
        an empty list if the file has no data rows

    raises
    ------
    ValueError
        if the first timestamp matches none of the supported formats
    """
    formats = ['%Y-%m-%d %H:%M:%S', '%m/%d/%Y %H:%M:%S']

    # the context manager closes the file even if parsing fails further down
    with open(filename, newline="") as file:
        csvreader = csv.reader(file)
        next(csvreader, None)  # skip header; tolerate a completely empty file
        rows = [row for row in csvreader if row]  # csv yields [] for blank lines

    if not rows:
        return []

    # works out which format should be used, based on the first data row
    first_stamp = rows[0][0]
    first_datetime = None
    date_format = None
    for candidate in formats:
        try:
            first_datetime = datetime.strptime(first_stamp, candidate)
        except ValueError:
            continue
        date_format = candidate
        break
    if date_format is None:
        raise ValueError(
            "time data {!r} does not match any of {}".format(first_stamp, formats))

    timestamps = [datetime.strptime(row[0], date_format)
                  for row in rows if row[9] == eventname]

    # reference point is 07:00:00 on the calendar day of the first data row
    refpoint = first_datetime.replace(hour=7, minute=0, second=0)

    pellettimes = [(t - refpoint).total_seconds() / 3600 for t in timestamps]

    return pellettimes

def get_data_subset(dictionary, selectors, verbose=True):
    """
    filters a dictionary of per-mouse records by field values

    parameters
    ----------
    dictionary : dict
        maps mouse id -> dict of fields for that mouse
    selectors : dict
        maps field name -> required value; a mouse is removed as soon as one
        of its fields differs from the required value
    verbose : bool
        if True, prints how many entries remain

    returns
    --------
    output_dictionary : dict
        shallow copy of `dictionary` without the mismatching mice; mice that
        lack a selector field altogether are kept
    """
    remaining = dictionary.copy()

    for field, wanted in selectors.items():
        for mouse_id in dictionary:
            try:
                mismatch = remaining[mouse_id][field] != wanted
            except KeyError:
                # either already removed by an earlier selector, or the
                # record has no such field - in both cases nothing to do
                continue
            if mismatch:
                remaining.pop(mouse_id)

    if verbose:
        print("{} items in output dictionary".format(len(remaining)))

    return remaining

def get_data_fields(dictionary, fields, selectors):
    """
    pulls the requested fields out of the mice that match the selectors

    parameters
    ----------
    dictionary : dict
        maps mouse id -> dict of fields for that mouse
    fields : list of str
        field names to extract
    selectors : dict
        passed to get_data_subset to choose which mice are included

    returns
    --------
    output_list : list or None
        one list of values per requested field (in the order of `fields`);
        if exactly one field was collected that single list is returned
        directly; an empty list if no mouse matches; None if a selected
        mouse lacks one of the fields
    """
    selected = get_data_subset(dictionary, selectors)
    columns = []

    if not selected:
        print("No data in fields in selected dictionary")
    else:
        for field in fields:
            try:
                column = [record[field] for record in selected.values()]
            except KeyError:
                print("{} is not a key in selected dictionary".format(field))
                return None
            columns.append(column)

    # unwrap when only a single field was collected
    return columns[0] if len(columns) == 1 else columns

def get_intermealinterval (pellettimes):
    """
    mean of the inter-pellet intervals that are longer than 1/60
    (one minute when pellettimes are in hours)

    returns nan (with a numpy warning) when no interval exceeds the threshold
    """
    gaps = np.diff(pellettimes)
    between_meals = gaps[gaps > (1/60)]
    return np.mean(between_meals)
#%%

# Read the metafile and build `mice`: one dictionary of metadata per mouse id,
# then attach pellet timestamps from each mouse's fixed-ratio ("FR") sessions.
# NOTE(review): the absolute path is hard-coded; the recorded output of this cell is a
# FileNotFoundError for this file.
metafile = "/workspaces/FEDPROFERENCE/FEDXD_METAFILE.xls"
rows, header = tp.metafilereader(metafile, sheetname="METAFILE")

mice = {}
for row in rows:
    mouse_id = row[0]
    # only the first row seen for a mouse supplies its metadata; later rows are ignored here
    if mouse_id not in mice.keys():
        mice[mouse_id] = {}
        mice[mouse_id]["SEX"] = row[1]
        mice[mouse_id]["CHOICE_SESSION"] = row[4]
        mice[mouse_id]["FED_PELLET"] = row[5]
        mice[mouse_id]["MODE"] = row[6]
        mice[mouse_id]["DIET"] = row[7]

for key in mice.keys():
    for row in rows:
        # row[6] is stored above as "MODE"; only "FR" rows are read
        if row[0] == key and row[6] == "FR":
            # row[3] is used as the csv filename inside the data folder
            filename = "/workspaces/FEDPROFERENCE/data/{}".format(row[3])
            # a later matching row for the same mouse overwrites the earlier timestamps
            if row[5] == "CAS20":
                mice[key]["CAS20_timestamps"] = get_FEDevents(filename, "Pellet")
            elif row[7] == "PR":
                mice[key]["pr_timestamps"] = get_FEDevents(filename, "Pellet")
            elif row[7] == "NR":
                mice[key]["nr_timestamps"] = get_FEDevents(filename, "Pellet")
            else:
                # NOTE(review): the branches above test columns 5 and 7 but this prints column 27;
                # it raises IndexError if rows have fewer than 28 columns - confirm the index
                print(row[27], "is not a valid type of pellet for", key)






# %%
# to get average pellets per day: total pellet count divided by a fixed number of days
# (3 for grain, 7 for pr and nr)
# NOTE(review): no visible cell stores "grain_timestamps" (the loading loop stores
# "CAS20_timestamps", "pr_timestamps" and "nr_timestamps"), so the first line raises
# KeyError unless it is set elsewhere - confirm the intended key
for key in mice.keys():
    mice[key]["grain_avg_pellets"] = len(mice[key]["grain_timestamps"]) / 3
    mice[key]["pr_avg_pellets"] = len(mice[key]["pr_timestamps"]) / 7
    mice[key]["nr_avg_pellets"] = len(mice[key]["nr_timestamps"]) / 7

# %%
# 
def get_pellets_per_day(timestamps, start_time=4, days=7):
    """
    counts pellets in consecutive 24 h windows

    parameters
    ----------
    timestamps : list of floats
        pellet times in hours
    start_time : int
        currently unused; kept so existing calls keep working
    days : int
        number of 24 h windows to count, starting at hour 0

    returns
    --------
    pellets_per_day : list of ints
        pellets_per_day[d] is the number of timestamps t with
        d*24 <= t < (d+1)*24; timestamps below 0 or at/after days*24
        are not counted
    """
    pellets_per_day = []
    for day in range(days):
        window_start = day * 24
        window_end = window_start + 24
        # half-open window so a pellet exactly on a day boundary is counted once
        # (strict comparison on both sides dropped it from every day)
        n_pellets = sum(1 for t in timestamps if window_start <= t < window_end)
        pellets_per_day.append(n_pellets)

    return pellets_per_day

# store daily pellet counts per mouse: 3 days for grain, the default 7 days for pr and nr
# NOTE(review): "grain_timestamps" is not assigned in any visible cell - confirm the key
for key in mice.keys():
    mice[key]["grain_pellets_per_day"] = get_pellets_per_day(mice[key]["grain_timestamps"], days=3)
    mice[key]["pr_pellets_per_day"] = get_pellets_per_day(mice[key]["pr_timestamps"])
    mice[key]["nr_pellets_per_day"] = get_pellets_per_day(mice[key]["nr_timestamps"])

# %%
# assemble pellets per day for whole timecourse: grain days first, then the pr and nr
# days concatenated in an order that depends on mice[key]["order"]
# NOTE(review): no visible cell stores an "order" field in `mice`, so this lookup raises
# KeyError unless it is set elsewhere - confirm
for key in mice.keys():
    if mice[key]["order"] == 2:
        # order 2: pr before nr
        mice[key]["all_pellets_per_day"] = mice[key]["grain_pellets_per_day"] + \
            mice[key]["pr_pellets_per_day"] + mice[key]["nr_pellets_per_day"]
    else:
        # any other order: nr before pr
                mice[key]["all_pellets_per_day"] = mice[key]["grain_pellets_per_day"] + \
                    mice[key]["nr_pellets_per_day"] + mice[key]["pr_pellets_per_day"]



# %%
# add meal parameters to dictionary

def get_interpellet_intervals(pellettimes):
    """
    returns the differences between consecutive pellet times
    (numpy array, one element shorter than pellettimes)
    """
    return np.diff(pellettimes)

def get_intermealinterval (pellettimes):
    """
    mean of the inter-pellet intervals that are longer than 1/60
    (one minute when pellettimes are in hours)

    returns nan (with a numpy warning) when no interval exceeds the threshold
    """
    gaps = np.diff(pellettimes)
    between_meals = gaps[gaps > (1/60)]
    return np.mean(between_meals)

def get_mealsize(pellettimes):
    """
    calculates meal size from times of pellets
    parameters 
    ----------
    pellettimes : list of floats
        timestamps of pellet deliveries

    returns
    --------
    mealsize : float 
        number of pellets divided by the number of inter-pellet intervals
        longer than 1/60 (one minute when times are in hours); nan when no
        such interval exists (fewer than two pellets, or all pellets closer
        together than the threshold)
    """
    npellets = len(pellettimes)
    IPIs = np.diff(pellettimes)
    # NOTE(review): this counts the gaps between meals; k gaps separate k+1 meals,
    # so confirm whether the divisor should be nmeals + 1
    nmeals = int(np.count_nonzero(IPIs > 1/60))

    # avoid ZeroDivisionError when there is no gap long enough to separate meals
    if nmeals == 0:
        return np.nan

    mealsize = npellets/nmeals

    return mealsize

# store inter-pellet intervals, inter-meal interval and meal size for the pr and nr
# timestamps of every mouse
# NOTE(review): a mouse without "pr_timestamps" or "nr_timestamps" (these are only set
# for matching "FR" rows in the loading loop) raises KeyError here - confirm every mouse has both
for key in mice.keys():
    pr_timestamps = mice[key]["pr_timestamps"]
    mice[key]["interpellet_intervals_pr"] = get_interpellet_intervals(pr_timestamps)
    mice[key]["intermeal_interval_pr"] = get_intermealinterval(pr_timestamps)
    mice[key]["mealsize_pr"] = get_mealsize(pr_timestamps)

    nr_timestamps = mice[key]["nr_timestamps"]
    mice[key]["interpellet_intervals_nr"] = get_interpellet_intervals(nr_timestamps)
    mice[key]["intermeal_interval_nr"] = get_intermealinterval(nr_timestamps)
    mice[key]["mealsize_nr"] = get_mealsize(nr_timestamps)
    
# %%




FileNotFoundError: [Errno 2] No such file or directory: '\\workspaces\\FEDPROFERENCE\\FEDXD_METAFILE.xls'

In [2]:
def get_FEDevents(filename, eventname):
    """
    reads a FED csv file and returns the times of one type of event

    parameters
    ----------
    filename : str
        path to the FED csv file; the first line is skipped as a header
    eventname : str
        only rows whose column 9 equals this string are kept (e.g. "Pellet")

    returns
    --------
    pellettimes : list of floats
        event times in hours relative to 07:00:00 on the date of the first
        matching event (events before 07:00 on that date come out negative);
        an empty list if the file has no data rows or no matching event

    raises
    ------
    ValueError
        if a matching row's timestamp is not in '%m/%d/%Y %H:%M:%S' format
    """
    date_format = '%m/%d/%Y %H:%M:%S'  # the only timestamp format accepted
    event_column_index = 9  # column holding the event name

    # the context manager closes the file even if parsing fails further down
    with open(filename, newline="") as file:
        csvreader = csv.reader(file)
        next(csvreader, None)  # Skip header; tolerate a completely empty file
        rows = [row for row in csvreader if row]  # csv yields [] for blank lines

    if not rows:  # Check if rows is empty
        print(f"No data in file {filename}.")
        return []

    timestamps = [datetime.strptime(row[0], date_format)
                  for row in rows if row[event_column_index] == eventname]

    if not timestamps:  # If no events match, return an empty list
        return []

    # reference point is 07:00:00 on the calendar day of the first matching event
    refpoint = timestamps[0].replace(hour=7, minute=0, second=0)

    pellettimes = [(t - refpoint).total_seconds() / 3600 for t in timestamps]

    return pellettimes


In [3]:
#%%
# This is where I might put helper functions and scripts
import csv
from datetime import datetime
import numpy as np
import trompy as tp

def get_FEDevents(filename, eventname):
    """
    reads a FED csv file and returns the times of one type of event

    parameters
    ----------
    filename : str
        path to the FED csv file; the first line is skipped as a header
    eventname : str
        only rows whose column 9 equals this string are kept (e.g. "Pellet")

    returns
    --------
    pellettimes : list of floats
        event times in hours relative to 07:00:00 on the date of the first
        matching event (events before 07:00 on that date come out negative);
        an empty list if the file has no data rows or no matching event

    raises
    ------
    ValueError
        if a matching row's timestamp is not in '%m/%d/%Y %H:%M:%S' format
    """
    date_format = '%m/%d/%Y %H:%M:%S'  # the only timestamp format accepted
    event_column_index = 9  # column holding the event name

    # the context manager closes the file even if parsing fails further down
    with open(filename, newline="") as file:
        csvreader = csv.reader(file)
        next(csvreader, None)  # Skip header; tolerate a completely empty file
        rows = [row for row in csvreader if row]  # csv yields [] for blank lines

    if not rows:  # Check if rows is empty
        print(f"No data in file {filename}.")
        return []

    timestamps = [datetime.strptime(row[0], date_format)
                  for row in rows if row[event_column_index] == eventname]

    if not timestamps:  # If no events match, return an empty list
        return []

    # reference point is 07:00:00 on the calendar day of the first matching event
    refpoint = timestamps[0].replace(hour=7, minute=0, second=0)

    pellettimes = [(t - refpoint).total_seconds() / 3600 for t in timestamps]

    return pellettimes


def get_data_subset(dictionary, selectors, verbose=True):
    """
    filters a dictionary of per-mouse records by field values

    parameters
    ----------
    dictionary : dict
        maps mouse id -> dict of fields for that mouse
    selectors : dict
        maps field name -> required value; a mouse is removed as soon as one
        of its fields differs from the required value
    verbose : bool
        if True, prints how many entries remain

    returns
    --------
    output_dictionary : dict
        shallow copy of `dictionary` without the mismatching mice; mice that
        lack a selector field altogether are kept
    """
    remaining = dictionary.copy()

    for field, wanted in selectors.items():
        for mouse_id in dictionary:
            try:
                mismatch = remaining[mouse_id][field] != wanted
            except KeyError:
                # either already removed by an earlier selector, or the
                # record has no such field - in both cases nothing to do
                continue
            if mismatch:
                remaining.pop(mouse_id)

    if verbose:
        print("{} items in output dictionary".format(len(remaining)))

    return remaining

def get_data_fields(dictionary, fields, selectors):
    """
    pulls the requested fields out of the mice that match the selectors

    parameters
    ----------
    dictionary : dict
        maps mouse id -> dict of fields for that mouse
    fields : list of str
        field names to extract
    selectors : dict
        passed to get_data_subset to choose which mice are included

    returns
    --------
    output_list : list or None
        one list of values per requested field (in the order of `fields`);
        if exactly one field was collected that single list is returned
        directly; an empty list if no mouse matches; None if a selected
        mouse lacks one of the fields
    """
    selected = get_data_subset(dictionary, selectors)
    columns = []

    if not selected:
        print("No data in fields in selected dictionary")
    else:
        for field in fields:
            try:
                column = [record[field] for record in selected.values()]
            except KeyError:
                print("{} is not a key in selected dictionary".format(field))
                return None
            columns.append(column)

    # unwrap when only a single field was collected
    return columns[0] if len(columns) == 1 else columns

def get_intermealinterval (pellettimes):
    """
    mean of the inter-pellet intervals that are longer than 1/60
    (one minute when pellettimes are in hours)

    returns nan (with a numpy warning) when no interval exceeds the threshold
    """
    gaps = np.diff(pellettimes)
    between_meals = gaps[gaps > (1/60)]
    return np.mean(between_meals)
#%%

# Read the metafile and build `mice`: one dictionary of metadata per mouse id,
# then attach pellet timestamps from each mouse's fixed-ratio ("FR") sessions.
# NOTE(review): the absolute path is hard-coded; the recorded output of this cell is a
# FileNotFoundError for this file.
metafile = "/workspaces/FEDPROFERENCE/FEDXD_METAFILE.xls"
rows, header = tp.metafilereader(metafile, sheetname="METAFILE")

mice = {}
for row in rows:
    mouse_id = row[0]
    # only the first row seen for a mouse supplies its metadata; later rows are ignored here
    if mouse_id not in mice.keys():
        mice[mouse_id] = {}
        mice[mouse_id]["SEX"] = row[1]
        mice[mouse_id]["CHOICE_SESSION"] = row[4]
        mice[mouse_id]["FED_PELLET"] = row[5]
        mice[mouse_id]["MODE"] = row[6]
        mice[mouse_id]["DIET"] = row[7]

for key in mice.keys():
    for row in rows:
        # row[6] is stored above as "MODE"; only "FR" rows are read
        if row[0] == key and row[6] == "FR":
            # row[3] is used as the csv filename inside the data folder
            filename = "/workspaces/FEDPROFERENCE/data/{}".format(row[3])
            # a later matching row for the same mouse overwrites the earlier timestamps
            if row[5] == "CAS20":
                mice[key]["CAS20_timestamps"] = get_FEDevents(filename, "Pellet")
            elif row[7] == "PR":
                mice[key]["pr_timestamps"] = get_FEDevents(filename, "Pellet")
            elif row[7] == "NR":
                mice[key]["nr_timestamps"] = get_FEDevents(filename, "Pellet")
            else:
                # NOTE(review): the branches above test columns 5 and 7 but this prints column 27;
                # it raises IndexError if rows have fewer than 28 columns - confirm the index
                print(row[27], "is not a valid type of pellet for", key)






# %%
# to get average pellets per day: total pellet count divided by a fixed number of days
# (3 for grain, 7 for pr and nr)
# NOTE(review): no visible cell stores "grain_timestamps" (the loading loop stores
# "CAS20_timestamps", "pr_timestamps" and "nr_timestamps"), so the first line raises
# KeyError unless it is set elsewhere - confirm the intended key
for key in mice.keys():
    mice[key]["grain_avg_pellets"] = len(mice[key]["grain_timestamps"]) / 3
    mice[key]["pr_avg_pellets"] = len(mice[key]["pr_timestamps"]) / 7
    mice[key]["nr_avg_pellets"] = len(mice[key]["nr_timestamps"]) / 7

# %%
# 
def get_pellets_per_day(timestamps, start_time=4, days=7):
    """
    counts pellets in consecutive 24 h windows

    parameters
    ----------
    timestamps : list of floats
        pellet times in hours
    start_time : int
        currently unused; kept so existing calls keep working
    days : int
        number of 24 h windows to count, starting at hour 0

    returns
    --------
    pellets_per_day : list of ints
        pellets_per_day[d] is the number of timestamps t with
        d*24 <= t < (d+1)*24; timestamps below 0 or at/after days*24
        are not counted
    """
    pellets_per_day = []
    for day in range(days):
        window_start = day * 24
        window_end = window_start + 24
        # half-open window so a pellet exactly on a day boundary is counted once
        # (strict comparison on both sides dropped it from every day)
        n_pellets = sum(1 for t in timestamps if window_start <= t < window_end)
        pellets_per_day.append(n_pellets)

    return pellets_per_day

# store daily pellet counts per mouse: 3 days for grain, the default 7 days for pr and nr
# NOTE(review): "grain_timestamps" is not assigned in any visible cell - confirm the key
for key in mice.keys():
    mice[key]["grain_pellets_per_day"] = get_pellets_per_day(mice[key]["grain_timestamps"], days=3)
    mice[key]["pr_pellets_per_day"] = get_pellets_per_day(mice[key]["pr_timestamps"])
    mice[key]["nr_pellets_per_day"] = get_pellets_per_day(mice[key]["nr_timestamps"])

# %%
# assemble pellets per day for whole timecourse: grain days first, then the pr and nr
# days concatenated in an order that depends on mice[key]["order"]
# NOTE(review): no visible cell stores an "order" field in `mice`, so this lookup raises
# KeyError unless it is set elsewhere - confirm
for key in mice.keys():
    if mice[key]["order"] == 2:
        # order 2: pr before nr
        mice[key]["all_pellets_per_day"] = mice[key]["grain_pellets_per_day"] + \
            mice[key]["pr_pellets_per_day"] + mice[key]["nr_pellets_per_day"]
    else:
        # any other order: nr before pr
                mice[key]["all_pellets_per_day"] = mice[key]["grain_pellets_per_day"] + \
                    mice[key]["nr_pellets_per_day"] + mice[key]["pr_pellets_per_day"]



# %%
# add meal parameters to dictionary

def get_interpellet_intervals(pellettimes):
    """
    returns the differences between consecutive pellet times
    (numpy array, one element shorter than pellettimes)
    """
    return np.diff(pellettimes)

def get_intermealinterval (pellettimes):
    """
    mean of the inter-pellet intervals that are longer than 1/60
    (one minute when pellettimes are in hours)

    returns nan (with a numpy warning) when no interval exceeds the threshold
    """
    gaps = np.diff(pellettimes)
    between_meals = gaps[gaps > (1/60)]
    return np.mean(between_meals)

def get_mealsize(pellettimes):
    """
    calculates meal size from times of pellets
    parameters 
    ----------
    pellettimes : list of floats
        timestamps of pellet deliveries

    returns
    --------
    mealsize : float 
        number of pellets divided by the number of inter-pellet intervals
        longer than 1/60 (one minute when times are in hours); nan when no
        such interval exists (fewer than two pellets, or all pellets closer
        together than the threshold)
    """
    npellets = len(pellettimes)
    IPIs = np.diff(pellettimes)
    # NOTE(review): this counts the gaps between meals; k gaps separate k+1 meals,
    # so confirm whether the divisor should be nmeals + 1
    nmeals = int(np.count_nonzero(IPIs > 1/60))

    # avoid ZeroDivisionError when there is no gap long enough to separate meals
    if nmeals == 0:
        return np.nan

    mealsize = npellets/nmeals

    return mealsize

# store inter-pellet intervals, inter-meal interval and meal size for the pr and nr
# timestamps of every mouse
# NOTE(review): a mouse without "pr_timestamps" or "nr_timestamps" (these are only set
# for matching "FR" rows in the loading loop) raises KeyError here - confirm every mouse has both
for key in mice.keys():
    pr_timestamps = mice[key]["pr_timestamps"]
    mice[key]["interpellet_intervals_pr"] = get_interpellet_intervals(pr_timestamps)
    mice[key]["intermeal_interval_pr"] = get_intermealinterval(pr_timestamps)
    mice[key]["mealsize_pr"] = get_mealsize(pr_timestamps)

    nr_timestamps = mice[key]["nr_timestamps"]
    mice[key]["interpellet_intervals_nr"] = get_interpellet_intervals(nr_timestamps)
    mice[key]["intermeal_interval_nr"] = get_intermealinterval(nr_timestamps)
    mice[key]["mealsize_nr"] = get_mealsize(nr_timestamps)
    
# %%




FileNotFoundError: [Errno 2] No such file or directory: '\\workspaces\\FEDPROFERENCE\\FEDXD_METAFILE.xls'

In [4]:
# Rebuild `mice` from the metafile and collect "Pellet" event times for each mouse's
# choice session. Requires `tp` (trompy) and get_FEDevents from earlier cells.
# NOTE(review): the absolute path is hard-coded; the recorded output of this cell is a
# FileNotFoundError for this file.
metafile_path = "/workspaces/FEDPROFERENCE/FEDXD_METAFILE.xls"  # Adjusted path to the uploaded file
rows, header = tp.metafilereader(metafile_path, sheetname="METAFILE")

# this replaces any earlier `mice`; entries here hold only "CHOICE_SESSION" and "events"
mice = {}
# Iterate through each row in the metafile to populate the mice dictionary
for row in rows:
    mouse_id, choice_session = row[0], row[4]  # Assuming these are the correct indices for MOUSEID and CHOICE_SESSION
    # only the first row seen for a mouse sets its CHOICE_SESSION
    if mouse_id not in mice:
        mice[mouse_id] = {"CHOICE_SESSION": choice_session, "events": []}  # Initialize dictionary

# Now, focus on processing FED event data for CHOICE_SESSION
for mouse_id, data in mice.items():
    choice_session = data["CHOICE_SESSION"]
    # Filter rows for the current mouse and its CHOICE_SESSION
    session_rows = [row for row in rows if row[0] == mouse_id and row[4] == choice_session]
    
    for row in session_rows:
        # Assuming row[3] is the filename/path for the FED event data
        filename = f"/workspaces/FEDPROFERENCE/data/{row[3]}"  # Adjust path as needed
        eventname = "Pellet"  # Adjust as needed based on your CSV files
        # Collect FED event timestamps
        timestamps = get_FEDevents(filename, eventname)
        # Store timestamps in the mice dictionary; times from all files of the session
        # are appended to one list, each file measured from its own reference point
        if timestamps:  # Check if timestamps were found
            mice[mouse_id]["events"].extend(timestamps)

# This approach organizes the FED event timestamps by mouse and by CHOICE_SESSION.


FileNotFoundError: [Errno 2] No such file or directory: '\\workspaces\\FEDPROFERENCE\\FEDXD_METAFILE.xls'

In [5]:
# Count pellets per pellet type for each mouse's choice session.
# Depends on `mice` and `rows` from an earlier cell; the recorded output of this cell is
# NameError: name 'mice' is not defined.

# Extending the mice dictionary to include pellet types
for mouse_id, data in mice.items():
    choice_session = data["CHOICE_SESSION"]
    session_rows = [row for row in rows if row[0] == mouse_id and row[4] == choice_session]

    # Initialize dictionary entries for pellet counts
    data["pellet_counts"] = {}  # This will store counts for each pellet type

    for row in session_rows:
        filename = f"/workspaces/FEDPROFERENCE/data/{row[3]}"  # Adjust path as needed
        # Assuming pellet types are differentiated somehow in your data or filenames
        # For illustration, let's say pellet types are indicated in `FED_PELLET` column (row[5])
        pellet_type = row[5]  # Identify the pellet type
        if pellet_type not in data["pellet_counts"]:
            data["pellet_counts"][pellet_type] = 0  # Initialize count for this pellet type
        
        # Every "Pellet" event in the row's file is attributed to that row's pellet type
        # NOTE(review): confirm that each file contains pellets of a single type only
        timestamps = get_FEDevents(filename, "Pellet")  # Collect timestamps
        data["pellet_counts"][pellet_type] += len(timestamps)  # Count pellets for the type

# Now, `mice[mouse_id]["pellet_counts"]` contains the counts of each pellet type taken in the choice session


NameError: name 'mice' is not defined

In [6]:
# Group per-mouse pellet counts by sex, choice session, diet and pellet type.
# Depends on `mice` from an earlier cell; the recorded output of this cell is
# NameError: name 'mice' is not defined.
# Initialize a dictionary to hold the lists for each group
grouped_data = {}

# Iterate through each mouse to organize their data into the corresponding list
for mouse_id, data in mice.items():
    # Using .get() to avoid KeyError, defaulting to 'Unknown' if not found
    # NOTE(review): the cell that builds `mice` with an "events" list stores no "SEX" or
    # "DIET", so with that version of `mice` both fall back to "Unknown" - confirm
    sex = data.get("SEX", "Unknown")  # Consider how you want to handle unknown sexes
    choice_session = data.get("CHOICE_SESSION", "Unknown")
    diet = data.get("DIET", "Unknown")
    
    # Check if 'pellet_counts' exists to avoid further errors
    pellet_counts = data.get("pellet_counts", {})
    
    # Iterate through pellet counts to further classify by pellet type
    for pellet_type, count in pellet_counts.items():
        # Define a unique key for the group
        key = f"{sex}_{choice_session}_{diet}_{pellet_type}"
        
        # Initialize the list for this group if it doesn't exist
        if key not in grouped_data:
            grouped_data[key] = []
        
        # Append the mouse data to the list for its group
        grouped_data[key].append({
            "MOUSEID": mouse_id,
            "COUNT": count
        })


NameError: name 'mice' is not defined

In [7]:
# Count pellets for every (mouse, choice session, pellet type) combination in the metafile.
# Depends on `rows` from an earlier cell; the recorded output of this cell is
# NameError: name 'rows' is not defined.
# Initialize a structure to store data across all choice sessions
all_sessions_data = {}

# Iterate through each row directly, allowing for multiple sessions per mouse
for row in rows:
    mouse_id = row[0]
    choice_session = row[4]  # This is now directly taken from each row
    pellet_type = row[5]
    filename = f"/workspaces/FEDPROFERENCE/data/{row[3]}"
    
    # Unique key for each mouse, session, and pellet type
    key = f"{mouse_id}_{choice_session}_{pellet_type}"
    
    # Initialize the entry if it doesn't exist
    if key not in all_sessions_data:
        all_sessions_data[key] = {
            "MOUSEID": mouse_id,
            "CHOICE_SESSION": choice_session,
            "PELLET_TYPE": pellet_type,
            "COUNT": 0
        }
    
    # Collect timestamps and count pellets for the type; rows sharing a key add up
    timestamps = get_FEDevents(filename, "Pellet")
    all_sessions_data[key]["COUNT"] += len(timestamps)

# Convert all_sessions_data dictionary to a list of dictionaries for each mouse/session/pellet type
all_sessions_list = list(all_sessions_data.values())


NameError: name 'rows' is not defined

In [8]:
# Build one record per metafile row and group the records by sex, diet, choice session
# and pellet type. Depends on `rows` from an earlier cell; the recorded output of this
# cell is NameError: name 'rows' is not defined.
# Initialize a dictionary to hold lists for each unique group
grouped_lists = {}

# Iterate through the rows to process and group data
# (the loop variables stay defined after the loop with the values of the last row;
# the later meal-metrics cell reads them)
for row in rows:
    mouse_id = row[0]
    sex = row[1]  # Assuming this is the correct column for SEX
    diet = row[7]  # Assuming this is the correct column for DIET
    choice_session = row[4]
    pellet_type = row[5]
    filename = f"/workspaces/FEDPROFERENCE/data/{row[3]}"
    
    # Create a unique key for each group
    group_key = f"{sex}_{diet}_{choice_session}_{pellet_type}"
    
    # Collect timestamps for this row's file and pellet event
    timestamps = get_FEDevents(filename, "Pellet")
    pellet_count = len(timestamps)
    
    # Initialize the list for this group if it doesn't exist
    if group_key not in grouped_lists:
        grouped_lists[group_key] = []
    
    # Append the data for this row to the list for its group (one record per row, so a
    # mouse with several rows in the same group appears several times)
    grouped_lists[group_key].append({
        "MOUSEID": mouse_id,
        "SEX": sex,
        "DIET": diet,
        "CHOICE_SESSION": choice_session,
        "PELLET_TYPE": pellet_type,
        "COUNT": pellet_count
    })

# At this point, 'grouped_lists' contains separate lists for each combination of SEX, DIET, CHOICE_SESSION, and PELLET_TYPE


NameError: name 'rows' is not defined

In [9]:
import pandas as pd

# Requires `grouped_lists` (dict: group key -> list of record dicts) from an earlier cell

# Flatten the grouped data into a single list of dictionaries
flattened_data = []
for group_data in grouped_lists.values():
    flattened_data.extend(group_data)

# Convert the flattened data list into a DataFrame (one row per record)
df = pd.DataFrame(flattened_data)

# Bare expression: the DataFrame itself is rendered as the cell output
# (use df.head() to show only the first few rows)
df


In [10]:
import numpy as np

# Computes meal metrics for ONE session and appends a record to grouped_lists.
# NOTE(review): this cell defines none of `filename`, `group_key`, `mouse_id`, `sex`,
# `diet`, `choice_session`, `pellet_type`, `pellet_count` or `grouped_lists`; it only
# works with values left over from an earlier cell (the recorded output is
# NameError: name 'filename' is not defined). Consider moving it inside that loop.

# For each mouse session, after collecting timestamps
timestamps = get_FEDevents(filename, "Pellet")  # This gets the timestamps for the session

# get_FEDevents already returns floats in hours, so re-zero to the first event by plain
# subtraction (calling .total_seconds() on a float difference raises AttributeError)
# Assumes timestamps are sorted in ascending order
pellettimes = [t - timestamps[0] for t in timestamps]

# Calculate Interpellet Intervals (IPIs), Intermeal Interval (IMI), and Meal Size
IPIs = get_interpellet_intervals(pellettimes)
IMI = get_intermealinterval(pellettimes)
mealsize = get_mealsize(pellettimes)

# Now, you can include IMI and Meal Size in your data structure for this session
# (this appends an additional record; it does not update a record that is already there)
grouped_lists[group_key].append({
    "MOUSEID": mouse_id,
    "SEX": sex,
    "DIET": diet,
    "CHOICE_SESSION": choice_session,
    "PELLET_TYPE": pellet_type,
    "COUNT": pellet_count,
    "IMI": IMI,
    "MEALSIZE": mealsize
})


NameError: name 'filename' is not defined