# In this script , I try to define a new meal definition based on what I did in [FEDPROTEINPRO](https://github.com/Htbibalan/FEDPROTEINPRO) and analysis the FEDPROTEIN experiment with this new approach, i.e. min pellet 1 pellet and snacks and mega meals included.  

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
import trompy as tp
import csv
from datetime import datetime, timedelta


# the next two lines will expand the printed tables
pd.options.display.max_rows= 40000
pd.options.display.max_columns= 40000


ctl_color= "gray"
exp_color="deepskyblue"
fig_folder=r"../plots\\"
READ_METAFILE= pd.read_excel(r"C:\Users\hta031\Github\FEDProtein\FEDProtein_METAFILE.xls" ,sheet_name="METAFILE" )

READ_METAFILE


Unnamed: 0,fedfile,mouse,diet,mode,sex,order
0,FED001_042322_00.CSV,FEDXA01,GRAIN,FF,M,2
1,FED001_042622_00.CSV,FEDXA01,PR,FF,M,2
2,FED001_050322_00.CSV,FEDXA01,NR,FF,M,2
3,FED001_051022_04.CSV,FEDXA01,NR,FR1,M,2
4,FED001_051122_01.CSV,FEDXA01,NR,FR1R,M,2
5,FED001_051522_00.CSV,FEDXA01,PR,ECO,M,2
6,FED002_051522_00.CSV,FEDXA01,RICH,ECO,M,2
7,FED002_042322_00.CSV,FEDXA02,GRAIN,FF,M,2
8,FED002_042622_00.CSV,FEDXA02,PR,FF,M,2
9,FED002_050322_00.CSV,FEDXA02,NR,FF,M,2


In [8]:
from datetime import datetime
import csv

metafile_path = "../FEDPROTEIN_METAFILE.xls"  # Adjusted path to the uploaded file
rows, header = tp.metafilereader(metafile_path, sheetname="METAFILE")

def get_FEDevents(filename, eventname):
    formats = ['%Y-%m-%d %H:%M:%S', '%m/%d/%Y %H:%M:%S', '%m/%d/%Y %H:%M:%S %p']  # List of possible formats
    file = open(filename)
    csvreader = csv.reader(file)
    next(csvreader)  # Skip header
    rows = [row for row in csvreader]

    if not rows:  # Check if rows is empty
        print(f"No data in file {filename}.")
        return []

    event_column_index = 7  # Based on your CSV structure
    
    timestamps = []
    for row in rows:
        if row[event_column_index] == eventname:
            for date_format in formats:
                try:
                    date_time_obj = datetime.strptime(row[0], date_format)
                    timestamps.append(date_time_obj)
                    break  # Exit the loop if the date format matches
                except ValueError:
                    continue  # Try the next format if there's a ValueError

            else:
                # If none of the formats worked, raise an error
                raise ValueError(f"Date format not recognized for row: {row[0]}")

    if not timestamps:  # If no events match, return an empty list
        return []

    # Assuming the date format in t0 is '%Y-%m-%d %H:%M:%S'
    t0 = timestamps[0].strftime('%Y-%m-%d %H:%M:%S')
    day = t0.split()[0]
    lightson = day + " 07:00:00"  # Assuming lights on at 7 AM
    refpoint = datetime.strptime(lightson, '%Y-%m-%d %H:%M:%S')

    pellettimes = [(t - refpoint).total_seconds() / 3600 for t in timestamps]

    return pellettimes

# Initialize a dictionary to hold lists for each unique group
grouped_lists = {}

# Iterate through the rows to process and group data
for row in rows:
    mouse_id = row[1]
    sex = row[4]  # Assuming this is the correct column for SEX
    diet = row[2]  # Assuming this is the correct column for DIET
    order = row[5]
    mode = row[3]
    filename = f"../data/{row[0]}"

    group_key = f"{sex}_{order}_{mode}_{diet}"
    
    timestamps = get_FEDevents(filename, "Pellet")
    pellet_count = len(timestamps)
    
    if group_key not in grouped_lists:
        grouped_lists[group_key] = []
    
    grouped_lists[group_key].append({
        "MOUSEID": mouse_id,
        "SEX": sex,
        "DIET": diet,
        "MODE": mode,
        "ORDER": order,
        "COUNT": pellet_count
    })


In [18]:
import os
from datetime import datetime
import csv
import numpy as np
import pandas as pd

# Load the CSV metafile
metafile_path = '../FEDProtein_METAFILE.csv'
metafile_df = pd.read_csv(metafile_path)

def get_FEDevents(filename, eventname, formats=None):
    if formats is None:
        formats = ['%m/%d/%Y %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%m/%d/%Y %I:%M:%S %p']
    
    timestamps = []
    with open(filename) as file:
        csvreader = csv.reader(file)
        next(csvreader)  # Skip header
        rows = [row for row in csvreader if row[7] == eventname]
    
    for row in rows:
        for date_format in formats:
            try:
                date_time_obj = datetime.strptime(row[0], date_format)
                timestamps.append(date_time_obj)
                break  # Exit the loop if a format matches
            except ValueError:
                continue  # Try the next format if the current one doesn't match

    if not timestamps:
        return [], None

    refpoint = timestamps[0].replace(hour=7, minute=0, second=0, microsecond=0)
    pellettimes = [(t - refpoint).total_seconds() / 3600 for t in timestamps]
    return pellettimes, refpoint

def get_meal_and_snack_metrics(pellettimes, meal_threshold=1/60, min_pellets_for_meal=1):
    if not pellettimes:
        return (0, 0, 0, 0, 0, 0, [0]*24)

    IPIs = np.diff(np.array(pellettimes))
    meals = []
    snacks = []
    current_event = [pellettimes[0]]

    for i, ipi in enumerate(IPIs):
        if ipi <= meal_threshold:
            current_event.append(pellettimes[i + 1])
        else:
            if len(current_event) >= min_pellets_for_meal:
                meals.append(current_event)
            else:
                snacks.append(current_event)
            current_event = [pellettimes[i + 1]]

    if current_event:
        if len(current_event) >= min_pellets_for_meal:
            meals.append(current_event)
        else:
            snacks.append(current_event)

    nmeals = len(meals)
    nsnacks = len(snacks)
    hourly_meals = np.zeros(24)
    for meal in meals:
        start_hour = int(meal[0]) % 24
        hourly_meals[start_hour] += 1

    total_pellets = len(pellettimes)
    mealsize = sum(len(meal) for meal in meals) / nmeals if nmeals else 0
    snack_size = sum(len(snack) for snack in snacks) / nsnacks if nsnacks else total_pellets / nsnacks if nsnacks else 0
    total_observation_period = max(pellettimes) - min(pellettimes)
    meal_frequency = nmeals / total_observation_period if total_observation_period > 0 else 0
    snack_frequency = nsnacks / total_observation_period if total_observation_period > 0 else 0

    return mealsize, snack_size, nmeals, meal_frequency, nsnacks, snack_frequency, hourly_meals.tolist()

# Initialize a dictionary to hold lists for each unique group
grouped_lists = {}




for row in rows:
    mouse_id = row[1]
    sex = row[4]  # Assuming this is the correct column for SEX
    diet = row[2]  # Assuming this is the correct column for DIET
    order = row[5]
    mode = row[3]
    filename = f"../data/{row[0]}"



# Iterate through the rows to process and group data
for index, row in metafile_df.iterrows():
    mouse_id = row[1]
    sex = row[4]
    diet = row[2]
    order = row[5]
    mode = row[3]
    filename = f"../data/{row['fedfile']}"

    group_key = f"{sex}_{order}_{mode}_{diet}"
    
    pellettimes, _ = get_FEDevents(filename, "Pellet")
    pellet_count = len(pellettimes)
    
    if group_key not in grouped_lists:
        grouped_lists[group_key] = []
    
    grouped_lists[group_key].append({
        "MOUSEID": mouse_id,
        "SEX": sex,
        "DIET": diet,
        "MODE": mode,
        "ORDER": order,
        "COUNT": pellet_count
    })

# Combine pellet times and calculate metrics
final_data = []

for group_key, file_info in grouped_lists.items():
    all_pellet_times = []
    for info in file_info:
        filename = f"../data/{info['MOUSEID']}.csv"  # Adjust this to use the correct file path if necessary
        if not os.path.exists(filename):
            print(f"File not found: {filename}")
            continue  # Skip if the file does not exist
        
        pellettimes, refpoint = get_FEDevents(filename, "Pellet")
        all_pellet_times.extend(pellettimes)

    if not all_pellet_times:
        continue

    all_pellet_times.sort()
    mealsize, snack_size, nmeals, meal_frequency, nsnacks, snack_frequency, hourly_meals = get_meal_and_snack_metrics(all_pellet_times)

    entry = {
        "GROUP_KEY": group_key,
        "MEAL_SIZE": mealsize,
        "SNACK_SIZE": snack_size,
        "NUMBER_OF_MEALS": nmeals,
        "MEAL_FREQUENCY": meal_frequency,
        "NUMBER_OF_SNACKS": nsnacks,
        "SNACK_FREQUENCY": snack_frequency,
        "HOURLY_MEALS": hourly_meals,
        "TOTAL_PELLETS": len(all_pellet_times)
    }
    final_data.append(entry)
    # Add hourly meals data
    for hour in range(24):
        entry[f"MEALS_HOUR_{hour}"] = hourly_meals[hour]

df = pd.DataFrame(final_data)

# Split GROUP_KEY into separate columns
df[['MOUSEID', 'SEX', 'DIET', "MODE", "ORDER"]] = df['GROUP_KEY'].str.split('_', expand=True)

# Optionally filter out specific rows or drop unnecessary columns
# df = df[~df['GROUP_KEY'].str.contains('MIX')]

# Save DataFrame to CSV
# df.to_csv(r'../data/meal_snack_data.csv', index=False)

df  # Display the final DataFrame


  mouse_id = row[1]
  sex = row[4]
  diet = row[2]
  order = row[5]
  mode = row[3]


File not found: ../data/FEDXA01.csv
File not found: ../data/FEDXA02.csv
File not found: ../data/FEDXA03.csv
File not found: ../data/FEDXA04.csv
File not found: ../data/FEDXA05.csv
File not found: ../data/FEDXA06.csv
File not found: ../data/FEDXA01.csv
File not found: ../data/FEDXA02.csv
File not found: ../data/FEDXA03.csv
File not found: ../data/FEDXA04.csv
File not found: ../data/FEDXA05.csv
File not found: ../data/FEDXA06.csv
File not found: ../data/FEDXA01.csv
File not found: ../data/FEDXA02.csv
File not found: ../data/FEDXA03.csv
File not found: ../data/FEDXA04.csv
File not found: ../data/FEDXA05.csv
File not found: ../data/FEDXA06.csv
File not found: ../data/FEDXA01.csv
File not found: ../data/FEDXA02.csv
File not found: ../data/FEDXA03.csv
File not found: ../data/FEDXA04.csv
File not found: ../data/FEDXA05.csv
File not found: ../data/FEDXA06.csv
File not found: ../data/FEDXA01.csv
File not found: ../data/FEDXA02.csv
File not found: ../data/FEDXA03.csv
File not found: ../data/FEDX

KeyError: 'GROUP_KEY'