In [3]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
file_path = r'../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'
data = pd.read_csv(file_path)

# Function to unpack list-like columns into individual day columns
def unpack_days(df, column_prefix):
    unpacked_cols = []
    for col in df.columns:
        if column_prefix in col:
            # Unpack the list into separate columns
            unpacked = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
            max_len = max(unpacked.apply(len))
            for i in range(max_len):
                day_col = f"{col}_day_{i+1}"
                df[day_col] = unpacked.apply(lambda x: x[i] if len(x) > i else np.nan)
                unpacked_cols.append(day_col)
    return df, unpacked_cols

# Unpack relevant list columns for each meal/snack/mega meal per day
columns_to_unpack = [
    'all_mega_meals_per_day', 'all_pellets_per_day',
    'all_meals_per_day', 'all_snacks_per_day'
]

# Unpack the day-wise data
for col in columns_to_unpack:
    data, _ = unpack_days(data, col)

# Exclude hourly columns (filter out columns with 'hourly' in their name)
columns_to_exclude = [col for col in data.columns if 'hourly' in col]
data = data.drop(columns=columns_to_exclude)

# Perform ANOVA for multiple components
components = ['grain_meal_size', 'grain_snack_size', 'grain_number_of_meals', 
              'grain_number_of_snacks', 'grain_mega_meal_frequency']

anova_results = {}

for component in components:
    try:
        # Fit the ANOVA model with sex, order, and interaction terms
        # Since we don't have a column named 'diet_phase', we use 'grain', 'pr', 'nr' phases implicitly
        formula = f'{component} ~ C(sex) * C(order)'  # Interaction of sex and order (diet phase can be added later if needed)
        model = ols(formula, data=data).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)
        anova_results[component] = anova_table
    except Exception as e:
        print(f"Could not process ANOVA for {component}: {e}")

# Descriptive statistics for the dataset
descriptive_stats = data.describe()

# Display the descriptive statistics
print("Descriptive Statistics:")
print(descriptive_stats)

# Output ANOVA tables for each component
for component, anova_table in anova_results.items():
    print(f"\nANOVA results for {component}:")
    print(anova_table)


Descriptive Statistics:
           order  grain_meal_size  grain_snack_size  grain_number_of_meals  \
count  23.000000        23.000000              23.0              23.000000   
mean    1.521739         2.896004               1.0              80.478261   
std     0.510754         0.105025               0.0              28.124511   
min     1.000000         2.716216               1.0              31.000000   
25%     1.000000         2.836067               1.0              61.000000   
50%     2.000000         2.895652               1.0              80.000000   
75%     2.000000         2.956617               1.0             104.500000   
max     2.000000         3.098361               1.0             127.000000   

       grain_meal_frequency  grain_number_of_snacks  grain_snack_frequency  \
count             23.000000               23.000000              23.000000   
mean               1.193044               47.695652               0.704957   
std                0.407931            

In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
file_path = r'../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'
data = pd.read_csv(file_path)

# Function to unpack list-like columns into individual day columns
def unpack_days(df, column_prefix):
    unpacked_cols = []
    for col in df.columns:
        if column_prefix in col:
            # Unpack the list into separate columns
            unpacked = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
            max_len = max(unpacked.apply(len))
            for i in range(max_len):
                day_col = f"{col}_day_{i+1}"
                df[day_col] = unpacked.apply(lambda x: x[i] if len(x) > i else np.nan)
                unpacked_cols.append(day_col)
    return df, unpacked_cols

# Unpack relevant list columns for each meal/snack/mega meal per day
columns_to_unpack = [
    'all_mega_meals_per_day', 'all_pellets_per_day',
    'all_meals_per_day', 'all_snacks_per_day'
]

# Unpack the day-wise data
for col in columns_to_unpack:
    data, _ = unpack_days(data, col)

# Exclude hourly columns (filter out columns with 'hourly' in their name)
columns_to_exclude = [col for col in data.columns if 'hourly' in col]
data = data.drop(columns=columns_to_exclude)

# List of components to analyze across diet phases
components_per_diet = {
    'grain': ['grain_meal_size', 'grain_snack_size', 'grain_number_of_meals', 
              'grain_number_of_snacks', 'grain_mega_meal_frequency'],
    'pr': ['pr_meal_size', 'pr_snack_size', 'pr_number_of_meals', 
           'pr_number_of_snacks', 'pr_mega_meal_frequency'],
    'nr': ['nr_meal_size', 'nr_snack_size', 'nr_number_of_meals', 
           'nr_number_of_snacks', 'nr_mega_meal_frequency']
}

anova_results = {}

# Perform ANOVA for each diet phase and each component
for diet, components in components_per_diet.items():
    for component in components:
        try:
            # Fit the ANOVA model with sex, order, and their interaction
            formula = f'{component} ~ C(sex) * C(order)'
            model = ols(formula, data=data).fit()
            anova_table = sm.stats.anova_lm(model, typ=2)
            anova_results[f'{diet}_{component}'] = anova_table
        except Exception as e:
            print(f"Could not process ANOVA for {diet}_{component}: {e}")

# Descriptive statistics for the dataset
descriptive_stats = data.describe()

# Display the descriptive statistics
print("Descriptive Statistics:")
print(descriptive_stats)

# Output ANOVA tables for each component
for component, anova_table in anova_results.items():
    print(f"\nANOVA results for {component}:")
    print(anova_table)


Descriptive Statistics:
           order  grain_meal_size  grain_snack_size  grain_number_of_meals  \
count  23.000000        23.000000              23.0              23.000000   
mean    1.521739         2.896004               1.0              80.478261   
std     0.510754         0.105025               0.0              28.124511   
min     1.000000         2.716216               1.0              31.000000   
25%     1.000000         2.836067               1.0              61.000000   
50%     2.000000         2.895652               1.0              80.000000   
75%     2.000000         2.956617               1.0             104.500000   
max     2.000000         3.098361               1.0             127.000000   

       grain_meal_frequency  grain_number_of_snacks  grain_snack_frequency  \
count             23.000000               23.000000              23.000000   
mean               1.193044               47.695652               0.704957   
std                0.407931            

In [5]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
file_path = r'../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'
data = pd.read_csv(file_path)

# Function to unpack list-like columns into individual day columns
def unpack_days(df, column_prefix):
    unpacked_cols = []
    for col in df.columns:
        if column_prefix in col:
            # Unpack the list into separate columns
            unpacked = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
            max_len = max(unpacked.apply(len))
            for i in range(max_len):
                day_col = f"{col}_day_{i+1}"
                df[day_col] = unpacked.apply(lambda x: x[i] if len(x) > i else np.nan)
                unpacked_cols.append(day_col)
    return df, unpacked_cols

# Unpack relevant list columns for each meal/snack/mega meal per day
columns_to_unpack = [
    'all_mega_meals_per_day', 'all_pellets_per_day',
    'all_meals_per_day', 'all_snacks_per_day'
]

# Unpack the day-wise data
for col in columns_to_unpack:
    data, _ = unpack_days(data, col)

# Exclude hourly columns (filter out columns with 'hourly' in their name)
columns_to_exclude = [col for col in data.columns if 'hourly' in col]
data = data.drop(columns=columns_to_exclude)

# List of components to analyze across diet phases
components_per_diet = {
    'grain': ['grain_meal_size', 'grain_snack_size', 'grain_number_of_meals', 
              'grain_number_of_snacks', 'grain_mega_meal_frequency'],
    'pr': ['pr_meal_size', 'pr_snack_size', 'pr_number_of_meals', 
           'pr_number_of_snacks', 'pr_mega_meal_frequency'],
    'nr': ['nr_meal_size', 'nr_snack_size', 'nr_number_of_meals', 
           'nr_number_of_snacks', 'nr_mega_meal_frequency']
}

anova_results = {}

# Helper function to add significance marks
def significance_mark(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# Perform ANOVA for each diet phase and each component
for diet, components in components_per_diet.items():
    for component in components:
        try:
            # Fit the ANOVA model with sex, order, and their interaction
            formula = f'{component} ~ C(sex) * C(order)'
            model = ols(formula, data=data).fit()
            anova_table = sm.stats.anova_lm(model, typ=2)
            
            # Add p-value significance marks
            anova_table['p_value'] = anova_table['PR(>F)'].apply(significance_mark)
            anova_results[f'{diet}_{component}'] = anova_table
        except Exception as e:
            print(f"Could not process ANOVA for {diet}_{component}: {e}")

# Calculate sum of pellets for each diet phase and group by 'sex' and 'order'
pellets_sum = {}
for phase in ['grain', 'pr', 'nr']:
    pellets_column = f'{phase}_pellets_per_day'
    try:
        # Sum pellets across days for each group (sex and order)
        data[pellets_column] = data[pellets_column].apply(lambda x: sum(eval(x)) if isinstance(x, str) else np.nan)
        pellets_sum[phase] = data.groupby(['sex', 'order'])[pellets_column].sum()
    except Exception as e:
        print(f"Could not calculate pellet sum for {phase}: {e}")

# Descriptive statistics for the dataset
descriptive_stats = data.describe()

# Display the descriptive statistics
print("Descriptive Statistics:")
print(descriptive_stats)

# Output ANOVA tables for each component with significance marks
for component, anova_table in anova_results.items():
    print(f"\nANOVA results for {component}:")
    print(anova_table)

# Output the sum of pellets for each diet phase
print("\nSum of pellets taken in each phase by each group (sex, order):")
for phase, sum_data in pellets_sum.items():
    print(f"\n{phase.capitalize()} Phase Pellet Sum:")
    print(sum_data)


Descriptive Statistics:
           order  grain_meal_size  grain_snack_size  grain_number_of_meals  \
count  23.000000        23.000000              23.0              23.000000   
mean    1.521739         2.896004               1.0              80.478261   
std     0.510754         0.105025               0.0              28.124511   
min     1.000000         2.716216               1.0              31.000000   
25%     1.000000         2.836067               1.0              61.000000   
50%     2.000000         2.895652               1.0              80.000000   
75%     2.000000         2.956617               1.0             104.500000   
max     2.000000         3.098361               1.0             127.000000   

       grain_meal_frequency  grain_number_of_snacks  grain_snack_frequency  \
count             23.000000               23.000000              23.000000   
mean               1.193044               47.695652               0.704957   
std                0.407931            

In [6]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
file_path = r'../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'
data = pd.read_csv(file_path)

# Function to unpack list-like columns into individual day columns
def unpack_days(df, column_prefix):
    unpacked_cols = []
    for col in df.columns:
        if column_prefix in col:
            # Unpack the list into separate columns
            unpacked = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
            max_len = max(unpacked.apply(len))
            for i in range(max_len):
                day_col = f"{col}_day_{i+1}"
                df[day_col] = unpacked.apply(lambda x: x[i] if len(x) > i else np.nan)
                unpacked_cols.append(day_col)
    return df, unpacked_cols

# Unpack relevant list columns for each meal/snack/mega meal per day
columns_to_unpack = [
    'all_mega_meals_per_day', 'all_pellets_per_day',
    'all_meals_per_day', 'all_snacks_per_day'
]

# Unpack the day-wise data
for col in columns_to_unpack:
    data, _ = unpack_days(data, col)

# List of key components to analyze (excluding hourly data and timestamps)
components = [
    'grain_meal_size', 'grain_snack_size', 'grain_number_of_meals', 
    'grain_meal_frequency', 'grain_number_of_snacks', 'grain_snack_frequency', 
    'grain_mega_meal_frequency', 'grain_mega_meal_size', 'grain_number_of_mega_meals', 
    'pr_meal_size', 'pr_snack_size', 'pr_number_of_meals', 'pr_meal_frequency', 
    'pr_number_of_snacks', 'pr_snack_frequency', 'pr_mega_meal_frequency', 
    'pr_mega_meal_size', 'pr_number_of_mega_meals', 'nr_meal_size', 'nr_snack_size', 
    'nr_number_of_meals', 'nr_meal_frequency', 'nr_number_of_snacks', 'nr_snack_frequency', 
    'nr_mega_meal_frequency', 'nr_mega_meal_size', 'nr_number_of_mega_meals', 
    'grain_meals_per_day', 'pr_meals_per_day', 'nr_meals_per_day', 
    'grain_snacks_per_day', 'pr_snacks_per_day', 'nr_snacks_per_day', 
    'grain_mega_meals_per_day', 'pr_mega_meals_per_day', 'nr_mega_meals_per_day', 
    'grain_pellets_per_day', 'pr_pellets_per_day', 'nr_pellets_per_day', 
    'all_pellets_per_day', 'all_meals_per_day', 'all_snacks_per_day', 
    'all_mega_meals_per_day'
]

anova_results = {}

# Helper function to add significance marks
def significance_mark(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# Perform ANOVA for each component with sex, order, and interactions
for component in components:
    try:
        # Fit the ANOVA model with sex, order, and their interaction
        formula = f'{component} ~ C(sex) * C(order)'
        model = ols(formula, data=data).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)
        
        # Add p-value significance marks
        anova_table['p_value'] = anova_table['PR(>F)'].apply(significance_mark)
        anova_results[component] = anova_table
    except Exception as e:
        print(f"Could not process ANOVA for {component}: {e}")

# Calculate sum of key components (like pellets and meals per day) for each group by 'sex' and 'order'
sum_results = {}
for component in ['grain_pellets_per_day', 'pr_pellets_per_day', 'nr_pellets_per_day', 
                  'grain_meals_per_day', 'pr_meals_per_day', 'nr_meals_per_day', 
                  'grain_snacks_per_day', 'pr_snacks_per_day', 'nr_snacks_per_day', 
                  'grain_mega_meals_per_day', 'pr_mega_meals_per_day', 'nr_mega_meals_per_day']:
    try:
        # Sum the component data across days for each group (sex and order)
        data[component] = data[component].apply(lambda x: sum(eval(x)) if isinstance(x, str) else np.nan)
        sum_results[component] = data.groupby(['sex', 'order'])[component].sum()
    except Exception as e:
        print(f"Could not calculate sum for {component}: {e}")

# Descriptive statistics for the dataset
descriptive_stats = data.describe()

# Display the descriptive statistics
print("Descriptive Statistics:")
print(descriptive_stats)

# Output ANOVA tables for each component with significance marks
for component, anova_table in anova_results.items():
    print(f"\nANOVA results for {component}:")
    print(anova_table)

# Output the sum of key components for each group (sex, order)
print("\nSum of key components (pellets, meals, snacks, mega meals) per group (sex, order):")
for component, sum_data in sum_results.items():
    print(f"\n{component.capitalize()} Sum:")
    print(sum_data)


Could not process ANOVA for grain_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for pr_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for nr_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for grain_snacks_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for pr_snacks_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variab

In [7]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
file_path = r'../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'
data = pd.read_csv(file_path)

# Function to unpack list-like columns into individual day columns
def unpack_days(df, column_prefix):
    unpacked_cols = []
    for col in df.columns:
        if column_prefix in col:
            # Unpack the list into separate columns
            unpacked = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
            max_len = max(unpacked.apply(len))
            for i in range(max_len):
                day_col = f"{col}_day_{i+1}"
                df[day_col] = unpacked.apply(lambda x: x[i] if len(x) > i else np.nan)
                unpacked_cols.append(day_col)
    return df, unpacked_cols

# Unpack relevant list columns for each meal/snack/mega meal per day
columns_to_unpack = [
    'all_mega_meals_per_day', 'all_pellets_per_day',
    'all_meals_per_day', 'all_snacks_per_day'
]

# Unpack the day-wise data
for col in columns_to_unpack:
    data, _ = unpack_days(data, col)

# List of key components to analyze (excluding hourly data and timestamps)
components = [
    'grain_meal_size', 'grain_snack_size', 'grain_number_of_meals', 
    'grain_meal_frequency', 'grain_number_of_snacks', 'grain_snack_frequency', 
    'grain_mega_meal_frequency', 'grain_mega_meal_size', 'grain_number_of_mega_meals', 
    'pr_meal_size', 'pr_snack_size', 'pr_number_of_meals', 'pr_meal_frequency', 
    'pr_number_of_snacks', 'pr_snack_frequency', 'pr_mega_meal_frequency', 
    'pr_mega_meal_size', 'pr_number_of_mega_meals', 'nr_meal_size', 'nr_snack_size', 
    'nr_number_of_meals', 'nr_meal_frequency', 'nr_number_of_snacks', 'nr_snack_frequency', 
    'nr_mega_meal_frequency', 'nr_mega_meal_size', 'nr_number_of_mega_meals', 
    'grain_meals_per_day', 'pr_meals_per_day', 'nr_meals_per_day', 
    'grain_snacks_per_day', 'pr_snacks_per_day', 'nr_snacks_per_day', 
    'grain_mega_meals_per_day', 'pr_mega_meals_per_day', 'nr_mega_meals_per_day', 
    'grain_pellets_per_day', 'pr_pellets_per_day', 'nr_pellets_per_day', 
    'all_pellets_per_day', 'all_meals_per_day', 'all_snacks_per_day', 
    'all_mega_meals_per_day'
]

anova_results = {}

# Helper function to add significance marks
def significance_mark(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# Perform ANOVA for each component with sex, order, and interactions
for component in components:
    try:
        # Fit the ANOVA model with sex, order, and their interaction
        formula = f'{component} ~ C(sex) * C(order)'
        model = ols(formula, data=data).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)
        
        # Add p-value significance marks
        anova_table['p_value'] = anova_table['PR(>F)'].apply(significance_mark)
        anova_results[component] = anova_table
    except Exception as e:
        print(f"Could not process ANOVA for {component}: {e}")

# Save ANOVA results to CSV
anova_dfs = pd.concat(anova_results, axis=1)
anova_dfs.to_csv('../results/ULTIMATE_FEDPROTEIN_anova_results.csv', index=True)

# Calculate sum of key components (like pellets and meals per day) for each group by 'sex' and 'order'
sum_results = {}
for component in ['grain_pellets_per_day', 'pr_pellets_per_day', 'nr_pellets_per_day', 
                  'grain_meals_per_day', 'pr_meals_per_day', 'nr_meals_per_day', 
                  'grain_snacks_per_day', 'pr_snacks_per_day', 'nr_snacks_per_day', 
                  'grain_mega_meals_per_day', 'pr_mega_meals_per_day', 'nr_mega_meals_per_day']:
    try:
        # Sum the component data across days for each group (sex and order)
        data[component] = data[component].apply(lambda x: sum(eval(x)) if isinstance(x, str) else np.nan)
        sum_results[component] = data.groupby(['sex', 'order'])[component].sum()
    except Exception as e:
        print(f"Could not calculate sum for {component}: {e}")

# Save sum results to CSV
sum_dfs = pd.concat(sum_results, axis=1)
sum_dfs.to_csv('../results/ULTIMATE_FEDPROTEIN_sum_results.csv', index=True)

# Descriptive statistics for the dataset
descriptive_stats = data.describe()

# Save descriptive statistics to CSV
descriptive_stats.to_csv('../results/ULTIMATE_FEDPROTEIN_descriptive_stats.csv')



Could not process ANOVA for grain_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for pr_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for nr_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for grain_snacks_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for pr_snacks_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variab

In [9]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the data
file_path = '../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'
data = pd.read_csv(file_path)

# Function to unpack list-like columns into individual day columns
def unpack_days(df, column_prefix):
    unpacked_cols = []
    for col in df.columns:
        if column_prefix in col:
            # Unpack the list into separate columns
            unpacked = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
            max_len = max(unpacked.apply(len))
            for i in range(max_len):
                day_col = f"{col}_day_{i+1}"
                df[day_col] = unpacked.apply(lambda x: x[i] if len(x) > i else np.nan)
                unpacked_cols.append(day_col)
    return df, unpacked_cols

# Unpack relevant list columns for each meal/snack/mega meal per day
columns_to_unpack = [
    'all_mega_meals_per_day', 'all_pellets_per_day',
    'all_meals_per_day', 'all_snacks_per_day'
]

# Unpack the day-wise data
for col in columns_to_unpack:
    data, _ = unpack_days(data, col)

# List of key components to analyze (excluding hourly data and timestamps)
components = [
    'grain_meal_size', 'grain_snack_size', 'grain_number_of_meals', 
    'grain_meal_frequency', 'grain_number_of_snacks', 'grain_snack_frequency', 
    'grain_mega_meal_frequency', 'grain_mega_meal_size', 'grain_number_of_mega_meals', 
    'pr_meal_size', 'pr_snack_size', 'pr_number_of_meals', 'pr_meal_frequency', 
    'pr_number_of_snacks', 'pr_snack_frequency', 'pr_mega_meal_frequency', 
    'pr_mega_meal_size', 'pr_number_of_mega_meals', 'nr_meal_size', 'nr_snack_size', 
    'nr_number_of_meals', 'nr_meal_frequency', 'nr_number_of_snacks', 'nr_snack_frequency', 
    'nr_mega_meal_frequency', 'nr_mega_meal_size', 'nr_number_of_mega_meals', 
    'grain_meals_per_day', 'pr_meals_per_day', 'nr_meals_per_day', 
    'grain_snacks_per_day', 'pr_snacks_per_day', 'nr_snacks_per_day', 
    'grain_mega_meals_per_day', 'pr_mega_meals_per_day', 'nr_mega_meals_per_day', 
    'grain_pellets_per_day', 'pr_pellets_per_day', 'nr_pellets_per_day', 
    'all_pellets_per_day', 'all_meals_per_day', 'all_snacks_per_day', 
    'all_mega_meals_per_day'
]

anova_results = {}
sum_results = {}
descriptive_stats_list = []

# Helper function to add significance marks
def significance_mark(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# Perform ANOVA, sum, and descriptive statistics for each component
for component in components:
    try:
        # Fit the ANOVA model with sex, order, and their interaction
        formula = f'{component} ~ C(sex) * C(order)'
        model = ols(formula, data=data).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)
        
        # Add p-value significance marks
        anova_table['p_value'] = anova_table['PR(>F)'].apply(significance_mark)
        anova_results[component] = anova_table

        # Descriptive statistics for the component
        descriptive_stats = data[component].describe()
        descriptive_stats_list.append(descriptive_stats)

        # Sum results grouped by sex and order
        data[component] = data[component].apply(lambda x: sum(eval(x)) if isinstance(x, str) else x)
        sum_result = data.groupby(['sex', 'order'])[component].sum()
        sum_results[component] = sum_result
    except Exception as e:
        print(f"Could not process ANOVA for {component}: {e}")

# Create DataFrames for ANOVA, sum, and descriptive statistics
anova_dfs = pd.concat(anova_results, axis=1)
sum_dfs = pd.concat(sum_results, axis=1).reset_index()
descriptive_stats_df = pd.concat(descriptive_stats_list, axis=1)

# Combine everything into one DataFrame
combined_df = pd.concat([descriptive_stats_df.T, sum_dfs.set_index(['sex', 'order']), anova_dfs], axis=1)

# Save the combined DataFrame to a CSV file
combined_df.to_csv('../results/ULTIMATE_combined_results.csv')

# Output the path for the saved file
print("Combined results (descriptive stats, sum, ANOVA) saved to: /mnt/data/combined_results.csv")


Could not process ANOVA for grain_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for pr_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for nr_meals_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for grain_snacks_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variable converted to endog is non-numeric (e.g., bool or str).
Could not process ANOVA for pr_snacks_per_day: endog has evaluated to an array with multiple columns that has shape (23, 23). This occurs when the variab

In [15]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the dataset
file_path = r"../results/resultallpellets_widePhase.csv"
data = pd.read_csv(file_path)

# Prepare the columns for ANOVA (days G0-G2, NR0-NR6, PR0-PR6)
days_columns = [
    'G0', 'G1', 'G2', 'NR0', 'NR1', 'NR2', 'NR3', 'NR4', 'NR5', 'NR6',
    'PR0', 'PR1', 'PR2', 'PR3', 'PR4', 'PR5', 'PR6'
]

anova_results = {}

# Helper function to add significance marks
def significance_mark(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# Perform ANOVA for each day
for day in days_columns:
    formula = f'{day} ~ C(Sex) + C(Order) + C(Sex):C(Order)'  # Interaction between Sex and Order
    model = ols(formula, data=data).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    
    # Add significance marks to the p-values
    anova_table['p_value'] = anova_table['PR(>F)'].apply(significance_mark)
    anova_results[day] = anova_table

# Generate descriptive statistics for the day columns
descriptive_stats = data[days_columns].describe()

# Combine the ANOVA results into a single DataFrame for better comparison
anova_combined = pd.concat(anova_results, axis=1)

# Save both the ANOVA results and descriptive statistics to CSV files
anova_combined.to_csv('../results/ULTIMATE_DAYS_anova_results.csv')
descriptive_stats.to_csv('../results/ULTIMATE_DAYS_descriptive_stats.csv')



In [17]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import AnovaRM

# Load the dataset
file_path = '../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'  # Replace with the path to your file
data = pd.read_csv(file_path)

# List of components for both between and within-subject analysis
components = [
    'grain_meal_size', 'grain_snack_size', 'grain_number_of_meals',
    'grain_meal_frequency', 'grain_number_of_snacks', 'grain_snack_frequency',
    'grain_mega_meal_frequency', 'grain_mega_meal_size', 'grain_number_of_mega_meals',
    'pr_meal_size', 'pr_snack_size', 'pr_number_of_meals', 'pr_meal_frequency',
    'pr_number_of_snacks', 'pr_snack_frequency', 'pr_mega_meal_frequency', 
    'pr_mega_meal_size', 'pr_number_of_mega_meals', 'nr_meal_size', 'nr_snack_size',
    'nr_number_of_meals', 'nr_meal_frequency', 'nr_number_of_snacks', 'nr_snack_frequency',
    'nr_mega_meal_frequency', 'nr_mega_meal_size', 'nr_number_of_mega_meals'
]

# Helper function to add significance marks
def significance_mark(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# 1. Between-subjects ANOVA
anova_results = {}
descriptive_stats = {}

for component in components:
    formula = f'{component} ~ C(sex) + C(order) + C(sex):C(order)'  # Interaction between Sex and Order
    model = ols(formula, data=data).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    
    # Add p-value significance marks
    anova_table['p_value'] = anova_table['PR(>F)'].apply(significance_mark)
    anova_results[component] = anova_table
    
    # Descriptive statistics grouped by Sex and Order
    descriptive_grouped = data.groupby(['sex', 'order'])[component].agg(['mean', 'std', 'count']).reset_index()
    descriptive_stats[component] = descriptive_grouped

# Combine ANOVA results and descriptive statistics into a single output
anova_combined = pd.concat(anova_results, axis=1)
descriptive_combined = pd.concat(descriptive_stats, axis=1)

# Save between-subjects ANOVA and descriptive stats to CSV
anova_combined.to_csv('anova_between_subjects_results.csv')
descriptive_combined.to_csv('descriptive_stats_between_subjects.csv')

# 2. Within-subjects (Repeated Measures) ANOVA for each component
within_subjects_results = {}

# We'll assume each subject (mouse) is identified by 'mouse_id' and there is a column identifying the diet phase
# e.g., 'diet_phase' with values ['NR', 'PR'] for each mouse
# Reshape data to have separate columns for the repeated measures (e.g., NR and PR for meal sizes)
for component in ['meal_size']:  # Replace with actual components you want to analyze within-subjects
    try:
        aovrm = AnovaRM(data, depvar=component, subject='mouse_id', within=['diet_phase'])
        rm_anova_results = aovrm.fit()
        within_subjects_results[component] = rm_anova_results.summary()
    except Exception as e:
        print(f"Could not perform repeated measures ANOVA for {component}: {e}")

# Save within-subjects ANOVA results to a file
with open('anova_within_subjects_results.txt', 'w') as f:
    for component, result in within_subjects_results.items():
        f.write(f'ANOVA Results for {component}\n')
        f.write(result.as_text())
        f.write('\n\n')



Could not perform repeated measures ANOVA for meal_size: Index(['diet_phase'], dtype='object')


In [18]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import AnovaRM

# Load the dataset
file_path = '../results/FED_PROTEIN_FINAL_ULTIMATE_DATA_WITHOUT_IMI_IPI_LISTS.csv'  # Replace with the path to your file
data = pd.read_csv(file_path)

# List of components for both between and within-subject analysis
components = [
    'grain_meal_size', 'grain_snack_size', 'grain_number_of_meals',
    'grain_meal_frequency', 'grain_number_of_snacks', 'grain_snack_frequency',
    'grain_mega_meal_frequency', 'grain_mega_meal_size', 'grain_number_of_mega_meals',
    'pr_meal_size', 'pr_snack_size', 'pr_number_of_meals', 'pr_meal_frequency',
    'pr_number_of_snacks', 'pr_snack_frequency', 'pr_mega_meal_frequency', 
    'pr_mega_meal_size', 'pr_number_of_mega_meals', 'nr_meal_size', 'nr_snack_size',
    'nr_number_of_meals', 'nr_meal_frequency', 'nr_number_of_snacks', 'nr_snack_frequency',
    'nr_mega_meal_frequency', 'nr_mega_meal_size', 'nr_number_of_mega_meals'
]

# Helper function to add significance marks
def significance_mark(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# 1. Between-subjects ANOVA and adding p-values to descriptive stats
anova_results = {}
descriptive_stats = {}

for component in components:
    formula = f'{component} ~ C(sex) + C(order) + C(sex):C(order)'  # Interaction between Sex and Order
    model = ols(formula, data=data).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    
    # Add p-value significance marks
    anova_table['p_value'] = anova_table['PR(>F)'].apply(significance_mark)
    anova_results[component] = anova_table
    
    # Descriptive statistics grouped by Sex and Order
    descriptive_grouped = data.groupby(['sex', 'order'])[component].agg(['mean', 'std', 'count']).reset_index()
    
    # Add the p-values and significance marks to descriptive stats
    p_value_sex = anova_table.loc['C(sex)', 'PR(>F)'] if 'C(sex)' in anova_table.index else None
    p_value_order = anova_table.loc['C(order)', 'PR(>F)'] if 'C(order)' in anova_table.index else None
    p_value_interaction = anova_table.loc['C(sex):C(order)', 'PR(>F)'] if 'C(sex):C(order)' in anova_table.index else None
    
    descriptive_grouped['p_value_sex'] = p_value_sex
    descriptive_grouped['p_value_order'] = p_value_order
    descriptive_grouped['p_value_interaction'] = p_value_interaction
    descriptive_grouped['significance_sex'] = significance_mark(p_value_sex) if p_value_sex else ''
    descriptive_grouped['significance_order'] = significance_mark(p_value_order) if p_value_order else ''
    descriptive_grouped['significance_interaction'] = significance_mark(p_value_interaction) if p_value_interaction else ''

    descriptive_stats[component] = descriptive_grouped

# Combine ANOVA results and descriptive statistics into a single output
anova_combined = pd.concat(anova_results, axis=1)
descriptive_combined = pd.concat(descriptive_stats, axis=1)

# Save between-subjects ANOVA and descriptive stats (with p-values) to CSV
anova_combined.to_csv('anova_between_subjects_results.csv')
descriptive_combined.to_csv('descriptive_stats_with_pvalues.csv')

# 2. Within-subjects (Repeated Measures) ANOVA for each component
within_subjects_results = {}

# Assuming each subject (mouse) is identified by 'mouse_id' and there is a column identifying the diet phase
for component in ['meal_size']:  # Replace with actual components you want to analyze within-subjects
    try:
        aovrm = AnovaRM(data, depvar=component, subject='mouse_id', within=['diet_phase'])
        rm_anova_results = aovrm.fit()
        within_subjects_results[component] = rm_anova_results.summary()
    except Exception as e:
        print(f"Could not perform repeated measures ANOVA for {component}: {e}")

# Save within-subjects ANOVA results to a file
with open('anova_within_subjects_results.txt', 'w') as f:
    for component, result in within_subjects_results.items():
        f.write(f'ANOVA Results for {component}\n')
        f.write(result.as_text())
        f.write('\n\n')




Could not perform repeated measures ANOVA for meal_size: Index(['diet_phase'], dtype='object')
