### Key Features:
- Data Loading:
Uses Pandas to read two Excel files containing batch data and metabolite data.
df holds viable cell counts, while metabolite stores metabolite information.
- Model Fitting:
Defines a function fitted_vcd to model viable cell growth over time.
Uses curve_fit to estimate the model parameters for each batch, and visualizes the actual and predicted data.
- Growth Rate Calculation:
Calculates growth rates for each batch using a defined time interval (delta_time).
Updates the batch DataFrame with a new column 'Growth Rate'.
- Specific Rate Calculation:
Implements the calculate_specific_rate function to compute specific rates of metabolite uptake/secretion.
Loops through each batch and specified time intervals, calculating specific rates for different metabolites.
- Results Storage:
Stores calculated specific rates in a structured format: a list of records (results) and a nested dictionary keyed by batch and interval (cumulative_results).
Supports further analysis and export.
### Requirements:
Data Files: Excel files with specific sheets and header structures, which must align with the code.
### Usage:
- Adjust File Paths:
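Update the file names (file_name and file_name_meta) and, if the files are not in the working directory, the commented-out file paths (file_path and file_path_meta) to point to your actual data files.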
- Check Data Structure:
Ensure data files contain the necessary columns and rows, and are grouped by the appropriate batch IDs.
- Run the Code:
Execute the script to load data, fit models, calculate growth rates, and compute specific rates.
- Review Results:
Check the visualizations, results lists, and cumulative results for analysis.
- Extend the Analysis:
Use the results for further analysis, data export, or integration with other tools.

In [1]:
import pickle
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
# Load the batch measurements (model input) from the Excel workbook.
# To read from another folder, set file_path and pass full_path to read_excel:
#file_path = r"C:\Users\user\Documents\DC\Manual curation_iCHO\Whole-Cell-Network-Reconstruction-for-CHO-cells_origin\Whole-Cell-Network-Reconstruction-for-CHO-cells\Data\ZeLa Data"
#full_path = f"{file_path}/{file_name}"
file_name = "ELN_Excel data sheet_Bio141 to Bio148.xlsx"
sheet_name = "All data"

# header=17: the column labels are taken from the row with 0-based index 17.
df_tmp = pd.read_excel(file_name, sheet_name=sheet_name, header=17)

# Keep only the identifier, cell-count, age and volume columns.
df = df_tmp[[
    'Batch ID',
    'Sample ID',
    'Viable Cells',
    'Age (h)',
    'Total Volume',
    'Base Volume',
    'Eff Feed B Volume',
    'Gluc Feed Volume',
]]

# Positional row window (end exclusive) taken from the selected columns.
start_row, end_row = 1, 129
viable_cells = df.iloc[start_row:end_row]

In [None]:
# Define function: fitted_vcd model
def fitted_vcd(time_value, A, B, C, D):
    """Evaluate the viable-cell model ``A / (exp(B*t) + C*exp(-D*t))``.

    Args:
        time_value: Time point(s) at which to evaluate the model; a scalar
            or a NumPy array (the expression is applied element-wise).
        A: Numerator of the model.
        B: Rate inside the first exponential of the denominator.
        C: Weight of the second exponential of the denominator.
        D: Rate inside the second (negated) exponential.

    Returns:
        The model value(s), with the same shape as ``time_value``.
    """
    rising_term = np.exp(B * time_value)
    falling_term = C * np.exp(-D * time_value)
    denominator = rising_term + falling_term
    return A / denominator

# Dictionary of DataFrames grouped by Batch ID
batch_dfs = {batch_id: df_group for batch_id, df_group in df.groupby('Batch ID')}

# Width of the window, centred on each sample time, over which the growth
# rate is estimated from the fitted curve (same units as 'Age (h)').
delta_time = 12  # 0.5 for instant time point

# For each Batch ID: remove the first row, fit the model, plot the fit and
# derive growth rates from the fitted curve.
for batch_id in ['U1', 'U2', 'U3', 'U4', 'U5', 'U6', 'U7', 'U8']:
    if batch_id not in batch_dfs:  # Skip batches that are absent from the data
        continue

    # Remove the first row of this batch and store the trimmed frame back.
    batch_df = batch_dfs[batch_id]
    batch_df = batch_df.drop(batch_df.index[0])
    batch_dfs[batch_id] = batch_df

    ages = batch_df['Age (h)']
    total_volume = batch_df['Total Volume']
    # Viable cells scaled by (total volume - base - feed B - glucose feed) / total volume.
    # NOTE(review): presumably a dilution correction for added liquids; confirm.
    corrected_cells = (
        batch_df['Viable Cells']
        * (total_volume - batch_df['Base Volume'] - batch_df['Eff Feed B Volume'] - batch_df['Gluc Feed Volume'])
        / total_volume
    )

    # Fit only on rows where BOTH the age and the cell value are present.
    # Dropping NaNs from each column independently could pair a time with
    # the wrong cell count or hand curve_fit arrays of different lengths.
    valid_rows = ages.notna() & corrected_cells.notna()
    time_values = ages[valid_rows].values
    viable_cells = corrected_cells[valid_rows].values

    # Fit model parameters using curve_fit
    params, _ = curve_fit(fitted_vcd, time_values, viable_cells, p0=[100, 0.01, 100, 0.05])

    # Evaluate the fitted model on a dense grid for plotting
    time_values_fit = np.linspace(min(time_values), max(time_values), 300)
    fitted_values = fitted_vcd(time_values_fit, *params)

    # Plot actual data and fitted model
    plt.figure(figsize=(10, 6))
    plt.scatter(time_values, viable_cells, color='blue', label='Actual Data')  # Actual data
    plt.plot(time_values_fit, fitted_values, color='red', label='Fitted Model')  # Fitted model
    plt.xlabel('Age (h)')
    plt.ylabel('Viable Cell Density')
    plt.title(f'Fit of Viable Cells over Time: {batch_id}')
    plt.legend()
    plt.grid(True)
    plt.show()

    # Growth rates are computed for EVERY row of the batch (a missing age
    # simply yields NaN) so the result lists always match the DataFrame length.
    growth_rates = []
    growth_rates_log = []
    fitted_vcd_value = []
    half_window = delta_time / 2
    for sample_age in ages.values:
        vcd_fit = fitted_vcd(sample_age, *params)
        initial_vcd = fitted_vcd(sample_age - half_window, *params)
        final_vcd = fitted_vcd(sample_age + half_window, *params)
        fitted_vcd_value.append(vcd_fit)

        # Change over the window divided by (mean fitted value * window width)
        growth_rate = (final_vcd - initial_vcd) / ((final_vcd + initial_vcd) / 2 * delta_time)
        growth_rates.append(growth_rate)

        # Log-difference form; the logarithm is only defined for positive values
        if initial_vcd > 0 and final_vcd > 0:
            growth_rate_log = (np.log(final_vcd) - np.log(initial_vcd)) / delta_time
        else:
            growth_rate_log = np.nan
        growth_rates_log.append(growth_rate_log)

        print(f"at {sample_age}==> initial_vcd:{initial_vcd}; final_vcd:{final_vcd}; delta_time:{delta_time}\n{growth_rate} growth rate")
        print(f"{growth_rate_log} growth rate_log")

    # Add results to DataFrame (batch_df is the object stored in batch_dfs)
    batch_df['Growth Rate'] = growth_rates
    # Per-sample log-based rates: the full list, not just the last scalar
    batch_df['Growth Rate_R'] = growth_rates_log
    batch_df['fitted_VCD'] = fitted_vcd_value
    print(batch_dfs[batch_id]['fitted_VCD'])

In [3]:
# Workbook with the processed uptake/secretion amounts and the growth rates.
# To read from another folder, set file_path_meta and use full_path_meta:
#file_path_meta = r"C:\Users\user\Documents\DC\Manual curation_iCHO\Whole-Cell-Network-Reconstruction-for-CHO-cells_origin\Whole-Cell-Network-Reconstruction-for-CHO-cells\Data\ZeLa Data"
#full_path_meta = f"{file_path_meta}/{file_name_meta}"
file_name_meta = "R_processing_ELN_Excel data sheet_Bio141 to Bio148.xlsx"
# file_name_meta = "processing_ELN_Excel data sheet_Bio141 to Bio148.xlsx"
sheet_name_meta = "Amount of uptake and secretion"
sheet_name_gr = "Growth_Rate"

# Column labels come from row index 2 of the metabolite sheet and from the
# first row of the growth-rate sheet.
df_meta = pd.read_excel(file_name_meta, sheet_name=sheet_name_meta, header=2)
df_gr = pd.read_excel(file_name_meta, sheet_name=sheet_name_gr, header=0)

# Strip leading/trailing whitespace from the metabolite column names
df_meta.columns = df_meta.columns.str.strip()

# Positional row window (end exclusive) of the metabolite sheet to analyse
start_row, end_row = 0, 129
metabolite = df_meta.iloc[start_row:end_row]

# One DataFrame per batch; note the two sheets spell the batch column differently.
batch_dfs_meta = {key: group for key, group in metabolite.groupby('Batch ID')}
batch_dfs_raw = {key: group for key, group in df_gr.groupby('Batch_ID')}

In [4]:
# Display the table read from the "Growth_Rate" sheet for a visual check.
df_gr

Unnamed: 0,Batch_ID,Sample ID,Age (h),Growth Rate,VCD
0,U1,P00,0.000000,,
1,U1,P0,0.000000,,0.000000
2,U1,P1,14.033333,,
3,U1,P2,39.250000,0.034670,0.989499
4,U1,P3,61.133333,,
...,...,...,...,...,...
123,U8,P10,232.916667,-0.004389,34.246910
124,U8,P11,256.750000,,
125,U8,P12,276.466667,-0.005894,23.214931
126,U8,P13,302.216667,,


In [5]:
def calculate_specific_rate(delta_met, growth_rate, initial_time, current_time, VC_t=None, VC_i=None, IVCD=None):
    """Compute a specific rate as ``delta_met / cell_delta / (200/1000)``.

    The cell term ``cell_delta`` is obtained in one of two mutually
    exclusive ways:

    * ``VC_t`` and ``VC_i`` given, ``IVCD`` omitted:
      ``(VC_t + VC_i) / 2 * (current_time - initial_time)``.
    * ``IVCD`` given, ``VC_t`` and ``VC_i`` omitted: ``IVCD * 24``.

    Args:
        delta_met: Amount of metabolite taken up or secreted.
        growth_rate: Growth rate of the interval. It is only checked for
            zero; it does not enter the returned value.
        initial_time: Time at the start of the interval.
        current_time: Time at the end of the interval.
        VC_t: Cell value at ``current_time`` (endpoint mode).
        VC_i: Cell value at ``initial_time`` (endpoint mode).
        IVCD: Cell term supplied directly (IVCD mode).

    Returns:
        ``0`` when ``cell_delta`` is zero; ``nan`` when the interval length
        or ``growth_rate`` is zero, when the cell arguments are supplied in
        an unsupported combination, or when the computation raises;
        otherwise the specific rate.
    """
    try:
        time_interval = current_time - initial_time
        endpoints_given = VC_t is not None and VC_i is not None
        endpoints_absent = VC_t is None and VC_i is None

        if endpoints_given and IVCD is None:
            # Mean of the two endpoint values times the interval length
            cell_delta = (VC_t + VC_i) / 2 * time_interval
        elif endpoints_absent and IVCD is not None:
            # NOTE(review): the factor 24 presumably converts days to hours; confirm.
            cell_delta = IVCD * 24
        else:
            # Previously this fell through to an unbound `cell_delta` and was
            # only rescued by the broad except below; report it explicitly.
            print("Error calculating specific rate: provide either both VC_t and VC_i, or IVCD alone")
            return float('nan')

        # Trace output of the cell term used for this call
        print(cell_delta)

        # Zero division handling for `cell_delta`
        if cell_delta == 0:
            return 0

        # Zero division handling for other parameters
        if time_interval == 0 or growth_rate == 0:
            return float('nan')

        # NOTE(review): 200/1000 is an undocumented scaling constant; confirm its units.
        return delta_met / cell_delta / (200 / 1000)
    except Exception as e:
        # Best-effort: any failure is reported and yields NaN so the calling
        # loop can continue with the remaining metabolites.
        print(f"Error calculating specific rate: {e}")
        return float('nan')

In [6]:
# Sample-ID pairs (start, end) that delimit each interval to evaluate
intervals = [("P0", "P2"), ("P2", "P4"), ("P4", "P6"), ("P6", "P8"), ("P8", "P12"), ("P12", "P14")]
# 'raw' reads growth rate and VCD from batch_dfs_raw;
# 'smoothened' reads growth rate and fitted_VCD from batch_dfs.
preprocessing = 'raw' # 'smoothened'

# Fail fast on an unsupported mode instead of silently falling through both
# branches below and reusing values left over from a previous iteration.
if preprocessing not in ('raw', 'smoothened'):
    raise ValueError(f"Unsupported preprocessing mode: {preprocessing!r}")

# Ensure results structures are initialized
results = []            # flat records: batch / interval / metabolite / rate
results_for_check = []  # one trace string of the inputs per computed rate
cumulative_results = {} # {batch_id: {(start, end): {name: rate}}}


def _sample_value(batch_df, sample_id, column):
    """Return `column` from the first row of `batch_df` whose 'Sample ID' is `sample_id`.

    Raises IndexError when no row matches.
    """
    return batch_df[batch_df['Sample ID'] == sample_id][column].values[0]


# Loop through each Batch ID to calculate specific rates
for batch_id in ['U1', 'U2', 'U3', 'U4', 'U5', 'U6', 'U7', 'U8']:
    if batch_id not in batch_dfs_meta:
        continue

    # Drop the first row of the batch.
    # NOTE: this rewrites batch_dfs_meta in place, so re-running this cell
    # without re-creating batch_dfs_meta drops one more row per batch.
    batch_meta = batch_dfs_meta[batch_id]
    batch_meta = batch_meta.drop(batch_meta.index[0])
    batch_dfs_meta[batch_id] = batch_meta

    # Every column from the third onwards is treated as a metabolite
    # (assumes the first two columns are identifiers; TODO confirm).
    met_list = batch_meta.columns[2:].tolist()

    # Calculate the specific rates for each interval
    for start, end in intervals:
        end_row = batch_meta[batch_meta['Sample ID'] == end]
        start_row = batch_meta[batch_meta['Sample ID'] == start]

        # Proceed only if both samples exist for this batch
        if end_row.empty or start_row.empty:
            continue

        try:
            if preprocessing == 'smoothened':
                fitted_df = batch_dfs[batch_id]
                growth_rate = _sample_value(fitted_df, end, 'Growth Rate')
                # The measured 'Viable Cells' column can be used here instead of 'fitted_VCD'.
                VC_t = _sample_value(fitted_df, end, 'fitted_VCD')
                VC_i = _sample_value(fitted_df, start, 'fitted_VCD')
                IVCD = None
                current_time = _sample_value(fitted_df, end, 'Age (h)')
                initial_time = _sample_value(fitted_df, start, 'Age (h)')
            else:  # preprocessing == 'raw'
                raw_df = batch_dfs_raw[batch_id]
                growth_rate = _sample_value(raw_df, end, 'Growth Rate')
                IVCD = _sample_value(raw_df, end, 'VCD')
                VC_t = None
                VC_i = None
                current_time = _sample_value(raw_df, end, 'Age (h)')
                initial_time = _sample_value(raw_df, start, 'Age (h)')

            # Create the nested slot for this batch/interval once
            interval_rates = cumulative_results.setdefault(batch_id, {}).setdefault((start, end), {})
            interval_label = f"{start} to {end}"

            # Save the growth rate alongside the metabolite rates
            interval_rates['SGR'] = growth_rate
            results.append({
                'Batch ID': batch_id,
                'Interval': interval_label,
                'metabolite': 'SGR',
                'Specific Rate': growth_rate
            })

            # Loop through each metabolite to calculate specific rates
            for met_name in met_list:
                # The amount is read from the END sample's row only
                delta_met = end_row[met_name].values[0]

                specific_rate = calculate_specific_rate(delta_met, growth_rate, initial_time, current_time, VC_t=VC_t, VC_i=VC_i, IVCD=IVCD)

                results.append({
                    'Batch ID': batch_id,
                    'Interval': interval_label,
                    'metabolite': met_name,
                    'Specific Rate': specific_rate
                })
                results_for_check.append({
                    'check': f"{met_name}/{delta_met}/{growth_rate}/{VC_t}/{VC_i}/{IVCD}/{initial_time}/{current_time}"
                })
                interval_rates[met_name] = specific_rate

        except Exception as e:
            # Best-effort: report the failing interval and continue with the next one
            print(f"Error processing interval {start}-{end} for batch {batch_id}: {e}")

23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
23.747974999999997
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666666665
160.02991666

In [None]:
# Display the input traces (metabolite/delta/growth rate/cell terms/times) recorded for each computed rate.
results_for_check

In [8]:
# Gather all computed records (one per batch / interval / metabolite) in a table
results_df = pd.DataFrame(results)

# Reshape to wide form: rows are (Batch ID, Interval) pairs, columns are metabolites
pivot_df = results_df.pivot_table(
    values='Specific Rate',
    index=['Batch ID', 'Interval'],
    columns='metabolite',
)

# Write the wide table to its own workbook
pivot_df.to_excel('specific_rates_R2.xlsx', sheet_name='Specific Rates')


In [9]:

# pivot_df is expected to be indexed by ('Batch ID', 'Interval') with one
# column per metabolite (including 'SGR').
group1_batches = ['U1', 'U2', 'U3']
group2_batches = ['U4', 'U5', 'U6', 'U7', 'U8']

# Group 1: select its rows by batch, then average every column per interval
mask_group1 = pivot_df.index.get_level_values('Batch ID').isin(group1_batches)
data_group1 = pivot_df.loc[mask_group1]
group1_averages = data_group1.groupby(level='Interval').mean()

# Group 2: same procedure
mask_group2 = pivot_df.index.get_level_values('Batch ID').isin(group2_batches)
data_group2 = pivot_df.loc[mask_group2]
group2_averages = data_group2.groupby(level='Interval').mean()


In [10]:
# Append the group averages as new sheets to the workbook that the pivot
# table was exported to ('specific_rates_R2.xlsx'). mode='a' requires the
# file to exist already; an existing sheet with the same name is replaced.
with pd.ExcelWriter('specific_rates_R2.xlsx', mode='a', engine='openpyxl', if_sheet_exists='replace') as writer:
    group1_averages.to_excel(writer, sheet_name='Avg_U1_U3')
    group2_averages.to_excel(writer, sheet_name='Avg_U4_U8')

In [11]:
# Create a mapping dict to incorporate all the uptake and secretion values to reactions in our reconstruction
#
# Keys are the metabolite column names of the specific-rate tables; values are
# the reaction identifiers they are exported under. "SGR" is the label under
# which the growth rate is stored in the results. A value of None marks a
# metabolite without a reaction; the pickle-export cells skip such entries.
# Running this cell replaces any previously defined mapping_dict.

mapping_dict = {
    "3-hydroxybutyric acid": "EX_bhb_e",
    "NH4 (Met)": "EX_nh4_e",
    "acetic acid": "EX_ac_e",
    "alanine": "EX_ala_L_e",
    "arginine": "EX_arg_L_e",
    "asparagine": "EX_asn_L_e",
    "aspartic acid": "EX_asp_L_e",
    "butyric & 2-hydroxy- butyric acids": "EX_2hb_e",
    "citric acid": "EX_cit_e",
    "cystine": "EX_cys_L_e",
    "ethanol": "EX_etoh_e",
    "formic acid": "EX_for_e",
    "fumaric acid": "EX_fum_e",
    "glucose": "EX_glc_e",
    "glutamic acid": "EX_glu_L_e",
    "glutamine": "EX_gln_L_e",
    "glycine": "EX_gly_e",
    "histidine": "EX_his_L_e",
    "hydroxyproline": "EX_4hpro_e",
    "isoleucine": "EX_ile_L_e",
    "isovaleric acid": None,
    "lactic acid": "EX_lac_L_e",
    "leucine": "EX_leu_L_e",
    "lysine": "EX_lys_L_e",
    "malic acid": "EX_mal_L_e",
    "methionine": "EX_met_L_e",
    "phenylalanine": "EX_phe_L_e",
    "proline": "EX_pro_L_e",
    "pyroglutamic acid": "EX_5oxpro_e",
    "pyruvic acid": "EX_pyr_e",
    "serine": "EX_ser_L_e",
    "threonine": "EX_thr_L_e",
    "tryptophan": "EX_trp_L_e",
    "tyrosine": "EX_tyr_L_e",
    "valine": "EX_val_L_e",
    "SGR": "exp_growth_rate"
}

In [None]:
# Create a mapping dict to incorporate all the uptake and secretion values to reactions in our reconstruction
# Using glucose value from metabolite analyzer
#
# Variant of the mapping in which "EX_glc_e" is fed from the "Glucose (Met)"
# column; the "glucose" entry is commented out so the reaction is mapped once.
# Keys are metabolite column names, values are reaction identifiers; a value
# of None marks a metabolite without a reaction.
# Running this cell replaces any previously defined mapping_dict.
mapping_dict = {
    "3-hydroxybutyric acid": "EX_bhb_e",
    "acetic acid": "EX_ac_e",
    "alanine": "EX_ala_L_e",
    "arginine": "EX_arg_L_e",
    "asparagine": "EX_asn_L_e",
    "aspartic acid": "EX_asp_L_e",
    "butyric & 2-hydroxy- butyric acids": "EX_2hb_e",
    "citric acid": "EX_cit_e",
    "cystine": "EX_cys_L_e",
    "ethanol": "EX_etoh_e",
    "formic acid": "EX_for_e",
    "fumaric acid": "EX_fum_e",
#     "glucose": "EX_glc_e",
    "glutamic acid": "EX_glu_L_e",
    "glutamine": "EX_gln_L_e",
    "glycine": "EX_gly_e",
    "histidine": "EX_his_L_e",
    "hydroxyproline": "EX_4hpro_e",
    "isoleucine": "EX_ile_L_e",
    "isovaleric acid": None,
    "lactic acid": "EX_lac_L_e",
    "leucine": "EX_leu_L_e",
    "lysine": "EX_lys_L_e",
    "malic acid": "EX_mal_L_e",
    "methionine": "EX_met_L_e",
    "phenylalanine": "EX_phe_L_e",
    "proline": "EX_pro_L_e",
    "pyroglutamic acid": "EX_5oxpro_e",
    "pyruvic acid": "EX_pyr_e",
    "serine": "EX_ser_L_e",
    "threonine": "EX_thr_L_e",
    "tryptophan": "EX_trp_L_e",
    "tyrosine": "EX_tyr_L_e",
    "valine": "EX_val_L_e",
    "SGR": "exp_growth_rate",
    "Glucose (Met)": "EX_glc_e",
#     "Lactate (Met)": "EX_lac_L_e",
#     "Glutamine (Met)": "EX_gln_L_e",
#     "Glutamate (Met)": "EX_glu_L_e",
    "NH4 (Met)": "EX_nh4_e",
}

In [None]:
# Create a mapping dict to incorporate all the uptake and secretion values to reactions in our reconstruction
# Using glucose, lactate, glutamine and glutamate values from metabolite analyzer
#
# Variant of the mapping in which the glucose, lactate, glutamine and
# glutamate reactions are fed from the "... (Met)" columns; the corresponding
# "glucose", "glutamic acid", "glutamine" and "lactic acid" entries are
# commented out so each reaction is mapped once.
# Keys are metabolite column names, values are reaction identifiers; a value
# of None marks a metabolite without a reaction.
# Running this cell replaces any previously defined mapping_dict.
mapping_dict = {
    "3-hydroxybutyric acid": "EX_bhb_e",
    "acetic acid": "EX_ac_e",
    "alanine": "EX_ala_L_e",
    "arginine": "EX_arg_L_e",
    "asparagine": "EX_asn_L_e",
    "aspartic acid": "EX_asp_L_e",
    "butyric & 2-hydroxy- butyric acids": "EX_2hb_e",
    "citric acid": "EX_cit_e",
    "cystine": "EX_cys_L_e",
    "ethanol": "EX_etoh_e",
    "formic acid": "EX_for_e",
    "fumaric acid": "EX_fum_e",
#     "glucose": "EX_glc_e",
#     "glutamic acid": "EX_glu_L_e",
#     "glutamine": "EX_gln_L_e",
    "glycine": "EX_gly_e",
    "histidine": "EX_his_L_e",
    "hydroxyproline": "EX_4hpro_e",
    "isoleucine": "EX_ile_L_e",
    "isovaleric acid": None,
#     "lactic acid": "EX_lac_L_e",
    "leucine": "EX_leu_L_e",
    "lysine": "EX_lys_L_e",
    "malic acid": "EX_mal_L_e",
    "methionine": "EX_met_L_e",
    "phenylalanine": "EX_phe_L_e",
    "proline": "EX_pro_L_e",
    "pyroglutamic acid": "EX_5oxpro_e",
    "pyruvic acid": "EX_pyr_e",
    "serine": "EX_ser_L_e",
    "threonine": "EX_thr_L_e",
    "tryptophan": "EX_trp_L_e",
    "tyrosine": "EX_tyr_L_e",
    "valine": "EX_val_L_e",
    "SGR": "exp_growth_rate",
    "Glucose (Met)": "EX_glc_e",
    "Lactate (Met)": "EX_lac_L_e",
    "Glutamine (Met)": "EX_gln_L_e",
    "Glutamate (Met)": "EX_glu_L_e",
    "NH4 (Met)": "EX_nh4_e",
}

In [None]:
# Build, for each group, dictionaries keyed by reaction name whose values map
# the table's index labels to uptake/secretion rates, and pickle each one.
# Mapping entries with no reaction (None) or whose column is absent are skipped.

########################
## ------- WT -------##
#######################

# Raw (per-batch) results
uptake_secretion_raw_wt_dict = {
    reaction: data_group1[met_col].to_dict()
    for met_col, reaction in mapping_dict.items()
    if reaction is not None and met_col in data_group1.columns
}
# Alternative (unprefixed) target: '../Uptake_Secretion_Rates/uptake_secretion_raw_wt_dict.pkl'
with open('../Uptake_Secretion_Rates/R_uptake_secretion_raw_wt_dict.pkl', 'wb') as out_file:
    pickle.dump(uptake_secretion_raw_wt_dict, out_file)

# Per-interval means
uptake_secretion_mean_wt_dict = {
    reaction: group1_averages[met_col].to_dict()
    for met_col, reaction in mapping_dict.items()
    if reaction is not None and met_col in group1_averages.columns
}
# Alternative (unprefixed) target: '../Uptake_Secretion_Rates/uptake_secretion_mean_wt_dict.pkl'
with open('../Uptake_Secretion_Rates/R_uptake_secretion_mean_wt_dict.pkl', 'wb') as out_file:
    pickle.dump(uptake_secretion_mean_wt_dict, out_file)


##########################
## ------- ZeLa -------##
#########################

# Raw (per-batch) results
uptake_secretion_raw_zela_dict = {
    reaction: data_group2[met_col].to_dict()
    for met_col, reaction in mapping_dict.items()
    if reaction is not None and met_col in data_group2.columns
}
# Alternative (unprefixed) target: '../Uptake_Secretion_Rates/uptake_secretion_raw_zela_dict.pkl'
with open('../Uptake_Secretion_Rates/R_uptake_secretion_raw_zela_dict.pkl', 'wb') as out_file:
    pickle.dump(uptake_secretion_raw_zela_dict, out_file)

# Per-interval means
uptake_secretion_mean_zela_dict = {
    reaction: group2_averages[met_col].to_dict()
    for met_col, reaction in mapping_dict.items()
    if reaction is not None and met_col in group2_averages.columns
}
# Alternative (unprefixed) target: '../Uptake_Secretion_Rates/uptake_secretion_mean_zela_dict.pkl'
with open('../Uptake_Secretion_Rates/R_uptake_secretion_mean_zela_dict.pkl', 'wb') as out_file:
    pickle.dump(uptake_secretion_mean_zela_dict, out_file)

In [None]:
# Display the raw WT dictionary (reaction name -> {index label: rate}) for inspection.
uptake_secretion_raw_wt_dict

In [None]:
# Save as CSV file
#
# Columns are renamed from metabolite names to reaction names before export.
# Mapping entries whose target is None (e.g. "isovaleric acid") are left out
# of the rename; passing them through would relabel that column as None and
# produce an unnamed column in the CSV.
# NOTE: the rename is done in place, so data_group1/data_group2 keep the
# reaction names afterwards; cells that look columns up by metabolite name
# must re-create the groups from pivot_df first.
reaction_names = {met_col: reaction for met_col, reaction in mapping_dict.items() if reaction is not None}

########################
## ------- WT -------##
#######################

# data_group1.drop(columns=['isovaleric acid'], inplace=True)
data_group1.rename(columns=reaction_names, inplace=True)
# data_group1.to_csv('../Uptake_Secretion_Rates/uptake_secretion_rates_wt.csv')
data_group1.to_csv('../Uptake_Secretion_Rates/R_uptake_secretion_rates_wt.csv')


##########################
## ------- ZeLa -------##
#########################

# data_group2.drop(columns=['isovaleric acid'], inplace=True)
data_group2.rename(columns=reaction_names, inplace=True)
# data_group2.to_csv('../Uptake_Secretion_Rates/uptake_secretion_rates_zela.csv')
data_group2.to_csv('../Uptake_Secretion_Rates/R_uptake_secretion_rates_zela.csv')

### Generate Uptake and Secretion Rate Intervals
Here we calculate the mean and the standard deviation (std) of each uptake and secretion rate. We then generate an interval from mean - std to mean + std for each metabolite, and save the results to 'specific_rates_R2.xlsx' in new sheets called 'Int_U1_U3' and 'Int_U4_U8'.

In [None]:
# pivot_df is expected to be indexed by ('Batch ID', 'Interval') with one
# column per metabolite (including 'SGR').
group1_batches = ['U1', 'U2', 'U3']
group2_batches = ['U4', 'U5', 'U6', 'U7', 'U8']

# Group 1: select its rows by batch, then compute mean and std per interval
mask_group1 = pivot_df.index.get_level_values('Batch ID').isin(group1_batches)
data_group1 = pivot_df.loc[mask_group1]
group1_stats = data_group1.groupby(level='Interval').agg(['mean', 'std'])

# Group 2: same procedure
mask_group2 = pivot_df.index.get_level_values('Batch ID').isin(group2_batches)
data_group2 = pivot_df.loc[mask_group2]
group2_stats = data_group2.groupby(level='Interval').agg(['mean', 'std'])


def create_interval_df(group_stats, n_std=1):
    """Turn per-interval mean/std statistics into (lower, upper) tuples.

    Args:
        group_stats: DataFrame whose columns are a two-level MultiIndex of
            (metabolite, statistic) containing the statistics 'mean' and
            'std', e.g. the result of ``.agg(['mean', 'std'])``.
        n_std: Half-width of the interval in standard deviations.
            Defaults to 1, i.e. (mean - std, mean + std).

    Returns:
        A DataFrame with the same index as ``group_stats`` and one column
        per metabolite; each cell holds the tuple
        ``(mean - n_std * std, mean + n_std * std)``. A NaN std gives
        ``(nan, nan)``.
    """
    mean_df = group_stats.xs('mean', axis=1, level=1)
    std_df = group_stats.xs('std', axis=1, level=1)
    lower_bound_df = mean_df - (n_std * std_df)
    upper_bound_df = mean_df + (n_std * std_df)

    # Start from a copy of the means so index and column order are kept,
    # then overwrite each column with its (lower, upper) tuples.
    interval_df = mean_df.copy()
    for col in mean_df.columns:
        interval_df[f'{col}'] = list(zip(lower_bound_df[col], upper_bound_df[col]))
    return interval_df

# Create intervals DataFrame for each group: every cell becomes a
# (lower, upper) tuple derived from that group's per-interval mean and std.
group1_intervals_df = create_interval_df(group1_stats)
group2_intervals_df = create_interval_df(group2_stats)

In [None]:
# Append the interval tables as extra sheets to the existing workbook;
# a sheet that already exists under the same name is replaced.
with pd.ExcelWriter(
    'specific_rates_R2.xlsx',
    mode='a',
    engine='openpyxl',
    if_sheet_exists='replace',
) as writer:
    for sheet_label, interval_table in (
        ('Int_U1_U3', group1_intervals_df),
        ('Int_U4_U8', group2_intervals_df),
    ):
        interval_table.to_excel(writer, sheet_name=sheet_label)

In [None]:
# Build dictionaries keyed by reaction name whose values map each interval
# label to its (lower, upper) rate tuple, then pickle them.
# Mapping entries with no reaction (None) or whose column is absent are skipped.

# WT
uptake_secretion_intrvl_wt_dict = {
    reaction: group1_intervals_df[met_col].to_dict()
    for met_col, reaction in mapping_dict.items()
    if reaction is not None and met_col in group1_intervals_df.columns
}
with open('../Uptake_Secretion_Rates/uptake_secretion_intrvl_wt_dict.pkl', 'wb') as out_file:
    pickle.dump(uptake_secretion_intrvl_wt_dict, out_file)

# ZeLa
uptake_secretion_intrvl_zela_dict = {
    reaction: group2_intervals_df[met_col].to_dict()
    for met_col, reaction in mapping_dict.items()
    if reaction is not None and met_col in group2_intervals_df.columns
}
with open('../Uptake_Secretion_Rates/uptake_secretion_intrvl_zela_dict.pkl', 'wb') as out_file:
    pickle.dump(uptake_secretion_intrvl_zela_dict, out_file)

In [None]:
import pickle
import scipy.io

# Read the pickled interval dictionaries back from disk.
# pickle can execute code while loading, so only open files you produced yourself.
with open('../Uptake_Secretion_Rates/uptake_secretion_intrvl_wt_dict.pkl', 'rb') as wt_handle:
    uptake_secretion_intrvl_wt_dict = pickle.load(wt_handle)

with open('../Uptake_Secretion_Rates/uptake_secretion_intrvl_zela_dict.pkl', 'rb') as zela_handle:
    uptake_secretion_intrvl_zela_dict = pickle.load(zela_handle)

# Write each dictionary to a MATLAB .mat file (top-level keys become variables)
scipy.io.savemat(
    '../Uptake_Secretion_Rates/uptake_secretion_wt.mat',
    uptake_secretion_intrvl_wt_dict,
)
scipy.io.savemat(
    '../Uptake_Secretion_Rates/uptake_secretion_zela.mat',
    uptake_secretion_intrvl_zela_dict,
)


In [None]:
# Pickle file to inspect
file_path = '../Uptake_Secretion_Rates/uptake_secretion_intrvl_wt_dict.pkl'

# Load its contents; only unpickle files you produced yourself
with open(file_path, 'rb') as pkl_handle:
    data = pickle.load(pkl_handle)

In [None]:
# Report the type of the loaded object and, for a dict, list every entry
print(type(data))

if isinstance(data, dict):
    for key in data:
        print(f"Key: {key}")
        print(f"Value: {data[key]}")
        