<a href="https://colab.research.google.com/github/Palaeoprot/IPA/blob/main/racemization_simulator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

def initialize_simulation_state(real_DL, params):
    """Build the starting molecule counts for the simulation.

    This function used to be defined twice in a row with identical bodies;
    the second definition silently replaced the first. It is now defined
    once.

    Args:
        real_DL: pandas DataFrame that has, for every amino acid handled
            here, the columns ``'<aa>_Conc_THAA_Mean'`` and
            ``'<aa>_D/L_THAA_Mean'``. Only the first row (``.iloc[0]``) is
            read.
        params: mapping with the keys ``'N'`` (multiplier that turns the
            concentration value into a molecule count),
            ``'initial_int_ratio'``, ``'initial_term_ratio'``,
            ``'initial_free_ratio'`` (share of each enantiomer pool placed
            in the internal / terminal / free compartment) and
            ``'fold_water'`` (water molecules per amino acid).

    Returns:
        dict mapping ``'L_<compartment>_<aa>'`` and
        ``'D_<compartment>_<aa>'`` (compartment in ``int``, ``term``,
        ``free``) to plain ``int`` counts, plus ``'water'``.

    Raises:
        KeyError: if a required column or ``params`` key is missing.
    """
    amino_acids = ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']
    # Compartment name -> share of each enantiomer pool that starts there.
    compartment_share = {
        'int': params['initial_int_ratio'],
        'term': params['initial_term_ratio'],
        'free': params['initial_free_ratio'],
    }

    initial_state = {}
    total_aa = 0
    for aa in amino_acids:
        aa_count = (real_DL[f'{aa}_Conc_THAA_Mean'].iloc[0] * params['N']).round().astype(int)
        total_aa += aa_count
        dl_ratio = real_DL[f'{aa}_D/L_THAA_Mean'].iloc[0]

        # D/L = r  =>  D = total * r / (1 + r); L takes the remainder so
        # that L + D always equals the rounded total.
        d_count = int(aa_count * dl_ratio / (1 + dl_ratio))
        l_count = aa_count - d_count

        # NOTE: each share is truncated independently, so the three
        # compartments may add up to slightly less than l_count / d_count.
        for compartment, share in compartment_share.items():
            initial_state[f'L_{compartment}_{aa}'] = int(l_count * share)
            initial_state[f'D_{compartment}_{aa}'] = int(d_count * share)

    initial_state['water'] = int(total_aa * params['fold_water'])
    return initial_state

def calculate_racemization(real_DL, amino_acids):
    """Collect the total D/L ratio of each amino acid into a new DataFrame.

    No rate is fitted here: for every amino acid ``aa`` the column
    ``'<aa>_D/L_Total'`` of ``real_DL`` is copied into the result under the
    name ``'<aa>_D/L_Ratio'``.

    Args:
        real_DL: pandas DataFrame containing a ``'<aa>_D/L_Total'`` column
            for every requested amino acid.
        amino_acids: iterable of amino-acid labels (e.g. ``'Asx'``).

    Returns:
        DataFrame sharing ``real_DL``'s index, with one
        ``'<aa>_D/L_Ratio'`` column per requested amino acid, in the order
        given.

    Raises:
        KeyError: if any required ``'<aa>_D/L_Total'`` column is absent;
            the message lists every missing column.
    """
    # pandas is imported here because the module-level imports visible in
    # this file only bring in numpy, which made `pd` an undefined name.
    import pandas as pd

    amino_acids = list(amino_acids)  # tolerate one-shot iterables
    missing = [
        f'{aa}_D/L_Total'
        for aa in amino_acids
        if f'{aa}_D/L_Total' not in real_DL.columns
    ]
    if missing:
        raise KeyError(f"real_DL is missing required column(s): {missing}")

    racemization_rates = pd.DataFrame(index=real_DL.index)
    for aa in amino_acids:
        racemization_rates[f'{aa}_D/L_Ratio'] = real_DL[f'{aa}_D/L_Total']
    return racemization_rates

def get_initial_state(real_DL, params):
    """Derive the starting molecule counts from the first row of ``real_DL``.

    For each amino acid the value in ``'<aa>_Conc_THAA_Mean'`` is scaled by
    ``params['N']`` and rounded to a whole count, then split into D and L
    using ``'<aa>_D/L_THAA_Mean'``. Each enantiomer pool is distributed over
    the internal / terminal / free compartments with the fixed shares
    0.9 / 0.09 / 0.01 (truncated to integers). The ``'water'`` entry is the
    summed amino-acid count times ``params['fold_water']``.

    Returns a dict keyed ``'L_<int|term|free>_<aa>'``,
    ``'D_<int|term|free>_<aa>'`` and ``'water'``.
    """
    state = {}
    running_total = 0

    for aa in ('Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile'):
        scaled_conc = real_DL[f'{aa}_Conc_THAA_Mean'].iloc[0] * params['N']
        n_molecules = scaled_conc.round().astype(int)
        running_total += n_molecules

        ratio = real_DL[f'{aa}_D/L_THAA_Mean'].iloc[0]
        # D / L = ratio  ->  D = total * ratio / (1 + ratio); L is the rest.
        n_d = int(n_molecules * ratio / (1 + ratio))
        n_l = n_molecules - n_d

        state.update({
            f'L_int_{aa}': int(n_l * 0.9),
            f'D_int_{aa}': int(n_d * 0.9),
            f'L_term_{aa}': int(n_l * 0.09),
            f'D_term_{aa}': int(n_d * 0.09),
            f'L_free_{aa}': int(n_l * 0.01),
            f'D_free_{aa}': int(n_d * 0.01),
        })

    state['water'] = int(running_total * params['fold_water'])
    return state

def _water_concentration(state, fold_water):
    """Return current water divided by (total amino acids * fold_water)."""
    total_aa = sum(
        count for key, count in state.items() if key.startswith(('L_', 'D_'))
    )
    return state['water'] / (total_aa * fold_water)


def _clamp_probability(p):
    """Clamp *p* into [0, 1] so it is always a valid binomial probability."""
    return min(1.0, max(0.0, p))


def _draw_two_pool(total, slow_fraction, fast_p, slow_p):
    """Draw reacting molecules from a fast and a slow sub-pool of *total*.

    The slow pool gets ``int(total * slow_fraction)`` molecules and the fast
    pool gets the remainder, so no molecule is left out by truncation.
    """
    total = int(total)
    n_slow = int(total * slow_fraction)
    n_fast = total - n_slow
    return np.random.binomial(n_fast, fast_p) + np.random.binomial(n_slow, slow_p)


def _apply_hydrolysis(state, aa, source, target, hydrolyzed):
    """Move *hydrolyzed* residues of *aa* from *source* to *target* in place.

    One water molecule is consumed per residue moved, and the event count is
    capped by the water available so water cannot go negative.
    """
    hydrolyzed = min(int(hydrolyzed), int(state['water']))
    l_key, d_key = f'L_{source}_{aa}', f'D_{source}_{aa}'
    total = state[l_key] + state[d_key]
    # Integer split proportional to the L share; D takes the remainder so
    # that moved_l + moved_d == hydrolyzed exactly (and moved_d <= D count).
    moved_l = int((hydrolyzed * state[l_key]) // total)
    moved_d = hydrolyzed - moved_l
    state[l_key] -= moved_l
    state[d_key] -= moved_d
    state[f'L_{target}_{aa}'] += moved_l
    state[f'D_{target}_{aa}'] += moved_d
    state['water'] -= hydrolyzed


def simulate_racemization(initial_state, params, water_input):
    """Run the stochastic hydrolysis / racemization / loss simulation.

    For every time step and every amino acid the following happens in order:
    internal -> terminal hydrolysis, terminal -> free hydrolysis,
    L <-> D racemization in each compartment, and loss of free amino acids.
    All random draws use ``np.random.binomial``.

    Differences from the previous implementation (bug fixes):
      * The water concentration is recomputed for the terminal hydrolysis
        step. It used to be defined only inside the internal-hydrolysis
        branch, which raised ``UnboundLocalError`` when that branch was
        skipped first, and otherwise reused a stale value.
      * Hydrolysis probabilities (rate * water concentration) are clamped to
        [0, 1] so extra water input cannot make ``binomial`` raise.
      * Residues moved and water consumed now always match, and hydrolysis
        is capped by the water available.
      * Fast/slow sub-pools partition the population without dropping a
        molecule to truncation.
      * Both racemization directions are drawn from the counts at the start
        of the step, so freshly converted molecules cannot convert back
        within the same step.

    Args:
        initial_state: dict with integer counts under
            ``'L_<int|term|free>_<aa>'`` / ``'D_<int|term|free>_<aa>'`` for
            each simulated amino acid, plus ``'water'``. Not modified.
        params: mapping providing ``'num_intervals'``, ``'fold_water'``,
            ``'user_defined_max_time'``, ``'k_internal'``,
            ``'slow_internal_hydrolysis_fraction'``,
            ``'slow_internal_hydrolysis_rate'``, ``'k_terminal'``,
            ``'racemization_rate_polymer'``, ``'racemization_rate_terminal'``,
            ``'racemization_rate_free'``,
            ``'slow_racemization_rate_BAA_fraction'``,
            ``'slow_racemization_rate_BAA_rate'``,
            ``'slow_racemization_rate_FAA_fraction'``,
            ``'slow_racemization_rate_FAA_rate'`` and ``'k_loss'``.
        water_input: indexable sequence; element ``t - 1`` is the water
            added at the start of step ``t``. Needs at least
            ``params['num_intervals']`` entries.

    Returns:
        dict mapping every key of ``initial_state`` to the list of its
        values over time (initial value first) plus ``'time'``, whose
        entries are ``t * user_defined_max_time / num_intervals``.
    """
    amino_acids = ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']

    results = {key: [value] for key, value in initial_state.items()}
    results['time'] = [0]
    state = initial_state.copy()

    for t in range(1, params['num_intervals'] + 1):
        # Add water from dehydration
        state['water'] += water_input[t - 1]

        # (compartment, fast rate, slow-pool fraction, slow rate):
        # bound compartments share the BAA slow pool, free uses the FAA one.
        racemization_plan = [
            ('int', params['racemization_rate_polymer'],
             params['slow_racemization_rate_BAA_fraction'],
             params['slow_racemization_rate_BAA_rate']),
            ('term', params['racemization_rate_terminal'],
             params['slow_racemization_rate_BAA_fraction'],
             params['slow_racemization_rate_BAA_rate']),
            ('free', params['racemization_rate_free'],
             params['slow_racemization_rate_FAA_fraction'],
             params['slow_racemization_rate_FAA_rate']),
        ]

        for aa in amino_acids:
            # Internal to terminal hydrolysis
            total_int = state[f'L_int_{aa}'] + state[f'D_int_{aa}']
            if total_int > 0 and state['water'] > 0:
                water_conc = _water_concentration(state, params['fold_water'])
                hydrolyzed = _draw_two_pool(
                    total_int,
                    params['slow_internal_hydrolysis_fraction'],
                    _clamp_probability(params['k_internal'] * water_conc),
                    _clamp_probability(
                        params['slow_internal_hydrolysis_rate'] * water_conc
                    ),
                )
                _apply_hydrolysis(state, aa, 'int', 'term', hydrolyzed)

            # Terminal to free hydrolysis
            total_term = state[f'L_term_{aa}'] + state[f'D_term_{aa}']
            if total_term > 0 and state['water'] > 0:
                # Recomputed here: the internal step may have been skipped
                # or may have consumed water.
                water_conc = _water_concentration(state, params['fold_water'])
                hydrolyzed = np.random.binomial(
                    total_term,
                    _clamp_probability(params['k_terminal'] * water_conc),
                )
                _apply_hydrolysis(state, aa, 'term', 'free', hydrolyzed)

            # Racemization: draw both directions from the pre-step counts,
            # then apply the net change.
            for location, rate, slow_fraction, slow_rate in racemization_plan:
                l_key = f'L_{location}_{aa}'
                d_key = f'D_{location}_{aa}'
                l_to_d = _draw_two_pool(state[l_key], slow_fraction, rate, slow_rate)
                d_to_l = _draw_two_pool(state[d_key], slow_fraction, rate, slow_rate)
                state[l_key] += d_to_l - l_to_d
                state[d_key] += l_to_d - d_to_l

            # Loss of free amino acids
            for chirality in ['L', 'D']:
                lost = np.random.binomial(
                    state[f'{chirality}_free_{aa}'], params['k_loss']
                )
                state[f'{chirality}_free_{aa}'] -= lost

        for key, value in state.items():
            results[key].append(value)
        results['time'].append(
            t * params['user_defined_max_time'] / params['num_intervals']
        )

    return results

def run_racemization_analysis(real_DL, amino_acid_colors, water_input, params):
    """Tabulate measured D/L ratios, run the simulation, and plot both.

    Steps, in order: ``calculate_racemization`` on the fixed amino-acid
    list, ``get_initial_state``, ``simulate_racemization``, then the two
    plotting helpers.

    NOTE(review): ``plot_racemization_results`` and
    ``plot_simulation_results`` only appear in commented-out code in this
    file; they must be defined elsewhere before this function is called.

    Returns:
        Tuple ``(racemization_rates, simulation_results)`` as produced by
        ``calculate_racemization`` and ``simulate_racemization``.
    """
    # Measured D/L ratios per amino acid.
    observed_ratios = calculate_racemization(
        real_DL,
        ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile'],
    )

    # Seed the simulation from the data and run it.
    simulated = simulate_racemization(
        get_initial_state(real_DL, params),
        params,
        water_input,
    )

    # Plot measured data first, then the simulated trajectories.
    plot_racemization_results(real_DL, observed_ratios, amino_acid_colors)
    plot_simulation_results(simulated, amino_acid_colors)

    return observed_ratios, simulated

In [None]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt

# def calculate_racemization(real_DL, amino_acids):
#     """Calculate racemization rates."""
#     racemization_rates = pd.DataFrame(index=real_DL.index)
#     for aa in amino_acids:
#         racemization_rates[f'{aa}_D/L_Ratio'] = real_DL[f'{aa}_D/L_Total']
#     return racemization_rates

# def get_initial_state(real_DL, N):
#     """Calculate initial state based on real_DL data."""
#     initial_state = {}
#     total_aa = 0
#     for aa in ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']:
#         aa_count = (real_DL[f'{aa}_Conc_THAA_Mean'].iloc[0] * N).round().astype(int)
#         total_aa += aa_count
#         dl_ratio = real_DL[f'{aa}_D/L_THAA_Mean'].iloc[0]

#         d_count = int(aa_count * dl_ratio / (1 + dl_ratio))
#         l_count = aa_count - d_count

#         initial_state[f'L_int_{aa}'] = int(l_count * 0.9)
#         initial_state[f'D_int_{aa}'] = int(d_count * 0.9)
#         initial_state[f'L_term_{aa}'] = int(l_count * 0.09)
#         initial_state[f'D_term_{aa}'] = int(d_count * 0.09)
#         initial_state[f'L_free_{aa}'] = int(l_count * 0.01)
#         initial_state[f'D_free_{aa}'] = int(d_count * 0.01)

#     initial_state['water'] = int(total_aa * fold_water)
#     return initial_state

# def simulate_racemization(initial_state, rates, time_steps, N, fold_water, water_input):
#     results = {key: [value] for key, value in initial_state.items()}
#     results['time'] = [0]
#     state = initial_state.copy()

#     for t in range(1, time_steps + 1):
#         # Add water from dehydration
#         state['water'] += water_input[t-1]

#         # Simulate hydrolysis
#         for aa in ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']:
#             for source, target in [('int', 'term'), ('term', 'free')]:
#                 total = state[f'L_{source}_{aa}'] + state[f'D_{source}_{aa}']
#                 if total > 0 and state['water'] > 0:
#                     water_conc = state['water'] / (sum(state[key] for key in state if key.startswith('L_') or key.startswith('D_')) * fold_water)
#                     effective_rate = rates[f'k_{source}'] * water_conc
#                     hydrolyzed = np.random.binomial(total, effective_rate)
#                     for chirality in ['L', 'D']:
#                         moved = int(hydrolyzed * (state[f'{chirality}_{source}_{aa}'] / total))
#                         state[f'{chirality}_{source}_{aa}'] -= moved
#                         state[f'{chirality}_{target}_{aa}'] += moved
#                     state['water'] -= hydrolyzed

#         # Simulate racemization
#         for aa in ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']:
#             for location in ['int', 'term', 'free']:
#                 rate = rates[f'racemization_rate_{location}']
#                 for source, target in [('L', 'D'), ('D', 'L')]:
#                     racemized = np.random.binomial(state[f'{source}_{location}_{aa}'], rate)
#                     state[f'{source}_{location}_{aa}'] -= racemized
#                     state[f'{target}_{location}_{aa}'] += racemized

#         # Simulate loss and extra reactions
#         for aa in ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']:
#             for chirality in ['L', 'D']:
#                 lost = np.random.binomial(state[f'{chirality}_free_{aa}'], rates['k_loss'])
#                 extra_reacted = np.random.binomial(state[f'{chirality}_free_{aa}'], rates['k_free'])
#                 state[f'{chirality}_free_{aa}'] -= (lost + extra_reacted)

#         for key, value in state.items():
#             results[key].append(value)
#         results['time'].append(t)

#     return results


# def plot_simulation_results(results, ratios):
#     """Plot simulation results and D/L ratios."""
#     fig, axs = plt.subplots(2, 3, figsize=(18, 12))

#     # Plot internal amino acids
#     ax = axs[0, 0]
#     ax.plot(results['time'], np.array(results['L_int']) + np.array(results['L_term']), label='Internal L')
#     ax.plot(results['time'], np.array(results['D_int']) + np.array(results['D_term']), label='Internal D')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Internal Amino Acids')
#     ax.legend()

#     # Plot terminal amino acids
#     ax = axs[0, 1]
#     ax.plot(results['time'], results['L_term'], label='Terminal L')
#     ax.plot(results['time'], results['D_term'], label='Terminal D')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Terminal Amino Acids')
#     ax.legend()

#     # Plot free amino acids
#     ax = axs[0, 2]
#     ax.plot(results['time'], results['L_free'], label='Free L')
#     ax.plot(results['time'], results['D_free'], label='Free D')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Free Amino Acids')
#     ax.legend()

#     # Plot D/L ratios
#     ax = axs[1, 0]
#     ax.plot(results['time'], ratios['int_D_L_ratio'], label='Internal')
#     ax.plot(results['time'], ratios['term_D_L_ratio'], label='Terminal')
#     ax.plot(results['time'], ratios['free_D_L_ratio'], label='Free')
#     ax.plot(results['time'], ratios['overall_D_L_ratio'], label='Overall', linewidth=2)
#     ax.set_xlabel('Time')
#     ax.set_ylabel('D/L Ratio')
#     ax.set_title('D/L Ratios')
#     ax.legend()

#     # Plot water consumption
#     ax = axs[1, 1]
#     ax.plot(results['time'], results['water'], label='Water Molecules')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Water Molecules')
#     ax.set_title('Water Consumption')
#     ax.legend()

#     # Plot total amino acids
#     ax = axs[1, 2]
#     total_amino_acids = (np.array(results['L_int']) + np.array(results['D_int']) +
#                          np.array(results['L_term']) + np.array(results['D_term']) +
#                          np.array(results['L_free']) + np.array(results['D_free']))
#     ax.plot(results['time'], total_amino_acids, label='Total Amino Acids')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Total Amino Acids')
#     ax.legend()

#     plt.tight_layout()
#     plt.show()


# def plot_simulation_results(simulation_results):
#     # ... (update this function to plot results for each amino acid)

# def run_racemization_analysis(real_DL, amino_acid_colors, water_input, N, fold_water):
#     amino_acids = ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']
#     racemization_rates = calculate_racemization(real_DL, amino_acids)

#     # Set up simulation parameters
#     initial_state = get_initial_state(real_DL, N)
#     rates = {
#         'k_int': 0.01, 'k_term': 0.02, 'k_free': 0.03, 'k_loss': 0.001,
#         'racemization_rate_int': 0.001, 'racemization_rate_term': 0.002, 'racemization_rate_free': 0.003
#     }
#     time_steps = len(water_input)

#     # Run simulation
#     simulation_results = simulate_racemization(initial_state, rates, time_steps, N, fold_water, water_input)

#     # Plot results
#     plot_racemization_results(real_DL, racemization_rates, amino_acid_colors)
#     plot_simulation_results(simulation_results)

#     return racemization_rates, simulation_results

In [None]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import sys
# sys.path.append('/content/drive/MyDrive/Colab_Notebooks/MoDuLAAR')

# def calculate_total_amino_acids(real_DL, amino_acids):
#     """Calculate total amino acids by summing FAA and BAA."""
#     for aa in amino_acids:
#         real_DL[f'{aa}_Conc_Total'] = real_DL[f'{aa}_Conc_FAA_Mean'] + real_DL[f'{aa}_Conc_BAA_Mean']
#         real_DL[f'{aa}_D/L_Total'] = (real_DL[f'{aa}_Conc_FAA_Mean'] * real_DL[f'{aa}_D/L_FAA_Mean'] +
#                                       real_DL[f'{aa}_Conc_BAA_Mean'] * real_DL[f'{aa}_D/L_BAA_Mean']) / real_DL[f'{aa}_Conc_Total']
#     return real_DL

# def calculate_racemization(real_DL, amino_acids):
#     """Calculate racemization rates."""
#     racemization_rates = pd.DataFrame(index=real_DL.index)
#     for aa in amino_acids:
#         racemization_rates[f'{aa}_D/L_Ratio'] = real_DL[f'{aa}_D/L_Total']
#     return racemization_rates

# def plot_racemization_results(real_DL, racemization_rates, amino_acid_colors):
#     """Plot racemization results with error bars and experimental data."""
#     temperatures = real_DL['temp (K)'].unique()
#     temperatures_c = temperatures - 273.15
#     real_DL_grouped = real_DL.groupby('temp (K)')

#     for temp_k, temp_c in zip(temperatures, temperatures_c):
#         data = real_DL_grouped.get_group(temp_k)
#         fig, ax = plt.subplots(figsize=(12, 8))

#         for aa in amino_acid_colors.keys():
#             if f'{aa}_D/L_Ratio' in racemization_rates.columns:
#                 # Plot calculated ratios
#                 ax.plot(data['time'], racemization_rates.loc[data.index, f'{aa}_D/L_Ratio'],
#                         label=f'{aa} (calc)', color=amino_acid_colors[aa])

#                 # Plot experimental data with error bars
#                 ax.errorbar(data['time'], data[f'{aa}_D/L_THAA_Mean'],
#                             yerr=data[f'{aa}_D/L_THAA_Std'], fmt='o',
#                             label=f'{aa} (exp)', color=amino_acid_colors[aa], alpha=0.5)

#         ax.set_xlabel('Time (hours)')
#         ax.set_ylabel('D/L Ratio')
#         ax.set_title(f'Racemization Over Time ({temp_c:.1f} °C)')
#         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
#         ax.set_xscale('log')

#         plt.tight_layout()
#         plt.show()


# def second_order_hydrolysis(peptide_conc, water_conc, rate_constant, dt):
#     """Calculate the amount of hydrolysis based on second-order reaction kinetics."""
#     effective_rate = rate_constant * water_conc * dt
#     return peptide_conc * effective_rate / (1 + effective_rate)


# def simulate_racemization(initial_state, rates, time_steps, N, fold_water, water_input):
#     results = {key: [value] for key, value in initial_state.items()}
#     results['time'] = [0]
#     state = initial_state.copy()

#     for t in range(1, time_steps + 1):
#         # Add water from dehydration
#         state['water'] += water_input[t-1]


# def simulate_step(state, rates, N, fold_water):
#     """Simulate one time step of hydrolysis, racemization, and loss."""
#     new_state = state.copy()
#     for source, target in [('int', 'term'), ('term', 'free')]:
#         total = state[f'L_{source}'] + state[f'D_{source}']
#         if total > 0 and state['water'] > 0:
#             water_conc = state['water'] / (N * fold_water)
#             effective_rate = rates[f'k_{source}'] * water_conc
#             hydrolyzed = np.random.binomial(total, effective_rate)
#             for chirality in ['L', 'D']:
#                 moved = int(hydrolyzed * (state[f'{chirality}_{source}'] / total))
#                 new_state[f'{chirality}_{source}'] -= moved
#                 new_state[f'{chirality}_{target}'] += moved
#             new_state['water'] -= hydrolyzed
#     for location in ['int', 'term', 'free']:
#         rate = rates[f'racemization_rate_{location}']
#         for source, target in [('L', 'D'), ('D', 'L')]:
#             racemized = np.random.binomial(state[f'{source}_{location}'], rate)
#             new_state[f'{source}_{location}'] -= racemized
#             new_state[f'{target}_{location}'] += racemized
#     for chirality in ['L', 'D']:
#         lost = np.random.binomial(state[f'{chirality}_free'], rates['k_loss'])
#         extra_reacted = np.random.binomial(state[f'{chirality}_free'], rates['k_free'])
#         new_state[f'{chirality}_free'] -= (lost + extra_reacted)
#     return new_state


#     return racemization_rates, simulation_results

# def run_simulation(initial_state, rates, time_steps, N, fold_water):
#     """Run the full simulation."""
#     results = {key: [value] for key, value in initial_state.items()}
#     results['time'] = [0]
#     state = initial_state.copy()
#     for t in range(1, time_steps + 1):
#         state = simulate_step(state, rates, N, fold_water)
#         for key, value in state.items():
#             results[key].append(value)
#         results['time'].append(t)
#     return results

# def calculate_ratios(results):
#     """Calculate D/L ratios for each amino acid state and overall."""
#     ratios = {}
#     locations = ['int', 'term', 'free']
#     for location in locations:
#         L = np.array(results[f'L_{location}'])
#         D = np.array(results[f'D_{location}'])
#         ratios[f'{location}_D_L_ratio'] = np.divide(D, L, where=L != 0)
#     total_L = sum(np.array(results[f'L_{loc}']) for loc in locations)
#     total_D = sum(np.array(results[f'D_{loc}']) for loc in locations)
#     ratios['overall_D_L_ratio'] = np.divide(total_D, total_L, where=total_L != 0)
#     return ratios

# def plot_simulation_results(results, ratios):
#     """Plot simulation results and D/L ratios."""
#     fig, axs = plt.subplots(2, 3, figsize=(18, 12))

#     # Plot internal amino acids
#     ax = axs[0, 0]
#     ax.plot(results['time'], np.array(results['L_int']) + np.array(results['L_term']), label='Internal L')
#     ax.plot(results['time'], np.array(results['D_int']) + np.array(results['D_term']), label='Internal D')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Internal Amino Acids')
#     ax.legend()

#     # Plot terminal amino acids
#     ax = axs[0, 1]
#     ax.plot(results['time'], results['L_term'], label='Terminal L')
#     ax.plot(results['time'], results['D_term'], label='Terminal D')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Terminal Amino Acids')
#     ax.legend()

#     # Plot free amino acids
#     ax = axs[0, 2]
#     ax.plot(results['time'], results['L_free'], label='Free L')
#     ax.plot(results['time'], results['D_free'], label='Free D')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Free Amino Acids')
#     ax.legend()

#     # Plot D/L ratios
#     ax = axs[1, 0]
#     ax.plot(results['time'], ratios['int_D_L_ratio'], label='Internal')
#     ax.plot(results['time'], ratios['term_D_L_ratio'], label='Terminal')
#     ax.plot(results['time'], ratios['free_D_L_ratio'], label='Free')
#     ax.plot(results['time'], ratios['overall_D_L_ratio'], label='Overall', linewidth=2)
#     ax.set_xlabel('Time')
#     ax.set_ylabel('D/L Ratio')
#     ax.set_title('D/L Ratios')
#     ax.legend()

#     # Plot water consumption
#     ax = axs[1, 1]
#     ax.plot(results['time'], results['water'], label='Water Molecules')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Water Molecules')
#     ax.set_title('Water Consumption')
#     ax.legend()

#     # Plot total amino acids
#     ax = axs[1, 2]
#     total_amino_acids = (np.array(results['L_int']) + np.array(results['D_int']) +
#                          np.array(results['L_term']) + np.array(results['D_term']) +
#                          np.array(results['L_free']) + np.array(results['D_free']))
#     ax.plot(results['time'], total_amino_acids, label='Total Amino Acids')
#     ax.set_xlabel('Time')
#     ax.set_ylabel('Number of Amino Acids')
#     ax.set_title('Total Amino Acids')
#     ax.legend()

#     plt.tight_layout()
#     plt.show()


# def run_racemization_analysis(real_DL, amino_acid_colors, water_input):
#     # ... (keep existing code for calculating racemization rates)

#     # Set up simulation parameters



# def run_racemization_analysis(input_data_path, output_data_path, amino_acid_colors):
#     """Run racemization analysis on the input data and save the results."""
#     # Load input data
#     real_DL = pd.read_csv(input_data_path)

#     # Calculate total amino acids and D/L ratios
#     amino_acids = ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']
#     real_DL = calculate_total_amino_acids(real_DL, amino_acids)

#     # Calculate racemization rates
#     racemization_rates = calculate_racemization(real_DL, amino_acids)

#     # Plot results
#     plot_racemization_results(real_DL, racemization_rates, amino_acid_colors)

#     # Save results to CSV
#     racemization_rates.to_csv(output_data_path.replace('.csv', '_racemization_rates.csv'), index=False)

#     initial_state = {
#         'L_int': 1000, 'D_int': 0, 'L_term': 500, 'D_term': 0, 'L_free': 100, 'D_free': 0, 'water': 10000
#     }
#     rates = {
#         'k_int': 0.01, 'k_term': 0.02, 'k_free': 0.03, 'k_loss': 0.001,
#         'racemization_rate_int': 0.001, 'racemization_rate_term': 0.002, 'racemization_rate_free': 0.003
#     }
#     N = 10000
#     fold_water = 10
#     time_steps = len(water_input)



#     simulation_results = run_simulation(initial_state, rates, time_steps, N, fold_water)
#     simulation_ratios = calculate_ratios(simulation_results)

#     # Plot simulation results
#     plot_simulation_results(simulation_results, simulation_ratios)

#     return real_DL, racemization_rates


In [None]:
# # new version of racemization_module.ipynb

# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import sys
# sys.path.append('/content/drive/MyDrive/Colab_Notebooks/MoDuLAAR')

# # Remove the import of setup_amino_acid_colors
# # from config_utils import load_color_dictionary, setup_amino_acid_colors

# def calculate_total_amino_acids(real_DL, amino_acids):
#     """Calculate total amino acids by summing FAA and BAA."""
#     for aa in amino_acids:
#         real_DL[f'{aa}_Conc_Total'] = real_DL[f'{aa}_Conc_FAA_Mean'] + real_DL[f'{aa}_Conc_BAA_Mean']
#         real_DL[f'{aa}_D/L_Total'] = (real_DL[f'{aa}_Conc_FAA_Mean'] * real_DL[f'{aa}_D/L_FAA_Mean'] +
#                                       real_DL[f'{aa}_Conc_BAA_Mean'] * real_DL[f'{aa}_D/L_BAA_Mean']) / real_DL[f'{aa}_Conc_Total']
#     return real_DL

# def calculate_racemization(real_DL, amino_acids):
#     """Calculate racemization rates."""
#     racemization_rates = pd.DataFrame(index=real_DL.index)

#     for aa in amino_acids:
#         racemization_rates[f'{aa}_D/L_Ratio'] = real_DL[f'{aa}_D/L_Total']

#     return racemization_rates

# def plot_racemization_results(real_DL, racemization_rates, amino_acid_colors):
#     """Plot racemization results."""
#     temperatures = real_DL['temp (K)'].unique()  # Changed from 'Temperature' to 'temp (K)'
#     temperatures_c = temperatures - 273.15

#     real_DL_grouped = real_DL.groupby('temp (K)')  # Changed from 'Temperature' to 'temp (K)'

#     for temp_k, temp_c in zip(temperatures, temperatures_c):
#         data = real_DL_grouped.get_group(temp_k)

#         fig, ax = plt.subplots(figsize=(12, 8))

#         for aa in amino_acid_colors.keys():
#             if f'{aa}_D/L_Ratio' in racemization_rates.columns:
#                 ax.plot(data['time'], racemization_rates.loc[data.index, f'{aa}_D/L_Ratio'], label=aa, color=amino_acid_colors[aa])

#         ax.set_xlabel('Time (hours)')
#         ax.set_ylabel('D/L Ratio')
#         ax.set_title(f'Racemization Over Time ({temp_c:.1f} °C)')
#         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
#         ax.set_xscale('log')

#         plt.tight_layout()
#         plt.show()

# def run_racemization_analysis(input_data_path, output_data_path, amino_acid_colors):
#     # Load input data
#     real_DL = pd.read_csv(input_data_path)

#     # Calculate total amino acids and D/L ratios
#     amino_acids = ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']
#     real_DL = calculate_total_amino_acids(real_DL, amino_acids)

#     # Calculate racemization rates
#     racemization_rates = calculate_racemization(real_DL, amino_acids)

#     # Plot results
#     plot_racemization_results(real_DL, racemization_rates, amino_acid_colors)

#     # Save results to CSV
#     racemization_rates.to_csv(output_data_path.replace('.csv', '_racemization_rates.csv'), index=False)

#     return real_DL, racemization_rates

# # Remove the example usage section