<a href="https://colab.research.google.com/github/Palaeoprot/ModulAAR/blob/main/dehydration_analyser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def calculate_water_generation(real_DL):
    """Derive water-generation columns from Ser, Ala and free Glx data.

    The first row of ``real_DL`` serves as the baseline for both the Ser
    decline and the Ala increase.

    Args:
        real_DL: DataFrame providing the columns ``Ser_Conc_THAA_Mean``,
            ``Ala_Conc_THAA_Mean`` and ``Glx_Conc_FAA_Mean``.

    Returns:
        DataFrame sharing the index of ``real_DL`` with three columns:
        ``Ser_water`` (Ser decline minus Ala increase), ``Glx_water``
        (the free Glx concentration, copied unchanged) and ``total_water``
        (the sum of the two).
    """
    ser_thaa = real_DL['Ser_Conc_THAA_Mean']
    ala_thaa = real_DL['Ala_Conc_THAA_Mean']

    # Both changes are measured against the very first sample.
    ser_lost = ser_thaa.iloc[0] - ser_thaa
    ala_gained = ala_thaa - ala_thaa.iloc[0]

    result = pd.DataFrame(index=real_DL.index)

    # Ser loss not matched by an Ala gain is attributed to dehydration.
    result['Ser_water'] = ser_lost - ala_gained

    # Free Glx is taken as-is as the second contribution.
    result['Glx_water'] = real_DL['Glx_Conc_FAA_Mean']

    result['total_water'] = result['Ser_water'] + result['Glx_water']
    return result

def estimate_dehydration_rate(real_DL):
    """Estimate a first-order dehydration rate from the Serine decline.

    Fits ``ln(Ser_Conc_THAA_Mean)`` against ``time`` by ordinary least
    squares and returns the negated slope, i.e. the rate constant ``k`` of
    ``C(t) = C0 * exp(-k * t)``.

    Rows that cannot take part in a log-linear fit are ignored: those with
    a NaN/infinite time or concentration, and those whose concentration is
    not strictly positive. Without this filter a single zero or missing
    measurement turns the logarithm into ``-inf``/NaN and the whole fit
    into NaN.

    Args:
        real_DL: DataFrame providing the columns ``time`` and
            ``Ser_Conc_THAA_Mean``.

    Returns:
        The dehydration rate, in inverse units of the ``time`` column.
    """
    time = np.asarray(real_DL['time'], dtype=float)
    ser_conc = np.asarray(real_DL['Ser_Conc_THAA_Mean'], dtype=float)

    # Keep only points whose logarithm is defined and finite.
    usable = np.isfinite(time) & np.isfinite(ser_conc) & (ser_conc > 0)

    # Log-linear regression: ln(C) = ln(C0) - k * t
    fit = stats.linregress(time[usable], np.log(ser_conc[usable]))

    # The slope is the negative of the dehydration rate
    return -fit.slope

def generate_water_input(water_generation, num_intervals):
    """Resample ``total_water`` onto an even grid and return step increments.

    The index of ``water_generation`` is used as the x-axis. The grid runs
    from 0 to the last index value with ``num_intervals`` points.

    Args:
        water_generation: DataFrame with a ``total_water`` column and a
            numeric index.
        num_intervals: Number of evenly spaced grid points to produce.

    Returns:
        A list of ``num_intervals`` floats. The first entry is the
        interpolated value at the first grid point (differenced against
        zero); each later entry is the change since the previous point.
    """
    x_axis = water_generation.index
    grid = np.linspace(0, x_axis[-1], num_intervals)

    # Linear interpolation of the water curve at the grid points.
    resampled = np.interp(grid, x_axis, water_generation['total_water'])

    # Differencing against a leading zero keeps the output the same length
    # as the grid.
    increments = np.diff(np.concatenate(([0.0], resampled)))

    return increments.tolist()

def plot_dehydration_results(real_DL, water_generation, amino_acid_colors):
    """Show one three-panel figure for every temperature in ``real_DL``.

    Panels, left to right: stacked THAA concentrations of Ser, Ala and Glx;
    the total, Ser and Glx water curves; and a scatter of Ser and Ala THAA
    concentrations. All panels use a logarithmic time axis.

    Args:
        real_DL: DataFrame with ``temp (K)``, ``time`` and the
            ``*_Conc_THAA_Mean`` columns being plotted.
        water_generation: DataFrame indexed like ``real_DL`` with the
            columns ``total_water``, ``Ser_water`` and ``Glx_water``.
        amino_acid_colors: Mapping from amino-acid code (``'Ser'``,
            ``'Ala'``, ``'Glx'``) to a matplotlib colour.
    """

    def _finish_axis(ax, y_label, title):
        # Trimmings shared by all panels: labels, title, legend to the
        # right of the axes, log-scaled time.
        ax.set_xlabel('Time (hours)')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax.set_xscale('log')

    kelvin_values = real_DL['temp (K)'].unique()
    celsius_values = kelvin_values - 273.15

    by_temperature = real_DL.groupby('temp (K)')

    for temp_k, temp_c in zip(kelvin_values, celsius_values):
        data = by_temperature.get_group(temp_k)

        _, axes = plt.subplots(1, 3, figsize=(24, 8))
        ax_stack, ax_water, ax_trend = axes

        # Panel 1: stacked amino-acid composition.
        stack_base = np.zeros(len(data))
        for aa in ('Ser', 'Ala', 'Glx'):
            column = f'{aa}_Conc_THAA_Mean'
            if column not in data.columns:
                continue
            ax_stack.fill_between(data['time'], stack_base, stack_base + data[column],
                                  label=aa, color=amino_acid_colors[aa])
            stack_base += data[column]

        _finish_axis(ax_stack, 'Concentration',
                     f'Ser, Ala, Glx Composition Over Time ({temp_c:.1f} °C)')

        # Panel 2: water curves for the rows of this temperature.
        ax_water.plot(data['time'], water_generation.loc[data.index, 'total_water'],
                      label='Total Water', color='black', linewidth=2)
        for column, legend_text, aa in (('Ser_water', 'Ser Water', 'Ser'),
                                        ('Glx_water', 'Glx Water', 'Glx')):
            ax_water.plot(data['time'], water_generation.loc[data.index, column],
                          label=legend_text, color=amino_acid_colors[aa])

        _finish_axis(ax_water, 'Water Generated',
                     f'Water Generation from Dehydration ({temp_c:.1f} °C)')

        # Panel 3: raw Ser and Ala concentrations.
        for aa, legend_text in (('Ser', 'Serine'), ('Ala', 'Alanine')):
            ax_trend.scatter(data['time'], data[f'{aa}_Conc_THAA_Mean'],
                             label=legend_text, color=amino_acid_colors[aa])

        _finish_axis(ax_trend, 'Concentration',
                     f'Serine Decline and Alanine Increase ({temp_c:.1f} °C)')

        plt.tight_layout()
        plt.show()

def run_dehydration_analysis(real_DL, amino_acid_colors, num_intervals):
    """Run the full dehydration workflow and report the fitted rate.

    Steps, in order: compute the water-generation table, fit the
    dehydration rate, draw the diagnostic plots, resample the water curve
    into ``num_intervals`` step inputs, and print the rate.

    Args:
        real_DL: Input DataFrame passed to every analysis step.
        amino_acid_colors: Colour mapping forwarded to the plotting step.
        num_intervals: Number of step inputs to generate.

    Returns:
        Tuple ``(water_generation, water_input, dehydration_rate)``.
    """
    water_table = calculate_water_generation(real_DL)
    rate = estimate_dehydration_rate(real_DL)

    # Plot before resampling so the figures appear even if resampling fails.
    plot_dehydration_results(real_DL, water_table, amino_acid_colors)

    step_inputs = generate_water_input(water_table, num_intervals)

    print(f"Estimated dehydration rate: {rate:.6f} per hour")

    return water_table, step_inputs, rate

In [None]:
# import pandas as pd
# import numpy as np

# def calculate_water_generation(real_DL, amino_acids):
#     water_generation = pd.DataFrame(index=real_DL.index)
#     for aa in amino_acids:
#         total_initial = real_DL[f'{aa}_Conc_FAA_Mean'].iloc[0] + real_DL[f'{aa}_Conc_BAA_Mean'].iloc[0]
#         water_generation[f'{aa}_water'] = total_initial - (real_DL[f'{aa}_Conc_FAA_Mean'] + real_DL[f'{aa}_Conc_BAA_Mean'])
#     water_generation['total_water'] = water_generation.sum(axis=1)
#     return water_generation

# def generate_water_input(water_generation, num_intervals):
#     total_time = water_generation.index[-1]
#     time_points = np.linspace(0, total_time, num_intervals)
#     interpolated_water = np.interp(time_points, water_generation.index, water_generation['total_water'])
#     water_input = np.diff(interpolated_water, prepend=0)
#     return water_input.tolist()

# def run_dehydration_analysis(real_DL, num_intervals):
#     amino_acids = ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']
#     water_generation = calculate_water_generation(real_DL, amino_acids)
#     water_input = generate_water_input(water_generation, num_intervals)
#     return water_generation, water_input

In [None]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import sys
# sys.path.append('/content/drive/MyDrive/Colab_Notebooks/MoDuLAAR')
# # Functions load_color_dictionary and setup_amino_acid_colors are defined in the main script

# # def calculate_water_generation(real_DL, amino_acids):
# #     """Calculate water generation from amino acid decomposition."""
# #     water_generation = pd.DataFrame(index=real_DL.index)

# #     for aa in amino_acids:
# #         total_initial = real_DL[f'{aa}_Conc_FAA_Mean'].iloc[0] + real_DL[f'{aa}_Conc_BAA_Mean'].iloc[0]
# #         water_generation[f'{aa}_water'] = total_initial - (real_DL[f'{aa}_Conc_FAA_Mean'] + real_DL[f'{aa}_Conc_BAA_Mean'])

# #     water_generation['total_water'] = water_generation.sum(axis=1)
# #     return water_generation
# def calculate_water_generation(real_DL, amino_acids):
#     """Calculate water generation from amino acid decomposition."""
#     water_generation = pd.DataFrame(index=real_DL.index)

#     for aa in amino_acids:
#         total_initial = real_DL[f'{aa}_Conc_FAA_Mean'].iloc[0] + real_DL[f'{aa}_Conc_BAA_Mean'].iloc[0]
#         water_generation[f'{aa}_water'] = total_initial - (real_DL[f'{aa}_Conc_FAA_Mean'] + real_DL[f'{aa}_Conc_BAA_Mean'])

#     water_generation['total_water'] = water_generation.sum(axis=1)
#     return water_generation

# def generate_water_input(water_generation, num_intervals):
#     """Generate water input for racemization module based on dehydration data."""
#     total_time = water_generation.index[-1]
#     time_points = np.linspace(0, total_time, num_intervals)

#     # Interpolate water generation data to match the number of intervals
#     interpolated_water = np.interp(time_points, water_generation.index, water_generation['total_water'])

#     # Calculate the difference between consecutive points to get water input at each step
#     water_input = np.diff(interpolated_water, prepend=0)

#     return water_input.tolist()

# def plot_dehydration_results(real_DL, water_generation, amino_acid_colors):
#     """Plot dehydration results."""
#     temperatures = real_DL['temp (K)'].unique()
#     temperatures_c = temperatures - 273.15

#     real_DL_grouped = real_DL.groupby('temp (K)')

#     for temp_k, temp_c in zip(temperatures, temperatures_c):
#         data = real_DL_grouped.get_group(temp_k)

#         fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(24, 8))

#         bottom = np.zeros(len(data))
#         for aa in amino_acid_colors.keys():
#             if f'{aa}_Conc_FAA_Mean' in data.columns and f'{aa}_Conc_BAA_Mean' in data.columns:
#                 total_aa = data[f'{aa}_Conc_FAA_Mean'] + data[f'{aa}_Conc_BAA_Mean']
#                 ax1.fill_between(data['time'], bottom, bottom + total_aa, label=aa, color=amino_acid_colors[aa])
#                 bottom += total_aa

#         ax1.set_xlabel('Time (hours)')
#         ax1.set_ylabel('Concentration')
#         ax1.set_title(f'Amino Acid Composition Over Time ({temp_c:.1f} °C)')
#         ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5))
#         ax1.set_xscale('log')

#         ax2.plot(data['time'], water_generation.loc[data.index, 'total_water'], label='Total Water', color='black', linewidth=2)

#         for aa in amino_acid_colors.keys():
#             if f'{aa}_water' in water_generation.columns:
#                 ax2.plot(data['time'], water_generation.loc[data.index, f'{aa}_water'], label=f'{aa} Water', color=amino_acid_colors[aa])

#         ax2.set_xlabel('Time (hours)')
#         ax2.set_ylabel('Water Generated')
#         ax2.set_title(f'Water Generation from Amino Acid Decomposition ({temp_c:.1f} °C)')
#         ax2.legend(loc='center left', bbox_to_anchor=(1, 0.5))
#         ax2.set_xscale('log')

#         if 'Ser_Conc_FAA_Mean' in data.columns and 'Ser_Conc_BAA_Mean' in data.columns:
#             ax3.scatter(data['time'], data['Ser_Conc_FAA_Mean'] + data['Ser_Conc_BAA_Mean'], label='Serine', color=amino_acid_colors['Ser'])
#         if 'Ala_Conc_FAA_Mean' in data.columns and 'Ala_Conc_BAA_Mean' in data.columns:
#             ax3.scatter(data['time'], data['Ala_Conc_FAA_Mean'] + data['Ala_Conc_BAA_Mean'], label='Alanine', color=amino_acid_colors['Ala'])

#         ax3.set_xlabel('Time (hours)')
#         ax3.set_ylabel('Concentration')
#         ax3.set_title(f'Serine Decline and Alanine Increase ({temp_c:.1f} °C)')
#         ax3.legend(loc='center left', bbox_to_anchor=(1, 0.5))
#         ax3.set_xscale('log')

#         plt.tight_layout()
#         plt.show()


# def run_dehydration_analysis(real_DL, amino_acid_colors, num_intervals):
#     """Run dehydration analysis and generate water input for racemization."""
#     amino_acids = ['Asx', 'Glx', 'Ser', 'Ala', 'Val', 'Phe', 'Ile']
#     water_generation = calculate_water_generation(real_DL, amino_acids)
#     plot_dehydration_results(real_DL, water_generation, amino_acid_colors)

#     water_input = generate_water_input(water_generation, num_intervals)

#     return water_generation, water_input
