# Setup

Typical imports...

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit

Define our Michaelis-Menten equations

In [None]:
def michaelis_menten(s, km, vmax):
    """Evaluate the Michaelis-Menten rate law, v = vmax * s / (km + s).

    `s` may be a scalar or any array-like; it is converted with `np.array`,
    so the result is always a NumPy array (0-d for scalar input).
    """
    substrate = np.array(s)
    rate = vmax * substrate
    rate = rate / (km + substrate)
    return rate

def michaelis_menten_ha(s, kd1, kd2, alpha, beta, vmax):
    """Evaluate the Michaelis-Menten rate law extended with homotropic allosterism.

    Computes

        v = (vmax*s/kd1 + beta*vmax*s**2/(alpha*kd1*kd2))
            / (1 + s/kd1 + s/kd2 + s**2/(alpha*kd1*kd2))

    `s` may be a scalar or array-like; it is converted with `np.array`.
    """
    substrate = np.array(s)
    s_squared = substrate**2
    coupling = alpha * kd1 * kd2   # shared divisor of both quadratic terms
    first_site = substrate / kd1

    numerator = vmax * first_site + beta * vmax * s_squared / coupling
    denominator = 1 + first_site + substrate / kd2 + s_squared / coupling

    return numerator / denominator

Set up some constants

In [None]:
# Ground-truth kinetic parameters used to simulate the data.
MM_VMAX, MM_KM = 10, 0.3

# Substrate concentrations at which "measurements" are simulated.
SUBSTRATE_CONCENTRATIONS = [0, 0.1, 0.3, 0.6, 1, 2]

# Dense grid spanning the sampled concentration range, for drawing smooth curves.
SMOOTH_X = np.linspace(start=min(SUBSTRATE_CONCENTRATIONS),
                       stop=max(SUBSTRATE_CONCENTRATIONS),
                       num=1000)

Create a dataset with a single replicate (ooh, FAKE DATA!)

In [None]:
# Noise-free single-replicate dataset: one row per substrate concentration,
# with the rate predicted by the ground-truth Michaelis-Menten parameters.
mm_data = pd.DataFrame(
    data={
        '[s]': SUBSTRATE_CONCENTRATIONS,
        'v': michaelis_menten(SUBSTRATE_CONCENTRATIONS, MM_KM, MM_VMAX),
    },
)
mm_data

... add some noise.

In [None]:
def make_noisy(y, first_no_noise=True, sd=0.3, rng=None):
    """Return `y` with independent zero-mean Gaussian noise added to each element.

    Parameters
    ----------
    y : array-like
        Values to perturb. The result has whatever type `y + ndarray` yields
        (a Series stays a Series, an ndarray stays an ndarray).
    first_no_noise : bool, default True
        If True, the first element (by position) is returned unperturbed.
    sd : float, default 0.3
        Standard deviation of the noise.
    rng : numpy.random.Generator, optional
        Source of randomness. Pass a seeded generator for reproducible noise;
        by default a fresh, unseeded generator is created on every call.
    """
    if rng is None:
        rng = np.random.default_rng()
    noise = rng.normal(0, sd, len(y))
    # Zero the noise itself rather than writing `noisy[0] = y[0]` afterwards:
    # this is purely positional, so it also works for a Series whose index does
    # not start at 0, and the length guard keeps empty input from raising.
    if first_no_noise and len(noise) > 0:
        noise[0] = 0.0
    return y + noise

# Add a noisy copy of the ground-truth rates as a new 'v_noisy' column.
# This mutates mm_data in place; the bare expression below displays the frame.
mm_data['v_noisy'] = make_noisy(mm_data['v'])
mm_data

### ALWAYS LOOK AT YOUR DATA!

In [None]:
# Ground truth as a marked line, noisy measurements as orange points.
plt.plot(mm_data['[s]'], mm_data['v'], marker='x', label='ground_truth')
plt.scatter(x=mm_data['[s]'], y=mm_data['v_noisy'], color='orange', label='with noise')
plt.legend();

In [None]:
# Least-squares fit of the Michaelis-Menten curve to the noisy single replicate.
mm_fitted, mm_covariance = curve_fit(
    michaelis_menten,
    mm_data['[s]'],
    mm_data['v_noisy'],
    bounds=(0, np.inf),  # lower bound 0, no upper bound: Km and Vmax stay non-negative
)
print('The theoretical K_m and V_max are: ', MM_KM, MM_VMAX)
print('With my noisy data, the K_m and V_max are:', mm_fitted)

In [None]:
# Ground truth, fitted curve and the noisy measurements.
plt.plot(SMOOTH_X, michaelis_menten(SMOOTH_X, MM_KM, MM_VMAX), label='Ground Truth')
plt.plot(SMOOTH_X, michaelis_menten(SMOOTH_X, *mm_fitted), label='Fitted')
plt.scatter(mm_data['[s]'], mm_data['v_noisy'], label='with noise', color='orange')

# One-standard-deviation parameter uncertainties: square roots of the
# diagonal of the covariance matrix returned by curve_fit.
standard_deviations = np.sqrt(np.diag(mm_covariance))
plus_1sd_km, plus_1sd_vmax = mm_fitted + standard_deviations
minus_1sd_km, minus_1sd_vmax = mm_fitted - standard_deviations

# Band edges pair (low Km, high Vmax) against (high Km, low Vmax); the two
# parameters are shifted independently of each other.
bound_upper = michaelis_menten(SMOOTH_X, minus_1sd_km, plus_1sd_vmax)
bound_lower = michaelis_menten(SMOOTH_X, plus_1sd_km, minus_1sd_vmax)
plt.fill_between(SMOOTH_X, bound_upper, bound_lower, color='blue', alpha=0.1, label='±1SD')

plt.legend();

# Bootstrap
Here, we demonstrate the bootstrap.  The function `one_bootstrap_fit` uses the *pandas* `.sample()` method to draw our bootstrap sample (with replacement) and then refits the model to it.

In [None]:
def one_bootstrap_fit(df, random_state=None):
    """Fit Michaelis-Menten parameters to one bootstrap resample of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns '[s]' (x values) and 'v_noisy' (y values).
    random_state : optional
        Forwarded to `DataFrame.sample`; pass a seed or generator to make the
        resample reproducible. The default (None) draws a fresh random sample.

    Returns
    -------
    numpy.ndarray
        The fitted parameters, in the order (km, vmax).
    """
    # Resample rows with replacement, keeping the original sample size.
    sampled = df.sample(n=len(df), replace=True, random_state=random_state)
    # Only the parameter estimates are needed; the covariance is discarded.
    fitted, _covariance = curve_fit(f=michaelis_menten,
                                    xdata=sampled['[s]'],
                                    ydata=sampled['v_noisy'],
                                    bounds=(0, np.inf)  # Force both Km and Vmax to be non-negative
                                   )
    return fitted
one_bootstrap_fit(mm_data)

In [None]:
# 100 independent bootstrap refits of the single-replicate data.
bootstrap_fits_100 = [one_bootstrap_fit(mm_data) for _ in range(100)]

We can now plot a histogram of values...

In [None]:
# Split each (km, vmax) fit into per-parameter lists.
kms = [fit[0] for fit in bootstrap_fits_100]
vmaxs = [fit[1] for fit in bootstrap_fits_100]

# One histogram per parameter, side by side.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for panel, values, title in zip(axs, (kms, vmaxs), ('$K_m$', '$V_{max}$')):
    panel.hist(values, bins=100)
    panel.set_title(title)

... And zoom in to look at any low-occurrence samples.

In [None]:
# Same histograms as above, with the y-axis clipped to 0-10 counts so that
# sparsely populated bins become visible.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for panel, values, title in zip(axs, (kms, vmaxs), ('$K_m$', '$V_{max}$')):
    panel.hist(values, bins=100)
    panel.set_title(title)
axs[0].set_ylim(0, 10)
axs[1].set_ylim(0, 10)

The rather messy code below removes the bootstrap samples whose $K_m$ or $V_{max}$ falls within the lowest 5% or the highest 5% of the fitted values.

In [None]:
# Trim the most extreme 5% of bootstrap estimates from each tail of each
# parameter, keeping only fits whose Km AND Vmax both lie in the central range.
#
# The cut-offs are the first and last values to KEEP, compared inclusively:
# index k from the bottom and index k from the top (i.e. -k - 1). This removes
# exactly k values from each tail, and when there are fewer than 20 samples
# (k == 0) it keeps everything instead of collapsing both cut-offs onto
# sorted_kms[0].
sorted_kms = sorted(kms)
five_percent_sorted_kms = len(sorted_kms) // 20  # 5% = 1/20
sorted_kms_min = sorted_kms[five_percent_sorted_kms]
sorted_kms_max = sorted_kms[-five_percent_sorted_kms - 1]

sorted_vmaxs = sorted(vmaxs)
five_percent_sorted_vmaxs = len(sorted_vmaxs) // 20  # 5% = 1/20
sorted_vmaxs_min = sorted_vmaxs[five_percent_sorted_vmaxs]
sorted_vmaxs_max = sorted_vmaxs[-five_percent_sorted_vmaxs - 1]

truncated_fits = [
    bootstrap_fit
    for bootstrap_fit in bootstrap_fits_100
    if sorted_kms_min <= bootstrap_fit[0] <= sorted_kms_max
    and sorted_vmaxs_min <= bootstrap_fit[1] <= sorted_vmaxs_max
]

truncated_kms = [bootstrap_fit[0] for bootstrap_fit in truncated_fits]
truncated_vmaxs = [bootstrap_fit[1] for bootstrap_fit in truncated_fits]

fig, axs = plt.subplots(1, 2, figsize=(8, 4))
axs[0].hist(truncated_kms, bins=100)
axs[0].set_title('$K_m$');
axs[1].hist(truncated_vmaxs, bins=100)
axs[1].set_title('$V_{max}$');

And finally, we *look at our data!*

In [None]:
# Joint view of the trimmed estimates: one translucent point per bootstrap fit.
plt.scatter(x=truncated_kms, y=truncated_vmaxs, alpha=0.2)
plt.xlabel(xlabel='$K_m$')
plt.ylabel(ylabel='$V_{max}$')

... That's interesting.  There seems to be a correlation between the two values.  Let's do lots more bootstrap samples and see what we get.

In [None]:
# 10,000 independent bootstrap refits of the single-replicate data.
bootstrap_fits_10000 = [one_bootstrap_fit(mm_data) for _ in range(10000)]

In [None]:
# Split each (km, vmax) fit into per-parameter lists.
kms = [fit[0] for fit in bootstrap_fits_10000]
vmaxs = [fit[1] for fit in bootstrap_fits_10000]

# One histogram per parameter, side by side.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for panel, values, title in zip(axs, (kms, vmaxs), ('$K_m$', '$V_{max}$')):
    panel.hist(values, bins=100)
    panel.set_title(title)

In [None]:
# Trim the most extreme 5% of bootstrap estimates from each tail of each
# parameter, keeping only fits whose Km AND Vmax both lie in the central range.
#
# The cut-offs are the first and last values to KEEP, compared inclusively:
# index k from the bottom and index k from the top (i.e. -k - 1). This removes
# exactly k values from each tail, and when there are fewer than 20 samples
# (k == 0) it keeps everything instead of collapsing both cut-offs onto
# sorted_kms[0].
sorted_kms = sorted(kms)
five_percent_sorted_kms = len(sorted_kms) // 20  # 5% = 1/20
sorted_kms_min = sorted_kms[five_percent_sorted_kms]
sorted_kms_max = sorted_kms[-five_percent_sorted_kms - 1]

sorted_vmaxs = sorted(vmaxs)
five_percent_sorted_vmaxs = len(sorted_vmaxs) // 20  # 5% = 1/20
sorted_vmaxs_min = sorted_vmaxs[five_percent_sorted_vmaxs]
sorted_vmaxs_max = sorted_vmaxs[-five_percent_sorted_vmaxs - 1]

truncated_fits = [
    bootstrap_fit
    for bootstrap_fit in bootstrap_fits_10000
    if sorted_kms_min <= bootstrap_fit[0] <= sorted_kms_max
    and sorted_vmaxs_min <= bootstrap_fit[1] <= sorted_vmaxs_max
]

truncated_kms = [bootstrap_fit[0] for bootstrap_fit in truncated_fits]
truncated_vmaxs = [bootstrap_fit[1] for bootstrap_fit in truncated_fits]

fig, axs = plt.subplots(1, 2, figsize=(8, 4))
axs[0].hist(truncated_kms, bins=100)
axs[0].set_title('$K_m$');
axs[1].hist(truncated_vmaxs, bins=100)
axs[1].set_title('$V_{max}$')

# Joint distribution of the trimmed estimates; the very low alpha lets the
# density of 10,000 overlapping points show through.
plt.figure()
plt.scatter(truncated_kms, truncated_vmaxs, alpha=0.01)
plt.xlabel('$K_m$')
plt.ylabel('$V_{max}$');

Ok, so now we can see very clearly that there's a correlation across our samples.  What happens if we do replicates of our experiment?

In [None]:
# Three replicate "experiments": identical ground-truth curves...
mm_rep1, mm_rep2, mm_rep3 = (
    michaelis_menten(SUBSTRATE_CONCENTRATIONS, MM_KM, MM_VMAX) for _ in range(3)
)

# ...each with its own independent noise draw.
mm_rep1_noisy, mm_rep2_noisy, mm_rep3_noisy = (
    make_noisy(rep) for rep in (mm_rep1, mm_rep2, mm_rep3)
)

# Flatten the replicates into long-format columns (plain Python lists).
mm_s_rep = 3 * SUBSTRATE_CONCENTRATIONS  # repeating a list concatenates three copies
mm_v_rep = [*mm_rep1, *mm_rep2, *mm_rep3]
mm_v_rep_noisy = [*mm_rep1_noisy, *mm_rep2_noisy, *mm_rep3_noisy]

In [None]:
# Long-format table of all three replicates: one row per measurement.
mm_data_rep = pd.DataFrame(
    data={'[s]': mm_s_rep, 'v': mm_v_rep, 'v_noisy': mm_v_rep_noisy},
)
mm_data_rep

In [None]:
# Refit the Michaelis-Menten curve, this time to all three replicates at once.
mm_fitted, mm_covariance = curve_fit(
    michaelis_menten,
    mm_data_rep['[s]'],
    mm_data_rep['v_noisy'],
    bounds=(0, np.inf),  # lower bound 0, no upper bound: Km and Vmax stay non-negative
)
print('The theoretical K_m and V_max are: ', MM_KM, MM_VMAX)
print('With my noisy data, the K_m and V_max are:', mm_fitted)

# Ground truth, fitted curve and the pooled noisy measurements.
plt.plot(SMOOTH_X, michaelis_menten(SMOOTH_X, MM_KM, MM_VMAX), label='Ground Truth')
plt.plot(SMOOTH_X, michaelis_menten(SMOOTH_X, *mm_fitted), label='Fitted')
plt.scatter(mm_data_rep['[s]'], mm_data_rep['v_noisy'],
            label='with noise', color='orange', alpha=0.3)
plt.legend();

In [None]:
# 100 independent bootstrap refits of the three-replicate data.
bootstrap_fits_rep_100 = [one_bootstrap_fit(mm_data_rep) for _ in range(100)]

In [None]:
# Split each (km, vmax) fit into per-parameter lists.
kms = [fit[0] for fit in bootstrap_fits_rep_100]
vmaxs = [fit[1] for fit in bootstrap_fits_rep_100]

# One histogram per parameter, side by side.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for panel, values, title in zip(axs, (kms, vmaxs), ('$K_m$', '$V_{max}$')):
    panel.hist(values, bins=100)
    panel.set_title(title)

In [None]:
# Same histograms as above, with the y-axis clipped to 0-10 counts so that
# sparsely populated bins become visible.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for panel, values, title in zip(axs, (kms, vmaxs), ('$K_m$', '$V_{max}$')):
    panel.hist(values, bins=100)
    panel.set_title(title)
axs[0].set_ylim(0, 10)
axs[1].set_ylim(0, 10)

In [None]:
# Trim the most extreme 5% of bootstrap estimates from each tail of each
# parameter, keeping only fits whose Km AND Vmax both lie in the central range.
#
# The cut-offs are the first and last values to KEEP, compared inclusively:
# index k from the bottom and index k from the top (i.e. -k - 1). This removes
# exactly k values from each tail, and when there are fewer than 20 samples
# (k == 0) it keeps everything instead of collapsing both cut-offs onto
# sorted_kms[0].
sorted_kms = sorted(kms)
five_percent_sorted_kms = len(sorted_kms) // 20  # 5% = 1/20
sorted_kms_min = sorted_kms[five_percent_sorted_kms]
sorted_kms_max = sorted_kms[-five_percent_sorted_kms - 1]

sorted_vmaxs = sorted(vmaxs)
five_percent_sorted_vmaxs = len(sorted_vmaxs) // 20  # 5% = 1/20
sorted_vmaxs_min = sorted_vmaxs[five_percent_sorted_vmaxs]
sorted_vmaxs_max = sorted_vmaxs[-five_percent_sorted_vmaxs - 1]

truncated_fits = [
    bootstrap_fit
    for bootstrap_fit in bootstrap_fits_rep_100
    if sorted_kms_min <= bootstrap_fit[0] <= sorted_kms_max
    and sorted_vmaxs_min <= bootstrap_fit[1] <= sorted_vmaxs_max
]

truncated_kms = [bootstrap_fit[0] for bootstrap_fit in truncated_fits]
truncated_vmaxs = [bootstrap_fit[1] for bootstrap_fit in truncated_fits]

fig, axs = plt.subplots(1, 2, figsize=(8, 4))
axs[0].hist(truncated_kms, bins=100)
axs[0].set_title('$K_m$');
axs[1].hist(truncated_vmaxs, bins=100)
axs[1].set_title('$V_{max}$');

In [None]:
# 10,000 independent bootstrap refits of the three-replicate data.
bootstrap_fits_rep_10000 = [one_bootstrap_fit(mm_data_rep) for _ in range(10000)]

In [None]:
# Split each (km, vmax) fit into per-parameter lists.
kms = [fit[0] for fit in bootstrap_fits_rep_10000]
vmaxs = [fit[1] for fit in bootstrap_fits_rep_10000]

# One histogram per parameter, side by side.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for panel, values, title in zip(axs, (kms, vmaxs), ('$K_m$', '$V_{max}$')):
    panel.hist(values, bins=100)
    panel.set_title(title)

In [None]:
# Same histograms as above, with the y-axis clipped to 0-10 counts so that
# sparsely populated bins become visible.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for panel, values, title in zip(axs, (kms, vmaxs), ('$K_m$', '$V_{max}$')):
    panel.hist(values, bins=100)
    panel.set_title(title)
axs[0].set_ylim(0, 10)
axs[1].set_ylim(0, 10)

In [None]:
# Trim the most extreme 5% of bootstrap estimates from each tail of each
# parameter, keeping only fits whose Km AND Vmax both lie in the central range.
#
# The cut-offs are the first and last values to KEEP, compared inclusively:
# index k from the bottom and index k from the top (i.e. -k - 1). This removes
# exactly k values from each tail, and when there are fewer than 20 samples
# (k == 0) it keeps everything instead of collapsing both cut-offs onto
# sorted_kms[0].
sorted_kms = sorted(kms)
five_percent_sorted_kms = len(sorted_kms) // 20  # 5% = 1/20
sorted_kms_min = sorted_kms[five_percent_sorted_kms]
sorted_kms_max = sorted_kms[-five_percent_sorted_kms - 1]

sorted_vmaxs = sorted(vmaxs)
five_percent_sorted_vmaxs = len(sorted_vmaxs) // 20  # 5% = 1/20
sorted_vmaxs_min = sorted_vmaxs[five_percent_sorted_vmaxs]
sorted_vmaxs_max = sorted_vmaxs[-five_percent_sorted_vmaxs - 1]

truncated_fits = [
    bootstrap_fit
    for bootstrap_fit in bootstrap_fits_rep_10000
    if sorted_kms_min <= bootstrap_fit[0] <= sorted_kms_max
    and sorted_vmaxs_min <= bootstrap_fit[1] <= sorted_vmaxs_max
]

truncated_kms = [bootstrap_fit[0] for bootstrap_fit in truncated_fits]
truncated_vmaxs = [bootstrap_fit[1] for bootstrap_fit in truncated_fits]

fig, axs = plt.subplots(1, 2, figsize=(8, 4))
axs[0].hist(truncated_kms, bins=100)
axs[0].set_title('$K_m$');
axs[1].hist(truncated_vmaxs, bins=100)
axs[1].set_title('$V_{max}$');

In [None]:
# Joint view of the trimmed estimates: one translucent point per bootstrap fit.
plt.scatter(x=truncated_kms, y=truncated_vmaxs, alpha=0.05)
plt.xlabel(xlabel='$K_m$')
plt.ylabel(ylabel='$V_{max}$')

Ok, so now we can see the true distribution of our errors.  Note that they're correlated.

# Model selection
The first and simplest way to see if your model is any good is to plot the residuals: subtract your predicted values from your measured values, and look at the results.

## plotting residuals

In [None]:
# Fit the standard model to the three-replicate data.
mm_fitted, mm_covariance = curve_fit(
    michaelis_menten,
    mm_data_rep['[s]'],
    mm_data_rep['v_noisy'],
    bounds=(0, np.inf),  # lower bound 0, no upper bound: Km and Vmax stay non-negative
)

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

# Left panel: data with the fitted curve.
axs[0].plot(SMOOTH_X, michaelis_menten(SMOOTH_X, *mm_fitted), label='Fitted')
axs[0].scatter(mm_data_rep['[s]'], mm_data_rep['v_noisy'], color='orange', alpha=0.5)
axs[0].set_title('Fit')
# Height of the left panel's y-axis, reused so both panels share one scale.
y_range = axs[0].get_ylim()[1] - axs[0].get_ylim()[0]

# Right panel: residuals (measured minus predicted), centred on zero.
fit_values = michaelis_menten(mm_data_rep['[s]'], *mm_fitted)
subtracted_values = mm_data_rep['v_noisy'] - fit_values
axs[1].scatter(mm_data_rep['[s]'], subtracted_values)
axs[1].set_title('Data - Fit')
axs[1].set_ylim(-y_range / 2, y_range / 2)
axs[1].hlines(0,
              mm_data_rep['[s]'].min(),
              mm_data_rep['[s]'].max(),
              linestyles='dashed');

Do you see any 'structure' in the data?  If it looks randomly-distributed, then you *may* have a good model.

# Cross Validation
Start with some convenience functions.

In [None]:
def make_fold_ids(folds, length, rng=None):
    """Return a shuffled array assigning each of `length` items to a fold.

    Fold ids are 0..folds-1, dealt out round-robin before shuffling, so fold
    sizes differ by at most one.

    Parameters
    ----------
    folds : int
        Number of folds.
    length : int
        Number of items to assign.
    rng : numpy.random.Generator, optional
        Source of randomness for the shuffle. Pass a seeded generator for a
        reproducible assignment; by default a fresh, unseeded generator is used.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (length,) holding the fold id of each item.
    """
    if rng is None:
        rng = np.random.default_rng()
    fold_id = np.arange(0, length) % folds
    rng.shuffle(fold_id)  # in-place shuffle
    return fold_id

def _held_out_squared_error(f, df_train, df_validate):
    """Fit `f` on `df_train` and return its summed squared error on `df_validate`."""
    fitted, _covariance = curve_fit(f=f,
                                    xdata=df_train['[s]'],
                                    ydata=df_train['v_noisy'],
                                    max_nfev=10000,
                                    bounds=(0, np.inf)  # every parameter of f constrained to be non-negative
                                   )
    predictions = f(df_validate['[s]'], *fitted)
    return sum((df_validate['v_noisy'] - predictions)**2)


def cross_val_error_comparison(df, f1, f2, folds=10):
    """Compare two models by k-fold cross-validated sum of squared errors.

    Rows of `df` are randomly assigned to folds. Each fold is held out in turn,
    both models are fitted to the remaining rows, and the squared prediction
    errors on the held-out rows are accumulated. Both models see exactly the
    same fold assignment.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns '[s]' (x values) and 'v_noisy' (y values).
        The frame is copied, so the caller's object is not modified.
    f1, f2 : callable
        Model functions with signature f(x, *params), as accepted by
        `scipy.optimize.curve_fit`.
    folds : int, default 10
        Number of folds; must be at least 2 so that every training set is
        non-empty.

    Returns
    -------
    tuple
        (total squared error of f1, total squared error of f2).

    Raises
    ------
    ValueError
        If `folds` is less than 2.
    """
    if folds < 2:
        # With a single fold the training set would be empty and curve_fit
        # would fail with a far less helpful message.
        raise ValueError('folds must be at least 2, got {}'.format(folds))

    cross_validation_error1, cross_validation_error2 = 0, 0
    df = df.copy(deep=True)
    df['__fold id__'] = make_fold_ids(folds, len(df))
    for fold_id in sorted(df['__fold id__'].unique()):
        held_out = df['__fold id__'] == fold_id
        df_validate = df[held_out]
        df_train = df[~held_out]

        cross_validation_error1 += _held_out_squared_error(f1, df_train, df_validate)
        cross_validation_error2 += _held_out_squared_error(f2, df_train, df_validate)

    return cross_validation_error1, cross_validation_error2

For completeness, we fake some data from the augmented MM equation.

In [None]:
# Ground-truth parameters for the homotropic-allosterism (HA) model.
MM_KD1 = 0.2
MM_KD2 = 0.4
MM_ALPHA = 0.9
MM_BETA = 0.1

# Three replicate noise-free curves. The second dissociation constant must be
# MM_KD2; passing MM_KD1 twice would leave MM_KD2 unused and simulate data
# from a model with kd1 == kd2.
mm_ha_rep1 = michaelis_menten_ha(SUBSTRATE_CONCENTRATIONS, MM_KD1, MM_KD2, MM_ALPHA, MM_BETA, MM_VMAX)
mm_ha_rep2 = michaelis_menten_ha(SUBSTRATE_CONCENTRATIONS, MM_KD1, MM_KD2, MM_ALPHA, MM_BETA, MM_VMAX)
mm_ha_rep3 = michaelis_menten_ha(SUBSTRATE_CONCENTRATIONS, MM_KD1, MM_KD2, MM_ALPHA, MM_BETA, MM_VMAX)

# Independent noise for each replicate.
mm_ha_rep1_noisy = make_noisy(mm_ha_rep1)
mm_ha_rep2_noisy = make_noisy(mm_ha_rep2)
mm_ha_rep3_noisy = make_noisy(mm_ha_rep3)

# Flatten the replicates into long-format columns (plain Python lists).
mm_ha_s_rep = SUBSTRATE_CONCENTRATIONS*3  # Multiplying a list by 3 gives three concatenated copies
mm_ha_v_rep = list(mm_ha_rep1) + list(mm_ha_rep2) + list(mm_ha_rep3)  # Adding lists concatenates them
mm_ha_v_rep_noisy = list(mm_ha_rep1_noisy) + list(mm_ha_rep2_noisy) + list(mm_ha_rep3_noisy)

mm_ha_data_rep = pd.DataFrame({'[s]': mm_ha_s_rep,
                               'v':   mm_ha_v_rep,
                               'v_noisy': mm_ha_v_rep_noisy}
                             )
mm_ha_data_rep

... And now we see how the fitting does.

In [None]:
# Fit both candidate models to the same HA-generated data.
mm_fitted, mm_covariance = curve_fit(f=michaelis_menten,
                                     xdata=mm_ha_data_rep['[s]'],
                                     ydata=mm_ha_data_rep['v_noisy'],
                                     bounds=(0, np.inf)  # Force both Km and Vmax to be non-negative
                                    )
mm_ha_fitted, mm_ha_covariance = curve_fit(f=michaelis_menten_ha,
                                           xdata=mm_ha_data_rep['[s]'],
                                           ydata=mm_ha_data_rep['v_noisy'],
                                           bounds=(0, np.inf)  # Force all five HA parameters to be non-negative
                                          )
print('With a standard Michaelis-Menten, the fitted K_m and V_max are:', mm_fitted)
# The HA fit returns five values, in the order of michaelis_menten_ha's parameters.
print('With a Homotropic Allosterism Michaelis-Menten, the fitted K_d1, K_d2, alpha, beta and V_max are:', mm_ha_fitted)


*ALWAYS LOOK AT YOUR DATA*

In [None]:
# Three panels: both fitted curves over the data, then the residuals of each model.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))

# Panel 0: fitted curves and the noisy HA data.
axs[0].plot(SMOOTH_X, michaelis_menten(SMOOTH_X, *mm_fitted), label='MM')
axs[0].plot(SMOOTH_X, michaelis_menten_ha(SMOOTH_X, *mm_ha_fitted), label='MM HA')
axs[0].scatter(mm_ha_data_rep['[s]'], mm_ha_data_rep['v_noisy'], color='orange', alpha=0.5)
axs[0].set_title('Fit')
# Height of panel 0's y-axis, reused so the residual panels share its scale.
y_range = axs[0].get_ylim()[1] - axs[0].get_ylim()[0]

# Panel 1: residuals of the standard model.
fit_values_mm = michaelis_menten(mm_ha_data_rep['[s]'], *mm_fitted)
subtracted_values_mm = mm_ha_data_rep['v_noisy'] - fit_values_mm
axs[1].scatter(mm_ha_data_rep['[s]'], subtracted_values_mm)
axs[1].set_title('Data - Fit (MM)')
axs[1].set_ylim(-y_range / 2, y_range / 2)
axs[1].hlines(0,
              mm_ha_data_rep['[s]'].min(),
              mm_ha_data_rep['[s]'].max(),
              linestyles='dashed')

# Panel 2: residuals of the homotropic-allosterism model.
fit_values_mm_ha = michaelis_menten_ha(mm_ha_data_rep['[s]'], *mm_ha_fitted)
subtracted_values_mm_ha = mm_ha_data_rep['v_noisy'] - fit_values_mm_ha
axs[2].scatter(mm_ha_data_rep['[s]'], subtracted_values_mm_ha)
axs[2].set_title('Data - Fit (MM-HA)')
axs[2].set_ylim(-y_range / 2, y_range / 2)
axs[2].hlines(0,
              mm_ha_data_rep['[s]'].min(),
              mm_ha_data_rep['[s]'].max(),
              linestyles='dashed');

Note that fitting MM-HA data with a standard MM equation gives a bit of structure to your residuals.  MM-HA data fitted with the MM-HA equation looks more random.  

Finally, we can compare the cross validation errors for the two models:

In [None]:
# 5-fold cross-validation of both models on the data generated from the standard model.
xval_mm, xval_mm_ha = cross_val_error_comparison(
    df=mm_data_rep,
    f1=michaelis_menten,
    f2=michaelis_menten_ha,
    folds=5,
)
print('The cross validation error for MM data with the sandard and HA equations are:',xval_mm, 'and', xval_mm_ha)

In [None]:
# Ratio below 1 means the standard model had the lower cross-validation error.
xval_ratio = xval_mm / xval_mm_ha

# A model is preferred only if its error is under 95% of the other model's.
if xval_ratio < 0.95:
    verdict = 'Michaelis-Menton is the prefered model.'
elif 1/xval_ratio < 0.95:
    verdict = 'Michaelis-Menton with Homotropic Alostery is the prefered model.'
else:
    verdict = "The cross validation scores are too close, we can't descriminiate between the models"
print(verdict)

## Exercise:
Do the above cross validation analysis for the MM-HA data
