### File for the keyboard data analysis in the online-study

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import pingouin as pg
import matplotlib.pyplot as plt
from scipy.stats import sem
# The seaborn style sheets were renamed to "seaborn-v0_8-*" in matplotlib 3.6 and the old names were
# removed in matplotlib 3.8, so try the original name first and fall back to the renamed style sheet
try:
    plt.style.use("seaborn-deep")
except OSError:
    plt.style.use("seaborn-v0_8-deep")

# SK Learn imports
from sklearn.pipeline import Pipeline
# from sklearn.model_selection import permutation_test_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import RidgeCV

# Create custom classes for data transformation in the sklearn pipeline
from sklearn.base import TransformerMixin, BaseEstimator

# Imports for SK Learn RepeatedGroupKFold and Permutation Test Customizations
from sklearn.utils.validation import _deprecate_positional_args

from joblib import Parallel, delayed
from sklearn.model_selection._split import check_cv
from sklearn.base import is_classifier, clone
from sklearn.utils import (indexable, check_random_state, _safe_indexing)
from sklearn.metrics import check_scoring
from sklearn.utils.metaestimators import _safe_split

In [None]:
# --- Load the tab-separated feature file (its first column is used as the row index) ---
dataset = pd.read_csv(
    "Online_Study_Keyboard_Features.csv",
    index_col=0,
    encoding="utf-8",
    sep="\t",
)


#### --- get the names of all non keyboard features ---

In [None]:
# Columns that are dropped whenever only the keyboard features are analysed
non_keyboard_features = [
    "Pr_samValence",
    "Pr_samArousal",
    "Con_samValence",
    "Con_samArousal",
    "condition",
]


#### --- Get the descriptive statistics about the dataset ---

In [None]:
# Descriptive statistics of the keyboard features only, computed separately for the rows with
# condition == 0 (stored as HS) and condition == 1 (stored as LS); the columns are sorted by name
def _describe_keyboard_features(condition_code):
    in_condition = dataset["condition"] == condition_code
    keyboard_only = dataset.loc[in_condition].drop(columns=non_keyboard_features)
    return keyboard_only.describe().sort_index(axis=1)


descriptive_hs_data = _describe_keyboard_features(0)
descriptive_ls_data = _describe_keyboard_features(1)


#### --- Data visualizations ---

In [None]:
# Plot the low-stress and high-stress distribution of each variable in one plot

# the condition subsets do not depend on the plotted column, so they are selected once up front
hs_data = dataset.loc[dataset["condition"] == 0]
ls_data = dataset.loc[dataset["condition"] == 1]

for col in dataset.columns:
    # sns.distplot is deprecated; its replacement is histplot with a density-normalized histogram and a
    # KDE curve that is allowed to extend past the data range (cut=3)
    for subset, label in ((hs_data, "HS"), (ls_data, "LS")):
        sns.histplot(subset[col], kde=True, stat="density", kde_kws={"cut": 3},
                     line_kws={"linewidth": 3}, label=label)
    plt.legend(loc="upper right")
    plt.title(col)
    plt.show()
    # only the first column is plotted; remove the break to get one plot per column
    break

In [None]:
# Plot the distribution (histogram plus KDE curve) of each keyboard feature
for col in dataset.drop(non_keyboard_features, axis=1).columns:
    # sns.distplot is deprecated; its replacement is histplot with a density-normalized histogram and a
    # KDE curve that is allowed to extend past the data range (cut=3)
    sns.histplot(dataset[col], kde=True, stat="density", kde_kws={"cut": 3},
                 line_kws={"linewidth": 3}, edgecolor="black")
    plt.title(col)
    plt.show()
    # only the first feature is plotted; remove the break to get one plot per feature
    break

In [None]:
# Pairplot of all columns whose name matches "Con_" (condition phase data only); corner=True draws
# only the lower triangle of the plot grid
condition_phase_data = dataset.filter(regex="Con_")
sns.pairplot(condition_phase_data, corner=True)
plt.show()

In [None]:
# Plot a correlation matrix between the features

# Pairwise correlations of the columns whose name matches "Con_" (condition phase data only)
corr = dataset.filter(regex="Con_").corr()

# Boolean mask that hides the upper triangle (including the diagonal) of the matrix
mask = np.triu(np.ones_like(corr, dtype=bool))

# Custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Create the figure and draw the masked, annotated heatmap with square cells into it
f, ax = plt.subplots(figsize=(11, 9))
heatmap_style = {
    "mask": mask,
    "cmap": cmap,
    "center": 0,
    "square": True,
    "linewidths": .5,
    "cbar_kws": {"shrink": .5},
    "annot": True,
}
sns.heatmap(corr, **heatmap_style)

plt.show()

### -- Compare each keyboard feature with a mixed ANOVA with experimental phase as the within-subject factor and condition as the between-subject factor  --

#### --- ANOVA helper functions ---

In [None]:
# prepare the dataset for the ANOVA

# helper that prepares the dataset for the ANOVA by changing the data from a wide to a long format
def create_anova_df(df, variable):
    """Build the long-format dataframe of one feature for the mixed ANOVA.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide-format data that contains the columns "Con_<variable>", "Pr_<variable>" and "condition".
        It is not modified.
    variable : str
        Name of the feature without the "Con_" / "Pr_" prefix.

    Returns
    -------
    pandas.DataFrame
        Long-format data with the columns "subject" (running number per input row), "condition"
        (0 and 1 replaced by "HS" and "LS"), "Pr-Con" ("Pr" or "Con") and <variable>. All "Pr" rows
        come before the "Con" rows.
    """
    pr_col = "Pr_" + variable
    con_col = "Con_" + variable

    # take an explicit copy: the new columns must not be written into (a view of) the caller's dataframe
    anova_df = df.loc[:, [con_col, pr_col, "condition"]].copy()

    # assign a subject number to each subject
    anova_df["subject"] = np.arange(len(anova_df))
    # replace the condition number with a string; the result is assigned back because an inplace replace on
    # the selected column is a chained assignment that has no effect when pandas copy-on-write is active
    anova_df["condition"] = anova_df["condition"].replace({0: "HS", 1: "LS"})

    # change the format from wide to long
    anova_df = pd.melt(anova_df, id_vars=["subject", "condition"], value_vars=[pr_col, con_col],
                       var_name="Pr-Con", value_name=variable)

    # change the name of the column to practice and condition for clearer reading of the results
    anova_df["Pr-Con"] = anova_df["Pr-Con"].replace({pr_col: "Pr", con_col: "Con"})

    return anova_df


# Create PointPlots to visualize the manipulation check results
# Tutorial here: https://raphaelvallat.com/pingouin.html
def plot_anova(data, variable):
    """Show a seaborn point plot of `variable` over the "Pr-Con" column with one hue per "condition".

    `data` is expected to provide the columns "Pr-Con", "condition" and `variable`.
    The figure is only shown, not saved.
    """
    # apply the seaborn default theme
    sns.set()

    pointplot_options = {
        "x": "Pr-Con",
        "y": variable,
        "hue": "condition",
        "dodge": True,
        "markers": ['o', 's'],
        "capsize": .1,
        "errwidth": 1,
        "palette": 'colorblind',
    }
    sns.pointplot(data=data, **pointplot_options)

    plt.title("Pointplot with " + variable)
    plt.show()
    # to save the plot instead:
    # plt.savefig('ANOVA' + variable + '.png')


# calculate the anova results
def calc_anova(data, variable):
    """Calculate and print the mixed ANOVA and the pairwise post hoc tests of one feature.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format data with the columns "subject", "condition", "Pr-Con" and `variable`.
    variable : str
        Name of the dependent variable column.

    Returns
    -------
    tuple of pandas.DataFrame
        The ANOVA table and the post hoc table, each with `variable` added as an outer index level.
    """
    # calculate the anova ("Pr-Con" is the within-subject factor, "condition" the between-subject factor)
    aov = pg.mixed_anova(dv=variable, within="Pr-Con", between="condition", subject="subject", data=data)
    print("Repeated measures ANOVA with " + variable)
    pg.print_table(aov)

    # pingouin renamed pairwise_ttests to pairwise_tests; prefer the new name and fall back to the old one
    # so that the analysis runs with both older and newer pingouin releases
    pairwise_tests = getattr(pg, "pairwise_tests", None) or pg.pairwise_ttests
    posthocs = pairwise_tests(dv=variable, within='Pr-Con', between='condition',
                              subject='subject', data=data, return_desc=True)
    pg.print_table(posthocs)
    print("\n" + "\n")

    # return the anova and post hoc dataframes with an added index layer that is the variable name
    return pd.concat([aov], keys=[variable]), pd.concat([posthocs], keys=[variable])


# helper function to get a dataframe with all anova results per task
def get_anova_results(dataframe, variable_list):
    """Plot and calculate the mixed ANOVA for every variable and collect all result tables.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Wide-format dataset that is passed to create_anova_df for every variable.
    variable_list : iterable of str
        Feature names without the "Con_" / "Pr_" prefix.

    Returns
    -------
    tuple of pandas.DataFrame
        The concatenated ANOVA tables and the concatenated post hoc tables of all variables
        (two empty dataframes if `variable_list` is empty).
    """
    anova_tables = []
    posthoc_tables = []

    for variable in variable_list:
        anova_data = create_anova_df(dataframe, variable)

        # plot the anova
        plot_anova(anova_data, variable)
        # calc the anova
        anova, posthoc = calc_anova(anova_data, variable)

        anova_tables.append(anova)
        posthoc_tables.append(posthoc)

    # concatenate once at the end instead of growing a dataframe inside the loop; pd.concat raises on an
    # empty list, so an empty variable list still returns empty dataframes
    anova_df = pd.concat(anova_tables) if anova_tables else pd.DataFrame()
    posthoc_df = pd.concat(posthoc_tables) if posthoc_tables else pd.DataFrame()

    return anova_df, posthoc_df

#### Calculate the mixed ANOVA for each keyboard feature and save the results in a dataframe

In [None]:
# Collect the names (without the "Con_" part) of all keyboard features that are compared in the mixed ANOVA
keyboard_features = []
for column_name in dataset.columns:
    if column_name in non_keyboard_features or "Con_" not in column_name:
        continue
    keyboard_features.append(column_name.replace("Con_", ""))

# Run the mixed ANOVA for every keyboard feature; all ANOVA tables end up in one dataframe and all
# post hoc tables in another
anova_results_df, anova_posthocs_df = get_anova_results(dataset, keyboard_features)

### -- Machine learning analysis --

#### --- Custom Helper classes for data transformation in the sk-learn pipeline ---

In [None]:
# Helper class for custom standardization in the sk learn pipeline (fit learns nothing, transform only selects or
# combines columns)

# select what keyboard data should be used (only the condition data or the difference between the condition and
# practice data)
class CustomStandardization(TransformerMixin, BaseEstimator):

    """
    Custom transformation of the condition data that can use the practice data to take individual differences into
    account. The input of transform must be a pandas DataFrame with "Con_"-prefixed columns (and matching
    "Pr_"-prefixed columns for the difference score).

    method:
    - "ignore_practice": only use the condition trial data (the "Con_" columns are returned unchanged)
    - "difference_score": use the difference score between the condition and practice trial
      (one "Diff_<feature>" column per "Con_<feature>" column, calculated as Con - Pr)
    - any other value prints a notice and behaves like "ignore_practice"
    """

    # the mixin is listed before BaseEstimator, which is the base class order that scikit-learn recommends

    def __init__(self, method="ignore_practice"):
        self.method = method

    def fit(self, X, y=None):
        # nothing is learned from the data
        return self

    def transform(self, X, y=None):

        # only columns that start with the prefix are condition columns (a plain substring test would also
        # match names that merely contain "Con_" somewhere else)
        prefix = "Con_"
        condition_cols = [col for col in X.columns if col.startswith(prefix)]

        if self.method == "difference_score":

            # calculate the difference between the condition phase and practice phase data of each feature
            diff_scores = {}
            for col in condition_cols:
                feat = col[len(prefix):]
                diff_scores["Diff_" + feat] = X[col] - X["Pr_" + feat]

            return pd.DataFrame(diff_scores, index=X.index)

        if self.method != "ignore_practice":
            print("Chosen method " + self.method + " does not exist. Defaulted to ignore_practice")

        return X.loc[:, condition_cols]

#### --- Modified Permutation Test Score from SK Learn ---

In [None]:
# copied the permutation test score function from SK Learn and adapted it so that it returns the permutation test
# scores for each fold in cross validation in order to calculate the standard deviation of the permutation test score

@_deprecate_positional_args
def permutation_test_score(estimator, X, y, *, groups=None, cv=None,
                           n_permutations=100, n_jobs=None, random_state=0,
                           verbose=0, scoring=None):
    """Evaluate the significance of a cross-validated score with permutations

    Unlike a single averaged score per run, this version keeps the score of every
    cross-validation split, both for the true targets and for each permutation
    (see Returns).

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.
    X : array-like of shape at least 2D
        The data to fit.
    y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
        The target variable to try to predict in the case of
        supervised learning.
    groups : array-like of shape (n_samples,), default=None
        Labels to constrain permutation within groups, i.e. ``y`` values
        are permuted among samples with the same group identifier.
        When not specified, ``y`` values are permuted among all samples.
        When a grouped cross-validator is used, the group labels are
        also passed on to the ``split`` method of the cross-validator. The
        cross-validator uses them for grouping the samples  while splitting
        the dataset into train/test set.
    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.
        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used.
        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.
        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.
    n_permutations : int, default=100
        Number of times to permute ``y``.
    n_jobs : int, default=None
        The number of CPUs to use to do the computation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    random_state : int, RandomState instance or None, default=0
        Pass an int for reproducible output for permutation of
        ``y`` values among samples. See :term:`Glossary <random_state>`.
    verbose : int, default=0
        The verbosity level.
    scoring : str or callable, default=None
        A single str (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.
        If None the estimator's score method is used.

    Returns
    -------
    score : list of float
        The scores obtained with the true (unpermuted) targets, one entry
        per train/test split yielded by ``cv``.
    permutation_scores : ndarray
        The scores obtained with permuted targets, built from one list of
        per-split scores per permutation (one row per permutation, one
        column per train/test split).
    pvalue : float
        The p-value, which approximates the probability that the score would
        be obtained by chance. This is calculated as:
        `(C + 1) / (n_permutations + 1)`
        Where C is the number of permutations whose mean score over all
        splits >= the mean of the true scores.
        The best possible p-value is 1/(n_permutations + 1), the worst is 1.0.

    Notes
    -----
    This function implements Test 1 in:
        Ojala and Garriga. Permutation Tests for Studying Classifier
        Performance.  The Journal of Machine Learning Research (2010)
        vol. 11
        `[pdf] <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_.
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    score = _permutation_test_score(clone(estimator), X, y, groups, cv, scorer)
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))
    permutation_scores = np.array(permutation_scores)
    # compare the mean score of each permutation run (mean over its splits) with the mean of the true scores
    pvalue = (np.sum(np.mean(permutation_scores, axis=1) >= np.mean(score)) + 1.0) / (n_permutations + 1)
    return score, permutation_scores, pvalue

def _permutation_test_score(estimator, X, y, groups, cv, scorer):
    """Fit and score the estimator on every split of ``cv`` and return the list of per-split scores."""
    fold_scores = []
    for train_idx, test_idx in cv.split(X, y, groups):
        # select the training and test portions of the data for this split
        train_X, train_y = _safe_split(estimator, X, y, train_idx)
        test_X, test_y = _safe_split(estimator, X, y, test_idx, train_idx)

        estimator.fit(train_X, train_y)
        fold_score = scorer(estimator, test_X, test_y)
        fold_scores.append(fold_score)
    return fold_scores

def _shuffle(y, groups, random_state):
    """Return a permuted copy of y; with groups, values are only permuted within the same group."""
    if groups is None:
        # no grouping: one permutation over all positions
        return _safe_indexing(y, random_state.permutation(len(y)))

    positions = np.arange(len(groups))
    for group_label in np.unique(groups):
        # permute the positions that belong to this group among themselves
        in_group = (groups == group_label)
        positions[in_group] = random_state.permutation(positions[in_group])
    return _safe_indexing(y, positions)

#### --- Helper functions to carry out the machine learning analysis and plot the results of the permutation test ---

In [None]:
# plot the results of the permutation procedure
def plot_permutation_test_results(permutation_scores, model_scores, pvalue, p_tresh, method, plot_title):
    """Plot the model scores against the distribution of the permutation scores and save the figure.

    The figure consists of a boxplot of the model scores above a histogram of the permutation scores,
    with vertical lines for the mean model score, the mean permutation score and the significance
    threshold. It is saved as ``plot_title + ".png"``, shown and closed.

    Parameters
    ----------
    permutation_scores : array-like
        Scores of the permutation runs (plotted as histogram).
    model_scores : array-like
        Scores of the model on the true targets (plotted as boxplot; their mean and standard deviation
        are shown in the legend).
    pvalue : float
        P-value that is shown in the legend.
    p_tresh : float
        Score at which the significance threshold line is drawn.
    method : str
        "Classification" labels the x axis "Accuracy Score", everything else "R² Score".
    plot_title : str
        Title of the figure and base name of the saved file.
    """
    # set the default colors to the seaborn color codes
    sns.set_color_codes()

    # initialize a figure with two subplots (one above the other with a 15%, 85% ratio)
    f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (.15, .85)})

    # plot a boxplot of the model scores; the scores are passed explicitly as x so that the box lies on the
    # x axis it shares with the histogram, independent of how seaborn interprets a positional first argument
    sns.boxplot(x=model_scores, color="LightYellow", showmeans=True, meanline=True,
                meanprops={"linestyle": "-", "linewidth": 3, "color": "DarkKhaki"},
                medianprops={"linestyle": "None", "linewidth": 0},
                flierprops={"marker": "o", "markerfacecolor": "LightYellow"},
                ax=ax_box)

    # set the x_label of the Boxplot to None
    ax_box.set_xlabel("")
    # Remove the borders of the "graph" around the boxplot
    ax_box.axis("off")

    # plot a histogram of the permutation scores
    sns.histplot(permutation_scores, color="b", bins=15, ax=ax_hist, label="Permutation Scores")

    # plot a vertical line of the mean cv score
    ax_hist.axvline(np.mean(model_scores), color="DarkKhaki", linewidth=3,
                    label="Model Score: %.2f (%.2f), \np = %.3f" % (
                        np.round(np.mean(model_scores), 3), np.round(np.std(model_scores), 3), pvalue))
    # plot a vertical line of the mean permutation score
    ax_hist.axvline(np.mean(permutation_scores), color="Navy", linewidth=3,
                    label="Permutation Mean: " "%.2f" % (np.round(np.mean(permutation_scores), 3)))
    # plot a vertical line of the significance threshold
    ax_hist.axvline(p_tresh,
                    label="Sig. Threshold: " "%.2f" % p_tresh, color="darkgreen", linestyle="--", linewidth=3)

    # remove the top and right border of the plots
    sns.despine(top=True, right=True, left=False, bottom=False)

    # set title
    ax_box.set(title=plot_title)
    # set the axis labels (depends on the score used for classification or regression)
    if method == "Classification":
        score_label = "Accuracy Score"
    else:
        score_label = "R² Score"
    # label the histogram axes explicitly instead of relying on the implicit "current axes" of pyplot
    ax_hist.set_xlabel(score_label)
    ax_hist.set_ylabel("Frequency")
    # loc legend to the top left
    ax_hist.legend(loc="upper left")
    f.savefig(plot_title + ".png")
    plt.show()
    plt.close(f)


# calculates a k-fold cross val score and compares the mean performance score with a distribution of n models that
# were trained with permutated class labels
# more information can be found here:
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.permutation_test_score.html
def ml_permutation_test(iv, dv, algorithm, method, standard_method, cv_repeats, permutation_repeats, procedure_title,
                        n_splits=5, random_state=None):
    """Run a repeated stratified k-fold cross validation with a permutation test and plot the outcome.

    Parameters
    ----------
    iv : pandas.DataFrame
        Predictors; passed through CustomStandardization and StandardScaler before the algorithm.
    dv : array-like
        Target variable.
    algorithm : estimator
        Final estimator of the pipeline.
    method : str
        "Classification" scores with accuracy, everything else with r2.
    standard_method : str
        `method` argument of CustomStandardization.
    cv_repeats : int
        Number of repetitions of the k-fold cross validation.
    permutation_repeats : int
        Number of permutations.
    procedure_title : str
        Title (and file name) of the result plot.
    n_splits : int, default=5
        Number of folds per repetition.
    random_state : int, RandomState instance or None, default=None
        Seed of the cross validation splits; pass an int to get reproducible splits. The permutation of
        the targets is not affected by this argument.

    Returns
    -------
    results : dict
        Rounded summary statistics with the keys "Mean_score", "SD_score", "SE_score", "p_value",
        "Mean_Permutation_score", "Std_Permutation_score" and the unrounded "Sig_Treshold".
    scores : sequence of float
        The model scores on the true targets as returned by permutation_test_score.
    """
    # make a pipeline that does the preprocessing and ends with the chosen algorithm
    pipeline = Pipeline([
        ("custom_transformation", CustomStandardization(method=standard_method)),
        ("standardization", StandardScaler()),
        ("clf", algorithm)
    ])

    # initialize the repeated k-fold iterator (use stratified k-fold cv to get approximately equal group sizes during
    # each fold)
    cv = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=cv_repeats, random_state=random_state)

    # get the scoring function
    scorer = "accuracy" if method == "Classification" else "r2"

    # get the cross validation scores, the permutation scores and the p-value of the permutation test
    scores, permutation_scores, pvalue = permutation_test_score(pipeline, iv, dv, scoring=scorer,
                                                                cv=cv, n_permutations=permutation_repeats)

    # in repeated cv there are n_splits * n_repeats scores for each permutation run; the mean score of each
    # permutation run is needed several times below, so it is computed once
    permutation_means = np.mean(permutation_scores, axis=1)

    # Get the significance threshold (the model must be better than 95% of the permuted models)
    sig_tresh = np.percentile(permutation_means, 95.0)

    # save the results (statistics of the model scores and of the permutation scores)
    results = {
        "Mean_score": np.round(np.mean(scores), 4),
        "SD_score": np.round(np.std(scores), 4),
        "SE_score": np.round(sem(scores), 4),
        "p_value": np.round(pvalue, 5),
        "Mean_Permutation_score": np.round(np.mean(permutation_scores), 4),
        "Std_Permutation_score": np.round(np.std(permutation_means), 4),
        "Sig_Treshold": sig_tresh,
    }

    plot_permutation_test_results(permutation_scores=permutation_means,
                                  model_scores=scores,
                                  pvalue=np.round(pvalue, 3),
                                  p_tresh=np.round(sig_tresh, 3),
                                  method=method,
                                  plot_title=procedure_title)

    # output the results and scores
    return results, scores


# helper function for repeated k-fold cross validation without a permutation test
def rep_kfold_cv(iv, dv, algorithm, method, standard_method, cv_repeats, n_splits=5, random_state=None):
    """Run a repeated stratified k-fold cross validation and summarize the scores.

    Parameters
    ----------
    iv : pandas.DataFrame
        Predictors; passed through CustomStandardization and StandardScaler before the algorithm.
    dv : array-like
        Target variable.
    algorithm : estimator
        Final estimator of the pipeline.
    method : str
        "Classification" scores with accuracy, everything else with r2.
    standard_method : str
        `method` argument of CustomStandardization.
    cv_repeats : int
        Number of repetitions of the k-fold cross validation.
    n_splits : int, default=5
        Number of folds per repetition.
    random_state : int, RandomState instance or None, default=None
        Seed of the cross validation splits; pass an int to get reproducible splits.

    Returns
    -------
    dict
        Mean, standard deviation and standard error of the scores (keys "Mean_score", "SD_score",
        "SE_score"), each rounded to two decimals.
    """
    # make a pipeline that does the preprocessing and ends with the chosen algorithm
    pipeline = Pipeline([
        ("custom_transformation", CustomStandardization(method=standard_method)),
        ("standardization", StandardScaler()),
        ("clf", algorithm)
    ])

    # initialize the repeated stratified k-fold iterator
    # NOTE(review): the stratified splitter is also used for regression targets here - confirm that the
    # targets are discrete enough for stratification
    cv = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=cv_repeats, random_state=random_state)

    # get the scoring function
    scorer = "accuracy" if method == "Classification" else "r2"

    # get the scores of the repeated k-fold cross validation
    scores = cross_val_score(pipeline, X=iv, y=dv, scoring=scorer, cv=cv)
    print(scores)

    # calculate and return the results
    return {
        "Mean_score": np.round(np.mean(scores), 2),
        "SD_score": np.round(np.std(scores), 2),
        "SE_score": np.round(sem(scores), 2),
    }

#### --- Helper function to run the machine learning analysis with all relevant settings for classification and regression with the raw data and difference score data ---

In [None]:
# Run the permutation test with the "final settings" and save the results in a pandas dataframe
def get_ML_results(data, method, diff_score_method, permutation_test, dv):
    """Run the machine learning analysis with the final settings and return the results as a dataframe.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset with the keyboard features and the columns listed in non_keyboard_features.
    method : str
        "Classification" or "Regression"; selects the algorithms and the target.
    diff_score_method : str
        "ignore_practice" or "difference_score"; passed on as standardization method and, for regression
        with "difference_score", also applied to the target ("Con_<dv>" - "Pr_<dv>").
    permutation_test : bool
        If True, ml_permutation_test is run, otherwise rep_kfold_cv.
    dv : str
        For classification the name of the target column, for regression the name of the target without
        the "Con_" / "Pr_" prefix.

    Returns
    -------
    pandas.DataFrame
        One row per algorithm, indexed by (method, dv, algorithm name), with the result statistics as columns.
    """
    # dict with different algorithms for classification/regression
    algorithms = {
        "Classification": {
            "kNN_class": KNeighborsClassifier(n_neighbors=3)},
        "Regression": {
            "RidgeReg": RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1])},
    }

    # get the independent and dependent variable of the machine learning analysis
    predictors = data.drop(non_keyboard_features, axis=1)

    if method == "Regression":
        if diff_score_method == "difference_score":
            dependent_variable = data["Con_" + dv] - data["Pr_" + dv]
        else:
            dependent_variable = data["Con_" + dv]
    else:
        dependent_variable = data[dv]

    permutation_results = {}

    # run the analysis with every algorithm of the chosen method (the loop formerly stopped after the first
    # algorithm, which would silently skip any algorithm that is added to the dict above)
    for alg, estimator in algorithms[method].items():
        if permutation_test:
            analysis_title = method + " Permutation test with target " + dv + " using " + alg + " and " + diff_score_method
            print(analysis_title)
            # get the results of the permutation test (the raw model scores are not needed here)
            result_dic, _ = ml_permutation_test(iv=predictors,
                                                dv=dependent_variable,
                                                algorithm=estimator,
                                                method=method,
                                                standard_method=diff_score_method,
                                                cv_repeats=5,
                                                permutation_repeats=1000,
                                                procedure_title=analysis_title)
        else:
            analysis_title = method + " repeated cv with target " + dv + " using " + alg + " and " + diff_score_method
            print(analysis_title)
            result_dic = rep_kfold_cv(iv=predictors,
                                      dv=dependent_variable,
                                      algorithm=estimator,
                                      method=method,
                                      standard_method=diff_score_method,
                                      cv_repeats=5)
            print(result_dic)

        # save the results with labeled index in the results dictionary
        permutation_results[(method, dv, alg)] = result_dic

    # create a multiindexed dataframe from the results dictionary
    permutation_results_df = pd.DataFrame(permutation_results).T

    return permutation_results_df

#### --- Run the Machine Learning Analysis---

##### Use the Raw Data

Condition Classification

In [None]:
# Classify the condition from the condition phase data only, with a permutation test
cond_classification_results = get_ML_results(
    dataset,
    method="Classification",
    dv="condition",
    diff_score_method="ignore_practice",
    permutation_test=True,
)

Regression on Valence and Arousal

In [None]:
# Valence (condition phase data only, repeated cv without a permutation test)
valence_regression_results = get_ML_results(
    dataset,
    method="Regression",
    dv="samValence",
    diff_score_method="ignore_practice",
    permutation_test=False,
)

In [None]:
# Arousal (condition phase data only, repeated cv without a permutation test)
arousal_regression_results = get_ML_results(
    dataset,
    method="Regression",
    dv="samArousal",
    diff_score_method="ignore_practice",
    permutation_test=False,
)

##### Use the Difference score data

Condition Classification

In [None]:
# Repeat the condition classification with the difference score between the condition phase and the
# practice phase, which accounts for individual differences in typing behavior
diff_cond_classification_results = get_ML_results(
    dataset,
    method="Classification",
    dv="condition",
    diff_score_method="difference_score",
    permutation_test=True,
)

Regression on Valence and Arousal

In [None]:
# Valence (difference score data, repeated cv without a permutation test)
diff_valence_regression_results = get_ML_results(
    dataset,
    method="Regression",
    dv="samValence",
    diff_score_method="difference_score",
    permutation_test=False,
)

In [None]:
# Arousal (difference score data, repeated cv without a permutation test)
diff_arousal_regression_results = get_ML_results(
    dataset,
    method="Regression",
    dv="samArousal",
    diff_score_method="difference_score",
    permutation_test=False,
)

#### --- merge all machine learning results together into one dataframe ---

In [None]:
# add an outer index level to a result dataframe that names the data it was calculated with
def _label_results(results, data_label):
    return pd.concat([results], keys=[data_label])


# condition classification
ml_classification_results = pd.concat([
    _label_results(cond_classification_results, "Raw_data"),
    _label_results(diff_cond_classification_results, "Diff_data"),
])

# regression (valence and arousal results are stacked before they are labeled)
raw_regression_results = pd.concat([valence_regression_results, arousal_regression_results])
diff_regression_results = pd.concat([diff_valence_regression_results, diff_arousal_regression_results])
ml_reg1 = _label_results(raw_regression_results, "Raw_data")
ml_reg2 = _label_results(diff_regression_results, "Diff_data")

ml_regression_results = pd.concat([ml_reg1, ml_reg2])

### -- Save the results of all keyboard feature analysis (mixed ANOVA, ML & des. stats) in one excel file --

In [None]:
# Write all result tables into one excel file, one sheet per table
# (dataframe, float format, sheet name) in the order in which the sheets are written
result_sheets = [
    # descriptive stats
    (descriptive_hs_data, "%.3f", "Descriptive Stats High-Stress"),
    (descriptive_ls_data, "%.3f", "Descriptive Stats Low-Stress"),
    # anova results
    (anova_results_df, "%.4f", "Mixed Anova Results"),
    (anova_posthocs_df, "%.4f", "Anova Posthocs Result"),
    # machine learning classification results
    (ml_classification_results, "%.4f", "Machine_Learning_Classification"),
    # machine learning regression results
    (ml_regression_results, "%.4f", "Machine_Learning_Regression"),
]

with pd.ExcelWriter("Online_Study_Keyboard_Feature_Analysis.xlsx") as writer:
    for result_frame, number_format, sheet in result_sheets:
        result_frame.to_excel(writer, float_format=number_format, sheet_name=sheet)
