In [None]:
from alphadia.calibration.property import calibration_model_provider, Calibration
from alphadia.workflow.managers.calibration_manager import CalibrationManager
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Calibration Models
By default, alphaDIA comes with several calibration models, all provided by the `calibration_model_provider` (imported above from `alphadia.calibration.property`).
We can print the model provider to see what models are available.

In [None]:
# Leaving the provider as the last expression of the cell displays its repr,
# which is how this notebook inspects the available models.
calibration_model_provider

# Calibrating a Single Property

In [None]:
# Simulate 1000 library m/z values and matching "observed" values that carry
# Gaussian noise (sigma = 0.1) plus a systematic offset proportional to m/z.
# A locally seeded generator keeps the data identical on every "Run All".
rng = np.random.default_rng(42)
mz_library = np.linspace(100, 1000, 1000)
mz_observed = mz_library + rng.normal(0, 0.1, 1000) + mz_library * 0.001
mz_df = pd.DataFrame({"mz_library": mz_library, "mz_observed": mz_observed})

# We will calculate the difference between the observed and library mz values only for plotting purposes
mz_residual = mz_df["mz_observed"] - mz_df["mz_library"]
plt.scatter(mz_df["mz_library"], mz_residual, s=1)
plt.xlabel("mz_library")
plt.ylabel("mz_observed - mz_library")
# Render explicitly so the cell does not echo the repr of the last matplotlib call.
plt.show()

In [None]:
# LinearRegression from scikit-learn serves as the underlying model
from sklearn.linear_model import LinearRegression

# The Calibration is configured with `mz_library` as input column,
# `mz_observed` as target column and `mz_calibrated` as output column.
# NOTE(review): transform_deviation=1e6 presumably expresses deviations in ppm — confirm
mz_calibration = Calibration(
    name="mz_calibration",
    model=LinearRegression(),
    input_columns=["mz_library"],
    target_columns=["mz_observed"],
    output_columns=["mz_calibrated"],
    transform_deviation=1e6,
)

# The model is then fit to the data (plot=True requests a plot of the fit)
mz_calibration.fit(mz_df, plot=True)

In [None]:
# The model can then be used to predict the calibrated mz values
# This happens in place by default
# NOTE(review): presumably the result is stored in the `mz_calibrated` column
# named in `output_columns` — confirm
mz_calibration.predict(mz_df)
# Show the first rows of the dataframe after prediction
print(mz_df.head())

In [None]:
# We can also get the 95% confidence interval of the model error
# (0.95 is the value passed as the second argument of `ci`)
# It will return a numpy array with an element for every input column
print(mz_calibration.ci(mz_df, 0.95))

In [None]:
import tempfile
import os

# Save the fitted calibration to a `.pkl` file in the system temp directory.
# `tempfile.gettempdir()` is used rather than the module attribute
# `tempfile.tempdir`: the attribute is None until a tempfile function has
# initialised it, and `os.path.join(None, ...)` raises a TypeError.
path = os.path.join(tempfile.gettempdir(), "mz_calibration.pkl")
mz_calibration.save(path)

# Load the calibration back from disk and use the loaded instance to predict
mz_calibration_loaded = Calibration.from_file(path)
mz_calibration_loaded.predict(mz_df)
print(mz_df.head())

# Calibrating a Single Property using Multiple Inputs

In [None]:
# Simulate data in which the m/z error depends on both m/z and retention time:
# uniformly distributed library m/z values, evenly spaced retention times, and
# observed m/z values with Gaussian noise (sigma = 0.01), a positive drift
# proportional to m/z and a negative drift proportional to rt.
# A locally seeded generator keeps the data identical on every "Run All".
rng = np.random.default_rng(42)
rt_library = np.linspace(0, 100, 1000)
mz_library = rng.uniform(100, 1000, 1000)
mz_observed = (
    mz_library
    + rng.normal(0, 0.01, 1000)
    + mz_library * 0.0001
    - rt_library * 0.0001
)
mz_df = pd.DataFrame(
    {"rt_library": rt_library, "mz_library": mz_library, "mz_observed": mz_observed}
)

# The residual is only computed for plotting purposes
mz_residual = mz_df["mz_observed"] - mz_df["mz_library"]

# Residual as a function of retention time
plt.scatter(mz_df["rt_library"], mz_residual, s=1)
plt.xlabel("rt_library")
plt.ylabel("mz_observed - mz_library")
plt.show()

# Residual as a function of library m/z
plt.scatter(mz_df["mz_library"], mz_residual, s=1)
plt.xlabel("mz_library")
plt.ylabel("mz_observed - mz_library")
plt.show()

In [None]:
# We will use a linear regression model to calibrate the mz values
mz_calibration = Calibration(
    name="mz_calibration",
    model=LinearRegression(),
    input_columns=["mz_library", "rt_library"],
    target_columns=["mz_observed"],
    output_columns=["mz_calibrated"],
    transform_deviation=1e6,
)

# The model is then fit to the data
mz_calibration.fit(mz_df, plot=True)

# Using the Calibration Manager to handle more complex cases

In [None]:
# initialize the calibration manager
calibration_manager = CalibrationManager()

# load the config from a dictionary. The dictionary could be loaded from a yaml file
# The config below registers one group named "mz_calibration" holding two
# estimators: "mz" (LinearRegression on mz_library -> mz_observed) and
# "rt" (PolynomialRegression of degree 3 on rt_library -> rt_observed).
# Models are referenced by name here instead of being passed as instances.
# NOTE(review): transform_deviation=1e6 presumably expresses deviations in ppm,
# and None presumably leaves them untransformed — confirm
calibration_manager.setup_estimator_groups(
    [
        {
            "name": "mz_calibration",
            "estimators": [
                {
                    "name": "mz",
                    "model": "LinearRegression",
                    "input_columns": ["mz_library"],
                    "target_columns": ["mz_observed"],
                    "output_columns": ["mz_calibrated"],
                    "transform_deviation": 1e6,
                },
                {
                    "name": "rt",
                    "model": "PolynomialRegression",
                    "model_args": {"degree": 3},
                    "input_columns": ["rt_library"],
                    "target_columns": ["rt_observed"],
                    "output_columns": ["rt_calibrated"],
                    "transform_deviation": None,
                },
            ],
        }
    ]
)

In [None]:
# The calibration manager contains a single estimator group ("mz_calibration")
# with two calibration estimators ("mz" and "rt"), as configured in the setup call
calibration_manager.estimator_groups

In [None]:
# Let's create some test data with mz and rt values.
# A locally seeded generator keeps the data identical on every "Run All".
rng = np.random.default_rng(42)

# m/z: Gaussian noise (sigma = 0.001) plus a drift proportional to m/z and a
# constant offset of 0.005
mz_library = np.linspace(100, 1000, 1000)
mz_observed = (
    mz_library + rng.normal(0, 0.001, 1000) + mz_library * 0.00001 + 0.005
)

# rt: Gaussian noise (sigma = 0.5) plus a non-linear (sinusoidal) distortion
rt_library = np.linspace(0, 100, 1000)
rt_observed = rt_library + rng.normal(0, 0.5, 1000) + np.sin(rt_library * 0.05)

df = pd.DataFrame(
    {
        "mz_library": mz_library,
        "mz_observed": mz_observed,
        "rt_library": rt_library,
        "rt_observed": rt_observed,
    }
)

In [None]:
# First, we will manually retrieve the mz estimator and fit it to the data.
# The group and estimator names are the ones registered in the
# `setup_estimator_groups` call. They are spelled out rather than derived from
# `CALIBRATION_GROUPS_CONFIG.keys()`: the result of `.keys()` on a dict cannot
# be indexed with `[0]`, and iterating over a group key does not yield
# estimator objects.
# NOTE(review): if CalibrationManager offers accessors for the registered
# group/estimator names, prefer those — confirm against the class
groups = ["mz_calibration"]
print(f"groups: {groups}")
estimators = ["mz", "rt"]
print(f"estimators: {estimators}")

# collect the estimator instance and fit it to the data
estimator = calibration_manager.get_estimator(groups[0], estimators[0])
estimator.fit(df, plot=True)
# predict the calibrated values
estimator.predict(df)

In [None]:
# Now we will perform the calibration on the group level.
# The group is addressed by the name it was registered under in the
# `setup_estimator_groups` call, so this cell does not rely on indexing a
# collection of group names built in another cell.
group_name = "mz_calibration"

# All estimators in the group will be fit to the data
calibration_manager.fit(df, group_name, plot=True)

# we can then predict the calibrated values for all estimators in the group
calibration_manager.predict(df, group_name)

In [None]:
# Save the calibration manager to a file in the system temp directory and load
# it back into a new manager instance.
# `tempfile.gettempdir()` is used rather than the module attribute
# `tempfile.tempdir`: the attribute is None until a tempfile function has
# initialised it, and `os.path.join(None, ...)` raises a TypeError.
# `os` and `tempfile` are imported in an earlier cell of this notebook.
temp_path = path = os.path.join(tempfile.gettempdir(), "calibration.pkl")
# The save location is set through the manager's `_path` attribute before saving
calibration_manager._path = temp_path
calibration_manager.save()

calibration_manager_loaded = CalibrationManager(temp_path)
calibration_manager_loaded.load()

# We can also get the 95% confidence interval of the model error for all estimators in the group
# NOTE(review): assumes `estimator_groups` can be indexed with 0 and that each
# group is a mapping whose values are the estimators — confirm
for estimator in calibration_manager_loaded.estimator_groups[0].values():
    ci = estimator.ci(df, 0.95)
    print(f" 95% CI for {estimator}: {ci}")