In [4]:
%load_ext autotime
import seaborn as sns
import pandas as pd
import numpy as np


from gscore.parsers import sqmass
from gscore.parsers.osw import OSWFile
from gscore.parsers import queries


time: 182 ms (started: 2022-01-29 14:02:24 +01:00)


In [5]:
# Load the two inputs for this run and attach the chromatograms to the precursors.
# NOTE(review): both paths are absolute and machine-specific; anyone else running
# this notebook has to edit them first. Consider a configurable data directory.
sqmass_file_path = "/home/aaron/projects/ghost/data/spike_in/chromatograms/AAS_P2009_167.sqMass"
osw_file_path = "/home/aaron/projects/ghost/data/spike_in/openswath/AAS_P2009_167.osw"
# Parse the sqMass file; the handle is released as soon as the `with` block exits.
with sqmass.SqMassFile(sqmass_file_path) as sqmass_file:
    chromatograms = sqmass_file.parse()
# Parse the OSW file into precursors using the chromatogram-training query.
with OSWFile(osw_file_path) as osw_file:
    precursors = osw_file.parse_to_precursors(
        query=queries.SelectPeakGroups.FETCH_CHROMATOGRAM_TRAINING_RECORDS
    )
# Hand the parsed chromatograms to the precursor collection; the next cell reads
# them back through `peakgroup.chromatograms`.
# The recorded run of this cell took about 1min 49s, so avoid re-running it needlessly.
precursors.set_chromatograms(chromatograms)

Cleaning unused score columns.
time: 1min 49s (started: 2022-01-29 14:02:26 +01:00)


In [6]:
# Build the training arrays from the top-ranked peak group of every precursor.
#
# For each precursor the peak groups are ordered by their "PROBABILITY" score
# (highest first) and only the first one is used. A record is emitted only when
# that peak group has chromatograms attached. Three parallel collections are
# filled, one entry per emitted record:
#   labels - `peakgroup.target`
#   scores - the "VAR*" sub-scores plus "PROBABILITY"
#   chroms - the chromatogram intensity arrays of the peak group
labels = []
scores = []
chroms = []

for precursor in precursors:

    # A precursor without any peak groups has no top candidate; skip it instead
    # of failing with IndexError on `peakgroups[0]` below.
    if not precursor.peakgroups:
        continue

    # In-place sort, best PROBABILITY first. This reorders the list stored on the
    # precursor itself, which is kept so the precursor state stays as before.
    precursor.peakgroups.sort(key=lambda x: x.scores["PROBABILITY"], reverse=True)

    peakgroup = precursor.peakgroups[0]

    # Peak groups with no chromatograms attached cannot contribute a record.
    if not peakgroup.chromatograms:
        continue

    # Feature vector: every "VAR*" score plus PROBABILITY, taken in the iteration
    # order of `peakgroup.scores`.
    # NOTE(review): the column order (and length) of `scores` relies on every peak
    # group iterating the same score names in the same order - confirm.
    pg_scores = [
        peakgroup.scores[score]
        for score in peakgroup.scores
        if score.startswith("VAR") or score == "PROBABILITY"
    ]

    scores.append(np.array(pg_scores))

    labels.append(peakgroup.target)

    chroms.append(
        peakgroup.get_chromatogram_intensity_arrays(
            scaled=False, interpolated=True, use_relative_intensities=True
        )
    )


# Convert to arrays: labels become a column vector, chroms become
# (record, 1, 6, 25).
# NOTE(review): 6 x 25 is hard-coded; presumably 6 traces of 25 interpolated
# points per peak group - confirm against get_chromatogram_intensity_arrays.
labels = np.array(labels).reshape(-1, 1)
scores = np.array(scores)
chroms = np.array(chroms).reshape((-1, 1, 6, 25))

time: 25.1 s (started: 2022-01-29 14:04:15 +01:00)


In [9]:
from gscore.models.deep_chrom_feature_classifier import DeepChromFeatureScorer

time: 251 µs (started: 2022-01-29 14:05:23 +01:00)


In [10]:
from gscore.scaler import Scaler

time: 207 µs (started: 2022-01-29 14:05:25 +01:00)


In [11]:
# Fit a fresh Scaler on the score matrix and replace `scores` with the result.
# NOTE(review): this cell is not idempotent. `scores` is overwritten with the
# transformed values, so running the cell a second time would fit a new scaler on
# already-transformed data. Re-run the feature-extraction cell first if this cell
# has to be executed again.
scaler = Scaler()

scores = scaler.fit_transform(scores)

time: 66.8 ms (started: 2022-01-29 14:05:26 +01:00)


In [12]:
# Instantiate the chromatogram/score classifier with fixed hyperparameters.
# NOTE(review): num_features=32 is a magic number; presumably it has to match the
# number of score columns (scores.shape[1]) - confirm, and consider deriving it.
# NOTE(review): threads=10 is machine-specific - confirm it suits the target host.
model = DeepChromFeatureScorer(
    num_features=32,
    threads=10,
    max_epochs=5
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


time: 49.9 ms (started: 2022-01-29 14:05:35 +01:00)


In [13]:
# Train the model on the chromatogram arrays, the scaled score matrix and the
# target labels built in the cells above. The recorded run took about 29 s.
# NOTE(review): no random seed is set anywhere in the visible cells, so the
# trained weights are presumably not reproducible between runs - confirm.
model.fit(
    chroms,
    scores,
    labels
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type       | Params
---------------------------------------------
0 | conv_layers   | Sequential | 10.9 K
1 | linear_layers | Sequential | 2.1 M 
---------------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.313     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

time: 29.2 s (started: 2022-01-29 14:05:37 +01:00)


In [14]:
# Persist the scaler fitted above so the same scaling can be reused with the saved model.
# NOTE(review): absolute, machine-specific output path.
scaler.save("/home/aaron/projects/ghost/data/spike_in/deepchrom/dc_v_t_1_29.scaler")

time: 1.67 ms (started: 2022-01-29 14:06:12 +01:00)


In [15]:
# Persist the trained model next to its scaler (same file stem, ".model" suffix).
# NOTE(review): absolute, machine-specific output path.
model.save("/home/aaron/projects/ghost/data/spike_in/deepchrom/dc_v_t_1_29.model")

time: 36.7 ms (started: 2022-01-29 14:06:12 +01:00)


In [16]:
# Evaluate the trained model. The arrays passed here are the very same ones that
# were given to model.fit above, so the printed value is a training-set figure,
# not a held-out estimate.
# NOTE(review): the metric behind the returned number is not visible from this
# notebook (presumably accuracy) - confirm in DeepChromFeatureScorer.evaluate.
model.evaluate(
    chroms,
    scores,
    labels
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]



0.8329125281417511

time: 1.05 s (started: 2022-01-29 14:06:39 +01:00)
