In [1]:
import pandas as pd
import numpy as np

from gscore.osw.peakgroups import fetch_peak_groups
from gscore.osw.queries import (
    FETCH_UNSCORED_PEAK_GROUPS
)
from gscore.models.denoiser import DenoizingClassifier
from gscore.osw.connection import OSWConnection

from gscore.models.preprocess import STANDARD_SCALAR_PIPELINE
from gscore.models.distributions import build_false_target_protein_distributions, ScoreDistribution

from gscore.workflows.model_single_run import format_model_distribution
from gscore.osw.peakgroups import PeakGroupList

In [2]:
import seaborn as sns 

In [4]:
# Location handed to fetch_peak_groups(host=...) in the loading cell.
# NOTE(review): this is an absolute, machine-specific path and its final
# component is only whitespace — presumably the file name was lost; confirm
# the intended input file before re-running.
osw_path = '/home/aaron/projects/gscorer/data/openswath/    '

In [7]:
# Run the FETCH_UNSCORED_PEAK_GROUPS query against the source at `osw_path`.
peak_groups = fetch_peak_groups(host=osw_path, query=FETCH_UNSCORED_PEAK_GROUPS)

# Request every peak group (return_all=True); the cells below treat the
# result as a table with named columns.
all_peak_groups = peak_groups.select_peak_group(return_all=True)

In [9]:
# Display the column labels available on the combined peak-group table.
all_peak_groups.columns

Index(['transition_group_id', 'feature_id', 'exp_rt', 'norm_rt', 'delta_rt',
       'mz', 'charge', 'decoy', 'peptide_sequence',
       'modified_peptide_sequence', 'protein_accession', 'protein_decoy',
       'area_intensity', 'apex_intensity', 'ms2_area_intensity',
       'total_area_intensity', 'var_massdev_score_ms1',
       'var_isotope_correlation_score_ms1', 'var_isotope_overlap_score_ms1',
       'var_xcorr_coelution_contrast_ms1', 'var_xcorr_coelution_combined_ms1',
       'var_xcorr_shape_contrast_ms1', 'var_xcorr_shape_combined_ms1',
       'var_bseries_score', 'var_dotprod_score', 'var_intensity_score',
       'var_isotope_correlation_score', 'var_isotope_overlap_score',
       'var_library_corr', 'var_library_dotprod', 'var_library_manhattan',
       'var_library_rmsd', 'var_library_rootmeansquare', 'var_library_sangle',
       'var_log_sn_score', 'var_manhattan_score', 'var_massdev_score',
       'var_massdev_score_weighted', 'var_norm_rt_score',
       'var_xcorr_coeluti

In [8]:
# Display the list of score column names exposed as `ml_features`.
peak_groups.ml_features

['var_massdev_score_ms1',
 'var_isotope_correlation_score_ms1',
 'var_isotope_overlap_score_ms1',
 'var_xcorr_coelution_contrast_ms1',
 'var_xcorr_coelution_combined_ms1',
 'var_xcorr_shape_contrast_ms1',
 'var_xcorr_shape_combined_ms1',
 'var_bseries_score',
 'var_dotprod_score',
 'var_intensity_score',
 'var_isotope_correlation_score',
 'var_isotope_overlap_score',
 'var_library_corr',
 'var_library_dotprod',
 'var_library_manhattan',
 'var_library_rmsd',
 'var_library_rootmeansquare',
 'var_library_sangle',
 'var_log_sn_score',
 'var_manhattan_score',
 'var_massdev_score',
 'var_massdev_score_weighted',
 'var_norm_rt_score',
 'var_xcorr_coelution',
 'var_xcorr_coelution_weighted',
 'var_xcorr_shape',
 'var_xcorr_shape_weighted',
 'var_yseries_score']

In [11]:
# Pairwise plot matrix of the MS1-level score columns, coloured by 'target'.
# The grid object returned by seaborn is kept in `test` for later cells.
test = sns.pairplot(
    data=all_peak_groups[[
        'target',
        'var_massdev_score_ms1',
        'var_isotope_correlation_score_ms1',
        'var_isotope_overlap_score_ms1',
        'var_xcorr_coelution_contrast_ms1',
        'var_xcorr_coelution_combined_ms1',
        'var_xcorr_shape_contrast_ms1',
        'var_xcorr_shape_combined_ms1',
    ]],
    hue="target",
)

<seaborn.axisgrid.PairGrid at 0x7f107f9a99d0>

In [None]:
# Write the MS1 pair plot held in `test` to a PDF in the working directory.
# (The method is `savefig`; the previous `savfig` spelling raised AttributeError.)
test.savefig('ms1_scores.pdf')

In [None]:
# Pairwise plot matrix of the MS2-level score columns, coloured by 'target'.
# Each column is listed exactly once: repeating a label (as
# 'var_manhattan_score' was before) selects a frame with duplicate column
# labels, which is redundant at best and ambiguous for the plotting code.
sns.pairplot(
    all_peak_groups[
        [
            'target',
            'var_bseries_score',
            'var_yseries_score',
            'var_intensity_score',
            'var_isotope_correlation_score',
            'var_isotope_overlap_score',
            'var_dotprod_score',
            'var_log_sn_score',
            'var_manhattan_score',
            'var_massdev_score',
            'var_massdev_score_weighted',
            'var_norm_rt_score'
        ]
    ],
    hue="target"
)

In [None]:
# Pairwise plot matrix of the library-comparison score columns,
# coloured by 'target'.
sns.pairplot(
    data=all_peak_groups[[
        'target',
        'var_library_corr',
        'var_library_dotprod',
        'var_library_manhattan',
        'var_library_rmsd',
        'var_library_rootmeansquare',
        'var_library_sangle',
    ]],
    hue="target",
)

In [None]:
# Pairwise plot matrix of the cross-correlation score columns,
# coloured by 'target'.
sns.pairplot(
    data=all_peak_groups[[
        'target',
        'var_xcorr_coelution',
        'var_xcorr_coelution_weighted',
        'var_xcorr_shape',
        'var_xcorr_shape_weighted',
    ]],
    hue="target",
)

In [None]:
# Drop the notebook-level name for the combined table now that plotting is
# done. Memory is only released if nothing else still references the object;
# any later cell that uses `all_peak_groups` must re-run the loading cell.
del all_peak_groups