In [1]:
# Development convenience: (re)load IPython's autoreload extension and switch it to
# mode 2 so that edits to imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# AlphaViz tutorial

This tutorial covers the basics of using AlphaViz as a Python package. It includes the following sections:
1. [**Setup**](#Setup)
2. [**Data upload**](#Data-upload)
3. [**Analysis**](#Analysis)  
    a) [**Protein level**](#Protein-level)  
    b) [**Peptide level**](#Peptide-level)
4. [**Quality control of the entire sample**](#Quality-control-of-the-entire-sample)

# Setup

### Import all necessary libraries

In [2]:
import os
import logging
import pandas as pd
from io import StringIO

import alphatims.bruker
import alphaviz.third_party.thermo
import alphatims.utils

# visualization
import panel as pn
import bokeh.server.views.ws
from bokeh.models.widgets.tables import NumberFormatter
import holoviews as hv
from bokeh.io import export_svgs

from holoviews import opts
from holoviews.operation.datashader import dynspread, rasterize, shade, datashade

# local
import alphaviz
import alphaviz.utils
import alphaviz.io
import alphaviz.preprocessing
import alphaviz.plotting



### Set the paths to the raw data, the DIA-NN output folder, and the FASTA file

In [3]:
# Single folder on the local machine that holds the Thermo raw file, the DIA-NN
# output and the fasta file. Edit this one location to point the notebook at
# another copy of the data instead of changing every path below.
data_folder = '/Users/zengwenfeng/Workspace/Data/Thermo_iRT/DIA'

# path to the Thermo .raw file
experimental_file = os.path.join(data_folder, '20200911_EXPL0_IgPa_6min_DIA_2.raw')
# path to the DIA-NN output folder
diann_output_folder = data_folder
# path to the fasta file
fasta_file = os.path.join(data_folder, 'irtfusion.fasta')

# Data upload

### 1) Load the raw file

In [4]:
# Thermo: read the acquisition at `experimental_file` with the RawFile reader from
# alphaviz.third_party.thermo; the resulting object is used by all later cells.
raw_data = alphaviz.third_party.thermo.RawFile(experimental_file)

100%|██████████| 4381/4381 [00:03<00:00, 1095.54it/s]


finish loading raw


### 2) Load the files required for visualisation from the DIA-NN output folder

In [5]:
# Read the DIA-NN report from the output folder and hand it directly to alphaviz,
# which returns the peptides table used in the rest of the notebook.
df = alphaviz.io.create_diann_peptides_table(
    pd.read_table(os.path.join(diann_output_folder, 'diann-iRT.tsv'))
)
df

Unnamed: 0,Modified.Sequence,Length,RT,Predicted.RT,Charge,IM,Predicted.IM,CScore,Decoy.CScore,Decoy.Evidence,...,Q.Value,Quantity.Quality,RT.Start,RT.Stop,Sequence,Sequence_AP_mod,Spectrum.Similarity,Translated.Q.Value,iIM,iRT
0,ADVTPADFSEWSK,13,3.65029,3.59249,1,0,0,14.8232,-10000000.0,0.0,...,0.006098,0.842998,3.56589,3.77683,ADVTPADFSEWSK,ADVTPADFSEWSK,0.433771,0,1.68773,51.9226
1,ADVTPADFSEWSK,13,3.63654,3.59249,2,0,0,19.11,4.58392,2.92875,...,0.004357,0.95626,3.55215,3.76308,ADVTPADFSEWSK,ADVTPADFSEWSK,0.449132,0,0.998866,51.9226
2,ADVTPADFSEWSK,13,3.66377,3.59249,3,0,0,15.1635,2.38364,1.39201,...,0.00541,0.924666,3.53722,3.83262,ADVTPADFSEWSK,ADVTPADFSEWSK,0.636543,0,0.77516,51.9226
3,DGLDAASYYAPVR,13,3.18626,3.18956,1,0,0,16.3338,-10000000.0,0.0,...,0.004357,0.911319,3.10194,3.31277,DGLDAASYYAPVR,DGLDAASYYAPVR,0.619046,0,1.71296,40.8416
4,DGLDAASYYAPVR,13,3.21351,3.18956,2,0,0,19.1554,-10000000.0,0.0,...,0.004357,0.894287,3.08704,3.34004,DGLDAASYYAPVR,DGLDAASYYAPVR,0.591196,0,1.01213,40.8416
5,DGLDAASYYAPVR,13,3.19858,3.18956,3,0,0,16.3489,0.492205,0.397316,...,0.004357,0.857342,3.11428,3.32512,DGLDAASYYAPVR,DGLDAASYYAPVR,0.62724,0,0.787908,40.8416
6,GAGSSEPVTGLDAK,14,1.96155,1.95832,1,0,0,15.9179,-10000000.0,0.0,...,0.004472,0.806529,1.87708,2.04593,GAGSSEPVTGLDAK,GAGSSEPVTGLDAK,0.273914,0,1.63777,-1.65993
7,GAGSSEPVTGLDAK,14,1.94433,1.95832,2,0,0,17.513,-10000000.0,0.0,...,0.004357,0.917229,1.85987,2.11314,GAGSSEPVTGLDAK,GAGSSEPVTGLDAK,0.321651,0,0.958155,-1.65993
8,GAGSSEPVTGLDAK,14,1.97387,1.95832,3,0,0,15.8321,-10000000.0,0.0,...,0.004472,0.865148,1.84723,2.1005,GAGSSEPVTGLDAK,GAGSSEPVTGLDAK,0.269124,0,0.749613,-1.65993
9,GAGSSEPVTGLDAKTPVISGGPYEYR,26,2.78937,3.14369,4,0,0,13.5504,-10000000.0,0.0,...,0.00866,0.724788,2.57854,2.91593,GAGSSEPVTGLDAKTPVISGGPYEYR,GAGSSEPVTGLDAKTPVISGGPYEYR,0.343409,0,0.960329,39.5117


# Analysis

To start the analysis, show the "Chromatograms" plot that visualises the total ion chromatograms and the base peak chromatograms for MS1 and MS2 data.

In [6]:
# Build the chromatogram figure from the loaded raw data; naming it on the last
# line makes the notebook render it as the cell output.
chromatograms_plot = alphaviz.plotting.plot_chrom(raw_data)
chromatograms_plot

## Peptide level

From this point onwards, we are going to assess the individual quality of each peptide.

In [7]:
# Row of the peptides table to inspect in the cells below.
selected_peptide_index = 0
peptides_table = df
# Kept as a one-element list because the index filter below takes a list-like.
scan_number = [int(peptides_table.loc[selected_peptide_index, 'MS/MS scan number'])]
# Filter the fragment-frame table once and reuse the result for both the frame
# lookup and the displayed output (it was previously computed twice).
selected_fragment_frames = raw_data.fragment_frames[raw_data.fragment_frames.index.isin(scan_number)]
# Frame value of the first matching row; raises IndexError if no row matches.
ms2_frame = selected_fragment_frames.Frame.values[0]
selected_fragment_frames

Unnamed: 0,Frame,ScanNumBegin,ScanNumEnd,IsolationWidth,IsolationMz,Precursor
2578,2664,0,0,173.0,1392.0,29


In [8]:
# Last MS1 frame (MsMsType == 0) whose Id is smaller than the selected MS2 frame.
# The Id column is selected by name: taking column 0 by position silently returns
# the value of a different column whenever Id is not the first column of
# raw_data.frames.
ms1_frame = raw_data.frames.loc[
    (raw_data.frames.MsMsType == 0) & (raw_data.frames.Id < ms2_frame),
    'Id',
].iloc[-1]
# MS1 frame as key, the selected MS2 frame as value
ms1_ms2_frames = {ms1_frame: ms2_frame}
ms1_ms2_frames

{0: 2664}

In [9]:
# Build the mass dictionary from the modification and amino-acid tables that
# live under alphaviz.utils.DATA_PATH.
mass_dict = alphaviz.utils.get_mass_dict(
    modfile=os.path.join(alphaviz.utils.DATA_PATH, 'modifications.tsv'),
    aasfile=os.path.join(alphaviz.utils.DATA_PATH, 'amino_acids.tsv'),
    verbose=False,
)

For the selected peptide visualize its 2D and 3D elution profiles.

In [10]:
# Collect the attributes of the selected precursor straight from the peptides table.
peptide = {
    key: peptides_table.loc[selected_peptide_index, column]
    for key, column in (
        ("sequence", 'Sequence_AP_mod'),
        ("charge", 'Charge'),
        ("im", 'IM'),
    )
}
# Stored as RT * 60; the plot title below divides by 60 again to report minutes.
peptide["rt"] = peptides_table.loc[selected_peptide_index, 'RT'] * 60

print(f"The quality score of the peptide: {peptides_table.loc[selected_peptide_index, 'Quantity.Quality']}.")

# m/z computed by alphaviz from the precursor mass of the parsed sequence and the charge.
peptide['mz'] = alphaviz.utils.calculate_mz(
    prec_mass=alphaviz.utils.get_precmass(alphaviz.utils.parse(peptide['sequence']), mass_dict),
    charge=peptide['charge'],
)

# Last expression of the cell, so the returned figure is rendered as the cell output.
alphaviz.plotting.plot_elution_profile(
    raw_data,
    peptide,
    mass_dict,
    mz_tol=50,
    rt_tol=30,
    im_tol=0.05,
    title=f"Precursor/fragments elution profile of {peptides_table.loc[selected_peptide_index, 'Modified.Sequence']} in RT dimension ({peptide['rt'] / 60: .2f} min)",
    colorscale_qualitative="Alphabet",
    colorscale_sequential="Viridis",
)

The quality score of the peptide: 0.842998.


In [None]:
# First (and, as built above, only) MS1 frame stored in the MS1 -> MS2 mapping.
current_frame = next(iter(ms1_ms2_frames))
# The mapping stores a single MS2 frame per MS1 frame, so indexing the value with
# [1] fails; the precursor shown in the title is looked up by name from the
# fragment-frame table instead.
current_ms2_frame = ms1_ms2_frames[current_frame]
precursor_id = raw_data.fragment_frames.loc[
    raw_data.fragment_frames.Frame == current_ms2_frame,
    'Precursor',
].values[0]
# NOTE(review): `data_ions` is not defined in any cell visible in this notebook;
# it has to be created before this cell can run - TODO confirm where it comes from.
ms_spectra_plot = alphaviz.plotting.plot_mass_spectra(
    data_ions,
    title=f'MS2 spectrum for Precursor: {precursor_id}',
    sequence=peptides_table.loc[selected_peptide_index, 'Sequence']
)
ms_spectra_plot