pwiz_tools/BiblioSpec/tests/reference/tables.check

-- BiblioSpec format documentation
-- These commands will create an empty BiblioSpec library when used with SQLite3. You can use them as a starting point for your own files.
-- This file was generated by the "blibbuild -d" command during the ProteoWizard build process. Do not edit, it may be overwritten and your changes will be lost.


CREATE TABLE LibInfo( -- gives top level information about library, including whether it is redundant or non-redundant (nr). Redundant libraries may have more than one spectrum per precursor.
libLSID TEXT, -- LSID of form urn:lsid:<authority>:spectral_library:bibliospec:<type:redundant|nr>:<library name> e.g. urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:byonic.blib
createTime TEXT, -- local creation time in ctime() format e.g. Thu Nov 16 17:02:18 2017
numSpecs INTEGER, -- number of spectra in this library (-1 means not yet counted)
majorVersion INTEGER, -- revision number for this library (count starts at 1)
minorVersion INTEGER -- Schema version number:
-- Version 10 adds TIC as a column
-- Version 9 adds Proteins and RefSpectraProteins tables
-- Version 8 adds startTime and endTime
-- Version 7 adds peak annotations
-- Version 6 generalized ion mobility to value, high energy offset, and type (currently drift time msec, and inverse reduced ion mobility Vsec/cm2)
-- Version 5 added small molecule columns
-- Version 4 added collisional cross section for ion mobility, still supports drift time only
-- Version 3 added product ion mobility offset information for Waters Mse IMS
-- Version 2 added ion mobility information
)

INSERT INTO LibInfo values('urn:lsid:proteome.gs.washington.edu:spectral_library:bibliospec:redundant:example','Fri Feb 21 16:04:56 2020',-1,0,10)

CREATE TABLE RefSpectra ( -- spectrum metadata - actual mz/intensity pairs in RefSpectraPeaks
id INTEGER primary key autoincrement not null, -- lookup key for RefSpectraPeaks
peptideSeq VARCHAR(150), -- unmodified peptide sequence, can be left blank for small molecule use
precursorMZ REAL, -- mz of the precursor that produced this spectrum
precursorCharge INTEGER, -- should agree with adduct if provided
peptideModSeq VARCHAR(200), -- modified peptide sequence, can be left blank for small molecule use
prevAA CHAR(1), -- position of peptide in its parent protein (can be left blank)
nextAA CHAR(1),  -- position of peptide in its parent protein (can be left blank)
copies INTEGER, -- number of copies this spectrum was chosen from if it is in a filtered library
numPeaks INTEGER, -- number of peaks, should agree with corresponding entry in RefSpectraPeaks
ionMobility REAL, -- ion mobility value, if known (see ionMobilityType for units)
collisionalCrossSectionSqA REAL, -- precursor CCS in square Angstroms for ion mobility, if known
ionMobilityHighEnergyOffset REAL, -- ion mobility value increment for fragments (see ionMobilityType for units)
ionMobilityType TINYINT, -- ion mobility units (required if ionMobility is used, see IonMobilityTypes table for key)
retentionTime REAL, -- chromatographic retention time in minutes, if known
startTime REAL, -- start retention time in minutes, if known
endTime REAL, -- end retention time in minutes, if known
totalIonCurrent REAL, -- total ion current of spectrum
moleculeName VARCHAR(128), -- precursor molecule's name (not needed for peptides)
chemicalFormula VARCHAR(128), -- precursor molecule's neutral formula (not needed for peptides)
precursorAdduct VARCHAR(128), -- ionizing adduct e.g. [M+Na], [2M-H2O+2H] etc (not needed for peptides)
inchiKey VARCHAR(128), -- molecular identifier for structure retrieval (not needed for peptides)
otherKeys VARCHAR(128), -- alternative molecular identifiers for structure retrieval, tab separated name:value pairs e.g. cas:58-08-2\thmdb:01847 (not needed for peptides)
fileID INTEGER, -- index into SpectrumSourceFiles table for source file information
SpecIDinFile VARCHAR(256), -- original spectrum label, id, or description in source file
score REAL, -- spectrum score, typically a probability score (see scoreType)
scoreType TINYINT -- spectrum score type, see ScoreTypes table for meaning
)

CREATE TABLE Modifications ( -- modification masses and positions (peptide use only)
id INTEGER primary key autoincrement not null,
RefSpectraID INTEGER, -- the RefSpectra in which this modification occurs
position INTEGER, -- position of the modified AA in the peptide (1-based)
mass REAL -- incremental mass of the modification
)

CREATE TABLE RefSpectraPeaks( -- mz and intensity values
RefSpectraID INTEGER, -- ID of the RefSpectra containing these peaks
peakMZ BLOB, -- mz values encoded as little-endian 64 bit doubles, length is determined by the numPeaks value in the corresponding RefSpectra. Usually zlib-compressed if compressed size is less than original size.
peakIntensity BLOB -- mz values encoded as little-endian 32 bit floats, length is determined by the numPeaks value in the corresponding RefSpectra.  Usually zlib-compressed if compressed size is less than original size.
)

CREATE TABLE Proteins -- protein information for RefSpectra.
(id INTEGER primary key autoincrement not null,
accession VARCHAR(200) -- protein accession number
)

CREATE TABLE RefSpectraProteins -- mapping of proteins between RefSpectra and Proteins tables.
(RefSpectraId INTEGER not null, -- the RefSpectra being mapped to a protein
ProteinId INTEGER not null -- the Protein for the RefSpectra
)

CREATE TABLE RefSpectraPeakAnnotations -- optional annotations for peaks in RefSpectra. There may be more than one annotation per peak, and not every peak in a RefSpectra has to be annotated.
(id INTEGER primary key autoincrement not null,
RefSpectraID INTEGER not null, -- the RefSpectra containing the peak being annotated
peakIndex INTEGER not null, -- index into the mz/intensity list for the RefSpectra
name VARCHAR(256), -- fragment molecule name
formula VARCHAR(256), -- fragment neutral chemical formula
inchiKey VARCHAR(256), -- fragment molecular identifier for structure retrieval
otherKeys VARCHAR(256), -- alternative molecular identifiers for fragment structure retrieval, tab separated e.g. cas:58-08-2\thmdb:01847 
charge INTEGER, -- integer charge value, must agree with fragment adduct
adduct VARCHAR(256), -- fragment adduct description, can include neutral loss e.g. [M+H] or [M-H2O+] 
comment VARCHAR(256), -- freetext comment
mzTheoretical REAL not null, -- calculated mz, should agree with formula and adduct if any
mzObserved REAL not null -- actual measured mz, should agree with the indexed mz found in the RefSpectra
)

CREATE TABLE SpectrumSourceFiles ( -- information about the file or files from which this spectral library was derived
id INTEGER PRIMARY KEY autoincrement not null,
fileName VARCHAR(512), -- source spectrum file; same as idFilename if embedded spectra were used, otherwise the path to the external spectrum file (mzML/mzXML)
idFileName VARCHAR(512), -- identification file, typically some kind of search tool output
cutoffScore REAL -- filter threshold used when converting the source file to a BiblioSpec library. See RefSpectra scoreType field for information about the type of cutoff.
)

CREATE TABLE ScoreTypes ( -- information about the various kinds of cutoff scores understood by BiblioSpec
id INTEGER PRIMARY KEY,  -- as used in scoreType field of RefSpectra
scoreType VARCHAR(128), -- name of the score type, 
probabilityType VARCHAR(128) -- detail about the cutoff logic used by each score type, PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT, PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT, or NOT_A_PROBABILITY_VALUE
)

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(0, 'UNKNOWN', 'NOT_A_PROBABILITY_VALUE')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(1, 'PERCOLATOR QVALUE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(2, 'PEPTIDE PROPHET SOMETHING', 'PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(3, 'SPECTRUM MILL', 'NOT_A_PROBABILITY_VALUE')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(4, 'IDPICKER FDR', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(5, 'MASCOT IONS SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(6, 'TANDEM EXPECTATION VALUE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(7, 'PROTEIN PILOT CONFIDENCE', 'PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(8, 'SCAFFOLD SOMETHING', 'PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(9, 'WATERS MSE PEPTIDE SCORE', 'NOT_A_PROBABILITY_VALUE')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(10, 'OMSSA EXPECTATION SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(11, 'PROTEIN PROSPECTOR EXPECTATION SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(12, 'SEQUEST XCORR', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(13, 'MAXQUANT SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(14, 'MORPHEUS SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(15, 'MSGF+ SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(16, 'PEAKS CONFIDENCE SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(17, 'BYONIC SCORE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(18, 'PEPTIDE SHAKER CONFIDENCE', 'PROBABILITY_THAT_IDENTIFICATION_IS_CORRECT')

INSERT INTO ScoreTypes(id, scoreType, probabilityType) VALUES(19, 'GENERIC Q-VALUE', 'PROBABILITY_THAT_IDENTIFICATION_IS_INCORRECT')

CREATE TABLE IonMobilityTypes ( -- table of known ion mobility units
id INTEGER PRIMARY KEY, -- as used in ionMobilityType field of RefSpectra
ionMobilityType VARCHAR(128) -- text description of ion mobility units
)

INSERT INTO IonMobilityTypes(id, ionMobilityType) VALUES(0, 'none')

INSERT INTO IonMobilityTypes(id, ionMobilityType) VALUES(1, 'driftTime(msec)')

INSERT INTO IonMobilityTypes(id, ionMobilityType) VALUES(2, 'inverseK0(Vsec/cm^2)')

INSERT INTO IonMobilityTypes(id, ionMobilityType) VALUES(3, 'compensation(V)')