From 6efd1337e9209f8a101173becfd4409e7fb3aa95 Mon Sep 17 00:00:00 2001 From: alchemistmatt Date: Thu, 12 Nov 2020 18:01:12 -0800 Subject: [PATCH] Allow the unique identifier column to be to the right of the data columns --- Inferno/DataIO/frmDAnTE.MiscFileIO.cs | 76 +++++++++++++++++---------- Inferno/Forms/frmDAnTE.Main.cs | 2 +- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/Inferno/DataIO/frmDAnTE.MiscFileIO.cs b/Inferno/DataIO/frmDAnTE.MiscFileIO.cs index 7a6a70c..a90ea0a 100644 --- a/Inferno/DataIO/frmDAnTE.MiscFileIO.cs +++ b/Inferno/DataIO/frmDAnTE.MiscFileIO.cs @@ -242,55 +242,75 @@ private void ShowSaveFileWindow(string filter) /// Checks for and removes any columns with duplicate column names /// /// - /// - /// + /// Column with the unique identifier for each row + /// + /// When loadingProteinToPeptideMapInfo is false, these are columns with data (one per dataset) + /// When loadingProteinToPeptideMapInfo is true, this should be the protein name column, plus optionally protein metadata columns + /// + /// False when we first read the file; true when reading it again to load the protein to peptide mapping + /// + /// Multiple rows can have the same unique identifier only if there is an accession column (aka protein) + /// No two rows should have the same accession and unique identifier + /// /// - private DataTable ArrangeDataTable(DataTable sourceDataTable, string keyColumnName, IEnumerable dataCols) + private DataTable ArrangeDataTable( + DataTable sourceDataTable, + string keyColumnName, + IList dataCols, + bool loadingProteinToPeptideMapInfo) { var dtEset = sourceDataTable.Copy(); var columns = dtEset.Columns; // Clone dataCols so that we can sort it - var sortedColumns = new SortedSet(StringComparer.InvariantCultureIgnoreCase); + var sortedDataColumns = new SortedSet(StringComparer.OrdinalIgnoreCase); foreach (var item in dataCols) { - sortedColumns.Add(item); + sortedDataColumns.Add(item); } - var columnsToRemove = new 
SortedSet(StringComparer.InvariantCultureIgnoreCase); + var columnsToRemove = new SortedSet(StringComparer.OrdinalIgnoreCase); foreach (DataColumn column in columns) { - if (sortedColumns.Contains(column.ColumnName) || column.ColumnName.Equals(keyColumnName)) + if (!string.IsNullOrWhiteSpace(keyColumnName) && column.ColumnName.Equals(keyColumnName)) + { + continue; + } + + if (sortedDataColumns.Contains(column.ColumnName)) continue; if (!columnsToRemove.Contains(column.ColumnName)) columnsToRemove.Add(column.ColumnName); } + // Remove columns that are not data columns or the key column foreach (var s in columnsToRemove) { dtEset.Columns.Remove(s); } - return dtEset; - } + if (loadingProteinToPeptideMapInfo) + { + var proteinColumnName = dataCols.First(); + if (dtEset.Columns[proteinColumnName].Ordinal > 0) + { + dtEset.Columns[proteinColumnName].SetOrdinal(0); + } + } + else + { + if (!string.IsNullOrWhiteSpace(keyColumnName) && dtEset.Columns[keyColumnName].Ordinal > 0) + { + // Rearrange the data so that the key column is first + dtEset.Columns[keyColumnName].SetOrdinal(0); + } + } - /// - /// Overloaded method of the above to create a table with only two columns. - /// Checks for and removes any columns with duplicate column names - /// - /// - /// - /// - /// - private DataTable ArrangeDataTable(DataTable sourceDataTable, string keyColumnName, string dataColumn) - { - var dataCols = new List { dataColumn }; - return ArrangeDataTable(sourceDataTable, keyColumnName, dataCols); + return dtEset; } - /// /// Rearrange the protein info columns in a data table, /// then send it to R and 'clean' it. 
@@ -307,19 +327,19 @@ private DataTable ArrangeDataTable(DataTable sourceDataTable, string keyColumnNa try { + var dataColumns = new List { proteinIdentifierColumn }; if (metaDataColumns.Count > 0) { - var allColumns = new List { proteinIdentifierColumn }; - allColumns.AddRange(metaDataColumns); + dataColumns.AddRange(metaDataColumns); // Table will have more than two columns: rowID and proteinIdentifierColumn, then metaDataColumns // ProteinID, ProteinMetadata, and rowID (the key column name in the Eset table) - proteinDataTable = ArrangeDataTable(sourceDataTable, rowID, allColumns); + proteinDataTable = ArrangeDataTable(sourceDataTable, rowID, dataColumns, true); } else { // Table will have two columns: rowID and proteinIdentifierColumn - proteinDataTable = ArrangeDataTable(sourceDataTable, rowID, proteinIdentifierColumn); + proteinDataTable = ArrangeDataTable(sourceDataTable, rowID, dataColumns, true); } @@ -487,7 +507,7 @@ private DataTable OpenFile_test(string filename) var dataCols = columnSelectionForm.DataColumns.ToList(); try { - dtSelectedEset1 = ArrangeDataTable(loadedData, rowID, dataCols); // create the expression set data table + dtSelectedEset1 = ArrangeDataTable(loadedData, rowID, dataCols, false); // create the expression set data table dtSelectedEset1.TableName = "Eset"; } catch (Exception ex) @@ -588,7 +608,7 @@ private bool OpenFile(string filePath) var dataCols = columnSelectionForm.DataColumns.ToList(); try { - var filteredDataTable = ArrangeDataTable(esetTable, rowID, dataCols); + var filteredDataTable = ArrangeDataTable(esetTable, rowID, dataCols, false); // Rename the first column from MassTagID (or whatever the user-supplied name is) to Row_ID filteredDataTable.Columns[0].ColumnName = "Row_ID"; diff --git a/Inferno/Forms/frmDAnTE.Main.cs b/Inferno/Forms/frmDAnTE.Main.cs index ede26e6..7ecd390 100644 --- a/Inferno/Forms/frmDAnTE.Main.cs +++ b/Inferno/Forms/frmDAnTE.Main.cs @@ -15,7 +15,7 @@ public partial class frmDAnTE : Form { #region 
Other Variables - public const string PROGRAM_DATE = "October 6, 2020"; + public const string PROGRAM_DATE = "November 12, 2020"; public const int SUGGESTED_DATASETS_TO_SELECT = 30; public const int MAX_DATASETS_TO_SELECT = 60;