Skip to content
Permalink
Browse files

Adding support for importing CNV data from PURPLE into the GBS.

  • Loading branch information...
VelNZ committed Jan 25, 2018
1 parent cd8ec5c commit 3b6ddd040cd24702395bbb97a80c8f6c50b2f184
@@ -279,6 +279,70 @@
gbs_administration_page_redirect();
#############################################
# PURPLE INPUT FILE
#############################################
} elseif ($_POST["method"] == "PURPLE") {
// If no sample name was submitted
if (!isset($_POST["import_sample_purple"])) {
gbs_administration_page_redirect("missing_posts");
}
// If no sample name was entered
if (strlen($_POST["import_sample_purple"]) == 0) {
gbs_administration_page_redirect("missing_sample_name");
}
// Check whether there is already data for the sample and method in the GBS and fail if so
$already_in_gbs = is_sample_and_software_in_gbs($_POST["import_sample_purple"], $_POST["method"]);
if ($already_in_gbs === true) {
gbs_administration_page_redirect("gbs_data_already_exists");
} elseif ($already_in_gbs === false) {
gbs_administration_page_redirect("cant_tell_if_gbs_data_already_exists");
}
// Check file extension - only allow "*purple.cnv" format from PURPLE
if (!preg_match("/purple.cnv$/", $_FILES["genomeblocks"]["name"])) {
gbs_administration_page_redirect("invalid_purple_output_file_format");
}
// Open the genome blocks file for parsing
$genome_blocks_file = fopen($_FILES['genomeblocks']['tmp_name'], "r");
// If the genome blocks file couldn't be opened
if ($genome_blocks_file === false) {
gbs_administration_page_redirect("cant_open_genome_blocks_file");
}
// Parse the data file and save the blocks
$genome_block_store = gbs_import_purple($genome_blocks_file, $_POST["import_sample_purple"]);
// If there was a failure parsing
if ($genome_block_store === false) {
gbs_administration_page_redirect();
}
// If no blocks were found
if (count($genome_block_store) == 0) {
gbs_administration_page_redirect("no_valid_data");
}
// Save the genome blocks
log_gbs_import_info("genome_blocks", $genome_block_store);
// Save the unique annotation tags
log_gbs_import_info("unique_annotation_tags", $GLOBALS['default_purple_columns']);
// Save the sample name
log_gbs_import_info("samples", array($_POST["import_sample_purple"]));
// Save the method
log_gbs_import_info("method", $_POST["method"]);
gbs_administration_page_redirect();
#############################################
# VarpipeSV INPUT FILE
#############################################
@@ -262,7 +262,7 @@
}
// Make sure a sample name has been supplied for the methods that need it
if (!isset($_GET["sample_name"]) && in_array($_GET["method"], array("CNVnator", "ROHmer", "Sequenza", "CNVkit"))) {
if (!isset($_GET["sample_name"]) && in_array($_GET["method"], array("CNVnator", "ROHmer", "Sequenza", "PURPLE", "CNVkit"))) {
echo "Fail: missing sample name for GBS method that requires one";
exit;
@@ -306,6 +306,12 @@
if (!preg_match("/segments.txt$/", $_GET["url"]) && !preg_match("/segments.txt.gz$/", $_GET["url"])) {
echo "Fail: URL does not end with segments.txt/segments.txt.gz for Sequenza file import";
exit;
}
} elseif ($_GET["method"] == "PURPLE") {
if (!preg_match("/purple.cnv$/", $_GET["url"]) && !preg_match("/purple.cnv.gz$/", $_GET["url"])) {
echo "Fail: URL does not end with purple.cnv/purple.cnv.gz for PURPLE file import";
exit;
}
} elseif ($_GET["method"] == "VarpipeSV") {
@@ -516,6 +522,9 @@
} elseif ($_GET["method"] == "Sequenza") {
// Parse the data file and save the blocks
$genome_block_store = gbs_import_sequenza($genome_blocks_file, $samples[0]);
} elseif ($_GET["method"] == "PURPLE") {
// Parse the data file and save the blocks
$genome_block_store = gbs_import_purple($genome_blocks_file, $samples[0]);
} elseif ($_GET["method"] == "CNVkit") {
// Parse the data file and save the blocks
$genome_block_store = gbs_import_cnvkit($genome_blocks_file, $samples[0]);
@@ -148,8 +148,8 @@ function import_block_into_gbs($chromosome, $start, $end, $method, $block_value)
// If the block is from ROHmer which only calls RoH events
} elseif ($block_value == "RoH") {
array_push($parameter_values, "roh");
// If the block is from CNVnator or Sequenza, work out the event type from the block copy number
} elseif (in_array($method, array("CNVnator", "Sequenza", "CNVkit"))) {
// If the block is from a method that includes a copy number estimate, work out the event type from the block copy number
} elseif (in_array($method, array("CNVnator", "Sequenza", "CNVkit", "PURPLE"))) {
// If the event is a deletion
if ($block_value < 2) {
array_push($parameter_values, "deletion");
@@ -169,7 +169,7 @@ function import_block_into_gbs($chromosome, $start, $end, $method, $block_value)
if (in_array($method, array("LUMPY", "VarpipeSV", "Manta", "ROHmer"))) {
array_push($parameter_values, NULL);
// If the method produces a copy number estimate
} elseif (in_array($method, array("CNVnator", "Sequenza", "CNVkit"))) {
} elseif (in_array($method, array("CNVnator", "Sequenza", "CNVkit", "PURPLE"))) {
// Store the numeric copy number estimate
array_push($parameter_values, $block_value);
} else {
@@ -279,6 +279,83 @@ function gbs_import_sequenza($open_data_file_handle, $sample) {
return $genome_block_store;
}
#############################################
# GBS IMPORT FROM PURPLE FILE
#############################################
// Parse an already-open PURPLE copy number file (tab-separated, 10 columns per row)
// into an array of genome blocks for a single sample.
//
// Parameters:
//   $open_data_file_handle - readable stream positioned at the start of the file
//   $sample                - sample name attached to every block and annotation
//
// Returns:
//   array - list of blocks, each shaped as:
//             [block id]["chromosome"|"start"|"end"] = <value>
//             [block id]["samples"]                  = <array of samples>
//             [block id]["event"]                    = <copy number rounded to 2dp>
//             [block id]["annotations"][tag][sample] = <value>
//   false - if a row has the wrong number of columns or non-numeric coordinates /
//           copy number; the offending line is passed to log_gbs_import_info("error", ...)
function gbs_import_purple($open_data_file_handle, $sample) {
    // Create an array to store block information
    $genome_block_store = array();

    // Go through the file line by line
    while (($line = fgets($open_data_file_handle)) !== false) {
        // Ignore blank (whitespace-only) lines. This must be tested on the raw line:
        // explode() always returns at least one element, so counting the columns can
        // never detect an empty line and a trailing blank line would abort the import.
        if (trim($line) === "") {
            continue;
        }

        // Split the row into an array by column
        $columns = explode("\t", $line);

        // Ignore the header line
        if ($columns[0] === "#chromosome") {
            continue;
        }

        // If the line does not have the expected number of columns, quit out and display it to the user
        if (count($columns) != 10) {
            log_gbs_import_info("error", "A row in your PURPLE output file did not contain the right amount of columns, it should be 10. Line: ".$line);

            return false;
        }

        // Remove space/newline characters from the 10th column (the last one which has \n at the end)
        $columns[9] = preg_replace("/[\n\r\s]*/", "", $columns[9]);

        // Make sure the coordinate and copy number columns are numeric
        if (!is_numeric($columns[1]) || !is_numeric($columns[2]) || !is_numeric($columns[3])) {
            log_gbs_import_info("error", "Found a line which contains one or more non-numeric values for coordinates or copy number. Line: ".$line);

            return false;
        }

        // Validated as numeric above, so convert once and compare as a number
        $copy_number = (float) $columns[3];

        // Ignore all events that are too close to a copy number of 2
        if ($copy_number < 2.5 && $copy_number > 1.5) {
            continue;
        }

        // Build the block, then append it so it receives the next zero-based block id
        $genome_block = array(
            "chromosome" => $columns[0],
            "start" => $columns[1],
            "end" => $columns[2],
            "samples" => array($sample),
            // Copy number for the block (round to 2dp)
            "event" => round($copy_number, 2),
            "annotations" => array(
                "bafCount" => array($sample => $columns[4]),
                "observedBAF" => array($sample => $columns[5]),
                "actualBAF" => array($sample => $columns[6]),
                "segmentStartSupport" => array($sample => $columns[7]),
                "segmentEndSupport" => array($sample => $columns[8]),
                "method" => array($sample => $columns[9])
            )
        );

        $genome_block_store[] = $genome_block;
    }

    return $genome_block_store;
}
#############################################
# GBS IMPORT FROM VarpipeSV FILE
#############################################
@@ -435,7 +435,7 @@ function genome_blocks_table() {
echo "<th>Sample(s)</th>";
if (in_array($_SESSION["gbs_import_method"], array("CNVnator", "CNVkit", "Sequenza"))) {
if (in_array($_SESSION["gbs_import_method"], array("CNVnator", "CNVkit", "Sequenza", "PURPLE"))) {
echo "<th>Copy Number</th>";
} elseif (in_array($_SESSION["gbs_import_method"], array("VarpipeSV", "Manta", "LUMPY"))) {
echo "<th>Event Type</th>";
@@ -61,6 +61,9 @@
# Sequenza-specific errors
if_set_display_error("gbs_import_invalid_sequenza_output_file_format", "You must upload the output file from Sequenza ending with \"segments.txt\".");
# PURPLE-specific errors
if_set_display_error("gbs_import_invalid_purple_output_file_format", "You must upload the output file from PURPLE ending with \"purple.cnv\".");
# ROHmer-specific errors
if_set_display_error("gbs_import_invalid_rohmer_output_file_format", "You must upload the BED file output from ROHmer.");
@@ -153,6 +156,8 @@
echo "<label for=\"label_lumpy\">LUMPY</label>";
echo "<input type=\"radio\" id=\"label_sequenza\" name=\"method\" value=\"Sequenza\" onclick=\"javascript:showdiv('sequenza');\">";
echo "<label for=\"label_sequenza\">Sequenza</label>";
echo "<input type=\"radio\" id=\"label_purple\" name=\"method\" value=\"PURPLE\" onclick=\"javascript:showdiv('purple');\">";
echo "<label for=\"label_purple\">PURPLE</label>";
echo "<input type=\"radio\" id=\"label_rohmer\" name=\"method\" value=\"ROHmer\" onclick=\"javascript:showdiv('rohmer');\">";
echo "<label for=\"label_rohmer\">ROHmer</label>";
echo "<input type=\"radio\" id=\"label_varpipesv\" name=\"method\" value=\"VarpipeSV\" onclick=\"javascript:showdiv('varpipesv');\">";
@@ -175,6 +180,11 @@
echo "<input type=\"text\" name=\"import_sample_sequenza\">";
echo "<p style=\"font-size:75%;\">Enter the sample name Sequenza was run on above.</p>";
echo "</div>";
echo "<div class=\"selection\" id=\"purple\" style=\"display: none;\">";
echo "<input type=\"text\" name=\"import_sample_purple\">";
echo "<p style=\"font-size:75%;\">Enter the sample name PURPLE was run on above.</p>";
echo "</div>";
echo "<div class=\"selection\" id=\"rohmer\" style=\"display: none;\">";
echo "<input type=\"text\" name=\"import_sample_rohmer\">";
@@ -226,6 +236,10 @@
echo "<div class=\"selection\" id=\"sequenza\" style=\"display: none;\">";
echo "<p style=\"font-size:75%;\">Select the segments.txt file.</p>";
echo "</div>";
echo "<div class=\"selection\" id=\"purple\" style=\"display: none;\">";
echo "<p style=\"font-size:75%;\">Select the purple.cnv file.</p>";
echo "</div>";
echo "<div class=\"selection\" id=\"manta\" style=\"display: none;\">";
echo "<p style=\"font-size:75%;\">Select the somaticSV.vcf file.</p>";
@@ -118,6 +118,13 @@
$GLOBALS['default_sequenza_columns'] = array();
array_push($GLOBALS['default_sequenza_columns'], "CN.A", "CN.B", "AF.B", "DP.ratio");
#############################################
# DEFAULT PURPLE COLUMNS TO SAVE
#############################################
// Annotation tags stored for each PURPLE block
$GLOBALS['default_purple_columns'] = array(
    "bafCount",
    "observedBAF",
    "actualBAF",
    "segmentStartSupport",
    "segmentEndSupport",
    "method"
);
#############################################
# WHITELISTED GBS RESULTS PAGE COLUMNS
#############################################

0 comments on commit 3b6ddd0

Please sign in to comment.
You can’t perform that action at this time.