Skip to content

Commit

Permalink
Merge pull request #976 from nuno-agostinho/improve/regulation
Browse files Browse the repository at this point in the history
Regulation pipeline: deprecate update_vf parameter + autoflush STDOUT
  • Loading branch information
dglemos committed Apr 6, 2023
2 parents 92ce2e1 + dd79894 commit c86f72d
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 71 deletions.
139 changes: 70 additions & 69 deletions modules/Bio/EnsEMBL/Variation/Pipeline/FinishRegulationEffect.pm
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ use warnings;

use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);

use IO::Handle;
# Turn on autoflush for STDOUT so that output printed to it is written out
# right away instead of waiting in the output buffer. This statement sits at
# file scope (outside any sub), so it takes effect as soon as this module is
# loaded, and STDOUT is a global handle shared by the whole process.
# NOTE(review): presumably intended to keep pipeline job logs up to date while
# a job is still running -- confirm with the authors.
STDOUT->autoflush(1); # flush STDOUT buffer immediately

sub run {
my $self = shift;

Expand All @@ -48,86 +51,84 @@ sub run {
print $wrt "Finish Regulation Effect\n";
print $wrt "Start: " . localtime() . "\n\n";

if ($self->param('update_vf') || $self->param('only_update_vf')) {
my $vdba = $self->get_species_adaptor('variation');
my $dbc = $vdba->dbc;
$dbc->reconnect_when_lost(1);
# pre-clean-up:
foreach my $table (qw/regulatory_region_consequences variation_feature_overlap_regulation variation_feature_consequences/) {
$dbc->do(qq{DROP TABLE IF EXISTS $table;})
}

my @regulatory_tables = ('motif_feature_variation', 'regulatory_feature_variation');

print $wrt "Collect variation features overlapping regulatory features\n";
my $vdba = $self->get_species_adaptor('variation');
my $dbc = $vdba->dbc;
$dbc->reconnect_when_lost(1);
# pre-clean-up:
foreach my $table (qw/regulatory_region_consequences variation_feature_overlap_regulation variation_feature_consequences/) {
$dbc->do(qq{DROP TABLE IF EXISTS $table;})
}

$dbc->do(qq{
CREATE TABLE IF NOT EXISTS regulatory_region_consequences(
variation_feature_id int(10),
consequence_types set($consequence_types) NOT NULL DEFAULT 'intergenic_variant',
key variation_feature_idx(variation_feature_id)
)engine=MyISAM;});
my @regulatory_tables = ('motif_feature_variation', 'regulatory_feature_variation');

for my $table (@regulatory_tables) {
$dbc->do(qq{
INSERT INTO regulatory_region_consequences(variation_feature_id, consequence_types)
SELECT variation_feature_id, consequence_types
FROM $table;});
}
print $wrt "Collect variation features overlapping regulatory features\n";

print $wrt "Completed collect variation features overlapping regulatory features\n";
print $wrt "Collect overlap with variation_feature table\n";
$dbc->do(qq{
CREATE TABLE IF NOT EXISTS regulatory_region_consequences(
variation_feature_id int(10),
consequence_types set($consequence_types) NOT NULL DEFAULT 'intergenic_variant',
key variation_feature_idx(variation_feature_id)
)engine=MyISAM;});

for my $table (@regulatory_tables) {
$dbc->do(qq{
CREATE TABLE IF NOT EXISTS variation_feature_overlap_regulation(
variation_feature_id int(10),
consequence_types set($consequence_types) NOT NULL DEFAULT 'intergenic_variant',
key variation_feature_idx(variation_feature_id)
)engine=MyISAM;});

$dbc->do(qq{
INSERT INTO variation_feature_overlap_regulation(variation_feature_id, consequence_types)
SELECT vf.variation_feature_id, vf.consequence_types
FROM variation_feature vf, regulatory_region_consequences rrc
WHERE rrc.variation_feature_id = vf.variation_feature_id;});
INSERT INTO regulatory_region_consequences(variation_feature_id, consequence_types)
SELECT variation_feature_id, consequence_types
FROM $table;});
}

$dbc->do(qq{
INSERT INTO regulatory_region_consequences
SELECT * FROM variation_feature_overlap_regulation;});
print $wrt "Completed collect variation features overlapping regulatory features\n";
print $wrt "Collect overlap with variation_feature table\n";

$dbc->do(qq{
CREATE TABLE IF NOT EXISTS variation_feature_overlap_regulation(
variation_feature_id int(10),
consequence_types set($consequence_types) NOT NULL DEFAULT 'intergenic_variant',
key variation_feature_idx(variation_feature_id)
)engine=MyISAM;});

$dbc->do(qq{
INSERT INTO variation_feature_overlap_regulation(variation_feature_id, consequence_types)
SELECT vf.variation_feature_id, vf.consequence_types
FROM variation_feature vf, regulatory_region_consequences rrc
WHERE rrc.variation_feature_id = vf.variation_feature_id;});

$dbc->do(qq{
INSERT INTO regulatory_region_consequences
SELECT * FROM variation_feature_overlap_regulation;});

# combine consequences for all variation_features in regulatory_region_consequences
my $tmp_table = 'variation_feature_consequences';

$dbc->do(qq{CREATE TABLE IF NOT EXISTS $tmp_table(
variation_feature_id int(10),
consequence_types set($consequence_types) NOT NULL DEFAULT 'intergenic_variant',
key variation_feature_idx(variation_feature_id)
)engine=MyISAM;});

# combine consequences for all variation_features in regulatory_region_consequences
my $tmp_table = 'variation_feature_consequences';
$dbc->do(qq{
INSERT INTO $tmp_table (variation_feature_id, consequence_types)
SELECT variation_feature_id, GROUP_CONCAT(DISTINCT(consequence_types))
FROM regulatory_region_consequences
GROUP BY variation_feature_id;}) or die "Populating tmp table failed";

$dbc->do(qq{CREATE TABLE IF NOT EXISTS $tmp_table(
variation_feature_id int(10),
consequence_types set($consequence_types) NOT NULL DEFAULT 'intergenic_variant',
key variation_feature_idx(variation_feature_id)
)engine=MyISAM;});
print $wrt "Final update of variation_feature table\n";

$dbc->do(qq{
INSERT INTO $tmp_table (variation_feature_id, consequence_types)
SELECT variation_feature_id, GROUP_CONCAT(DISTINCT(consequence_types))
FROM regulatory_region_consequences
GROUP BY variation_feature_id;}) or die "Populating tmp table failed";
$tmp_table = 'variation_feature_consequences';
# update variation feature
$dbc->do(qq{
UPDATE variation_feature vf, $tmp_table vfc
SET vf.consequence_types = vfc.consequence_types
WHERE vf.variation_feature_id = vfc.variation_feature_id;}) or die "Failed to update vf table";

print $wrt "Final update of variation_feature table\n";
print $wrt "Completed update of variation_feature table\n";

$tmp_table = 'variation_feature_consequences';
# update variation feature
$dbc->do(qq{
UPDATE variation_feature vf, $tmp_table vfc
SET vf.consequence_types = vfc.consequence_types
WHERE vf.variation_feature_id = vfc.variation_feature_id;}) or die "Failed to update vf table";

print $wrt "Completed update of variation_feature table\n";

# post-clean-up:
foreach my $table (qw/regulatory_region_consequences variation_feature_overlap_regulation variation_feature_consequences/) {
$dbc->do(qq{DROP TABLE IF EXISTS $table;})
}
foreach my $table (qw/motif_feature_variation regulatory_feature_variation/) {
$dbc->do(qq{ALTER TABLE $table ENABLE KEYS;});
}
# post-clean-up:
foreach my $table (qw/regulatory_region_consequences variation_feature_overlap_regulation variation_feature_consequences/) {
$dbc->do(qq{DROP TABLE IF EXISTS $table;})
}
foreach my $table (qw/motif_feature_variation regulatory_feature_variation/) {
$dbc->do(qq{ALTER TABLE $table ENABLE KEYS;});
}

print $wrt "\nDone: " . localtime() . "\n";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ sub default_options {
split_slice => 1,
split_slice_length => 5e6,
only_update_vf => 0,
update_vf => 1,

# if set to 1 this option tells the transcript_effect analysis to disambiguate
# ambiguity codes in single nucleotide alleles, so e.g. an allele string like
# 'T/M' will be treated as if it were 'T/A/C' (this was a request from ensembl
Expand Down Expand Up @@ -100,7 +100,6 @@ sub pipeline_wide_parameters {
disambiguate_single_nucleotide_alleles => $self->o('disambiguate_single_nucleotide_alleles'),
ensembl_registry => $self->o('registry_file'),
only_update_vf => $self->o('only_update_vf'),
update_vf => $self->o('update_vf'),
species => $self->o('species'),
debug => $self->o('debug'),
split_slice => $self->o('split_slice'),
Expand Down

0 comments on commit c86f72d

Please sign in to comment.