Permalink
Cannot retrieve contributors at this time
Fetching contributors…
#!/usr/bin/perl
# determine-phred: guess whether the quality strings in a FASTQ, SAM or
# pileup file (optionally gzipped) are phred+33 or phred+64 encoded.
#
# With no argument (STDIN) or a single FILE, prints just the offset.
# With several FILEs, prints one "FILE<TAB>offset" line per file.
use strict;
use warnings;

my $ssiz = 7000;    # sample size: upper bound on records examined per input

# Guard on @ARGV so that reading from STDIN (no arguments) does not match
# against an undefined value.
if (@ARGV && $ARGV[0] =~ /^-[h?]/) {
    print "Usage: determine-phred FILE
Reads a sam, fastq or pileup, possibly gzipped and returns the phred-scale,
either 64 or 33, based on a quick scan of the data in the file.
";
    exit 0;
}

# State shared with determine(), which reads its input through <>.
my $cnt;            # number of records examined for the current input
my $dphred = 64;    # detected offset; stays 64 unless determine() lowers it
my $qual;           # quality string of the record being examined
my $comm;           # the '+' separator line of the first FASTQ record
my $fmt;            # detected format: 'fq', 'pileup' or 'sam'

if (@ARGV > 1) {
    my @mult = @ARGV;
    for my $f (@mult) {
        # Every file is judged on its own: forget the previous verdict and
        # the previous record count.
        $cnt    = 0;
        $dphred = 64;

        # <> keeps reading the previous file until it reaches EOF, so drop
        # any half-read input before pointing it at the next file.
        close(ARGV) if defined fileno(ARGV);

        @ARGV = (_input_spec($f));
        determine();

        # Report the name the user gave, not the gunzip command line.
        print "$f\t$dphred\n";
    }
}
else {
    # Zero arguments leaves @ARGV empty, so <> falls back to STDIN.
    @ARGV = map { _input_spec($_) } @ARGV;
    determine();
    print "$dphred\n";
}

# _input_spec($path)
#
# Returns the string to place in @ARGV so that <> yields the uncompressed
# content of $path: the path itself, or a "gunzip -c ... |" pipe
# specification when the name ends in ".gz".
sub _input_spec {
    my ($path) = @_;

    return $path if $path !~ /\.gz$/;

    # The name is handed to a shell inside single quotes; escape embedded
    # single quotes so an unusual file name cannot break out of the quoting.
    (my $quoted = $path) =~ s/'/'\\''/g;
    return "gunzip -c '$quoted'|";
}
# determine()
#
# Scans the input queued on <> (the file named in @ARGV, or STDIN) and sets
# the file-level $dphred to 33 or 64.  Takes no arguments and returns
# nothing useful; the result is communicated through $dphred.
#
# The format is detected from the first line:
#   * leading SAM header lines (@HD/@SQ/@RG/@PG/@CO) are skipped and the
#     rest is treated as SAM;
#   * any other line starting with '@' is treated as a FASTQ record id;
#   * a six-column pileup line is recognised by its shape;
#   * anything else is assumed to be SAM (quality in column 11).
#
# Up to $ssiz records are examined.  Only the second half of each quality
# string is inspected; a character below ASCII 64 there settles the answer
# as 33.  When fewer than nine records could be examined the answer is also
# 33 (NOTE(review): presumably "not enough evidence for phred+64" - confirm).
#
# Dies with "Unknown file format\n" when no quality string can be located.
sub determine {
    # Start from a clean slate so the sub can be called once per input.
    $dphred = 64;
    $cnt    = 0;
    $qual   = undef;
    $comm   = undef;
    $fmt    = undef;

    my $line = <>;

    # A SAM header line also starts with '@' and carries no quality column;
    # skip the whole header before looking at the data.
    my $saw_sam_header = 0;
    while (defined $line && $line =~ /^\@(?:HD|SQ|RG|PG|CO)\t/) {
        $saw_sam_header = 1;
        $line           = <>;
    }
    die "Unknown file format\n" if !defined $line;

    # Strip LF and CRLF endings: both characters are below 64 and would
    # otherwise be mistaken for phred+33 quality values.
    $line =~ s/[\r\n]+\z//;

    if (!$saw_sam_header && $line =~ /^\@/) {
        # fastq: id, sequence, '+' separator, quality
        $fmt = 'fq';
        defined(my $sequence = <>) or die "Unknown file format\n";
        $comm = <>;
        if (!defined $comm || substr($comm, 0, 1) ne '+') {
            die "Unknown file format\n";
        }
        $qual = <>;
        $qual =~ s/[\r\n]+\z// if defined $qual;
    }
    elsif (!$saw_sam_header
        && $line =~ /^\S+\t\d+\t[ACTGN]\t\d+\t\S+\t(\S+)$/i)
    {
        $fmt  = 'pileup';
        $qual = $1;
    }
    else {
        # sam
        $fmt  = 'sam';
        $qual = (split /\t/, $line)[10];
    }

    if (!defined $qual || $qual eq '') {
        die "Unknown file format\n";
    }

    my $records = 0;    # records that actually contributed evidence
    RECORD:
    while (defined $qual && length $qual) {
        # In SAM a QUAL of '*' means "no qualities stored"; it says nothing
        # about the encoding, so it must not count as evidence.
        if (!($fmt eq 'sam' && $qual eq '*')) {
            ++$records;
            my $tail = substr($qual, int(length($qual) / 2));
            if ($tail =~ tr/\x00-\x3f//) {
                # A value below 64 cannot occur in phred+64 data.
                $dphred = 33;
                $cnt    = $ssiz;
                last RECORD;
            }
        }
        last RECORD if ++$cnt >= $ssiz;    # got enough
        $qual = _next_quality($fmt);
    }

    if ($records < 9) {
        $dphred = 33;
    }
    return;
}

# _next_quality($format)
#
# Reads the next record of the given format ('fq', 'pileup' or 'sam') from
# <> and returns its quality string with the line ending removed.  Returns
# undef at end of input or when the record has no quality column.
sub _next_quality {
    my ($format) = @_;

    if ($format eq 'fq') {
        # Skip id, sequence and '+' separator; stop on a truncated record.
        for my $skip (1 .. 3) {
            my $ignored = <>;
            return if !defined $ignored;
        }
        my $quality = <>;
        return if !defined $quality;
        $quality =~ s/[\r\n]+\z//;
        return $quality;
    }

    my $record = <>;
    return if !defined $record;
    $record =~ s/[\r\n]+\z//;

    my @fields = split /\t/, $record;
    return $fields[ $format eq 'pileup' ? 5 : 10 ];
}