Skip to content

Commit

Permalink
Merge ebc41aa into a849211
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolaasuni committed Aug 9, 2018
2 parents a849211 + ebc41aa commit 000280b
Show file tree
Hide file tree
Showing 13 changed files with 547 additions and 69 deletions.
12 changes: 9 additions & 3 deletions README.md
Expand Up @@ -24,7 +24,7 @@
* **[VariantKey Format](#vkformat)**
* [VariantKey Properties](#vkproperties)
* [VariantKey Input values](#vkinput)
* [RegionKey](#regionkey)
* **[RegionKey](#regionkey)**
* [Binary file formats for lookup tables](#binaryfiles)
* [C Library](#clib)
* [GO Library](#golib)
Expand Down Expand Up @@ -369,7 +369,11 @@ The VariantKey is composed of 3 sections arranged in 64 bit:
<a name="regionkey"></a>
## RegionKey

This library also includes functions to represent a human genetic region as number.
*RegionKey* encodes a human genetic region (defined as the set of *chromosome*, *start position*, *end position* and *strand direction*) in a 64 bit unsigned integer number.

RegionKey allows a region to be represented as a single entity, and provides properties analogous to the ones listed in [VariantKey Properties](#vkproperties).

The encoding of the first 33 bits (CHROM, STARTPOS) is the same as in VariantKey.

The RegionKey is composed of 4 sections arranged in 64 bit:

Expand Down Expand Up @@ -433,7 +437,9 @@ The RegionKey is composed of 4 sections arranged in 64 bit:
+1 : 1 dec = "01" bin
```

The last bit of RegionKey is reserved.
* The last bit of RegionKey is reserved.

This software library provides several functions to operate with *RegionKey* and interact with *VariantKey*.


<a name="binaryfiles"></a>
Expand Down
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
2.14.0
2.15.0
8 changes: 3 additions & 5 deletions c/src/nrvk.c
Expand Up @@ -58,11 +58,9 @@ size_t find_ref_alt_by_variantkey(const unsigned char *src, uint64_t last, uint6

size_t reverse_variantkey(const unsigned char *src, uint64_t last, uint64_t vk, variantkey_rev_t *rev)
{
variantkey_t h = {0,0,0};
decode_variantkey(vk, &h);
decode_chrom(h.chrom, rev->chrom);
rev->pos = h.pos;
size_t len = decode_refalt(h.refalt, rev->ref, &rev->sizeref, rev->alt, &rev->sizealt);
decode_chrom(extract_variantkey_chrom(vk), rev->chrom);
rev->pos = extract_variantkey_pos(vk);
size_t len = decode_refalt(extract_variantkey_refalt(vk), rev->ref, &rev->sizeref, rev->alt, &rev->sizealt);
if ((len == 0) && (last > 0))
{
len = find_ref_alt_by_variantkey(src, last, vk, rev->ref, &rev->sizeref, rev->alt, &rev->sizealt);
Expand Down
10 changes: 4 additions & 6 deletions c/src/regionkey.c
Expand Up @@ -80,12 +80,10 @@ void decode_regionkey(uint64_t code, regionkey_t *rk)

void reverse_regionkey(uint64_t rk, regionkey_rev_t *rev)
{
regionkey_t h = {0,0,0,0};
decode_regionkey(rk, &h);
decode_chrom(h.chrom, rev->chrom);
rev->startpos = h.startpos;
rev->endpos = h.endpos;
rev->strand = decode_region_strand(h.strand);
decode_chrom(extract_regionkey_chrom(rk), rev->chrom);
rev->startpos = extract_regionkey_startpos(rk);
rev->endpos = extract_regionkey_endpos(rk);
rev->strand = decode_region_strand(extract_regionkey_strand(rk));
}

uint64_t regionkey(const char *chrom, size_t sizechrom, uint32_t startpos, uint32_t endpos, int8_t strand)
Expand Down
2 changes: 1 addition & 1 deletion conda/c.vk/meta.yaml
@@ -1,6 +1,6 @@
package:
name: vk
version: 2.14.0
version: 2.15.0

source:
path: ../..
Expand Down
2 changes: 1 addition & 1 deletion conda/python/meta.yaml
@@ -1,6 +1,6 @@
package:
name: variantkey
version: 2.14.0
version: 2.15.0

source:
path: ../..
Expand Down
2 changes: 1 addition & 1 deletion conda/r/meta.yaml
@@ -1,6 +1,6 @@
package:
name: r-variantkey
version: 2.14.0
version: 2.15.0

source:
path: ../..
Expand Down
1 change: 1 addition & 0 deletions javascript/Makefile
Expand Up @@ -33,6 +33,7 @@ build:
@mkdir -p target/build
uglifyjs --compress --keep-fnames --comments --output target/build/variantkey.js src/variantkey.js
cd test && node test_variantkey.js '../target/build/variantkey.js'
cd test && node test_regionkey.js '../target/build/variantkey.js'

# Format the source code
format:
Expand Down
123 changes: 113 additions & 10 deletions javascript/src/variantkey.js
Expand Up @@ -39,7 +39,7 @@ function encodeChrom(chrom) {
return 0;
}
// X > 23 ; Y > 24 ; M > 25
onecharmap = [
var onecharmap = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* M X Y */
Expand Down Expand Up @@ -90,7 +90,7 @@ function encodeBase(c) {
G > 2
T > 3
*/
map = [
var map = [
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* A C G T */
Expand Down Expand Up @@ -259,7 +259,7 @@ function extractVariantKeyChrom(vk) {
}

function extractVariantKeyPos(vk) {
return (((vk.hi & 0x7FFFFFF) << 1) | (vk.lo >>> 31)) >>> 0;
return (((vk.hi & 0x07FFFFFF) << 1) | (vk.lo >>> 31)) >>> 0;
}

function extractVariantKeyRefAlt(vk) {
Expand All @@ -274,6 +274,16 @@ function decodeVariantKey(vk) {
};
}

// Decode a VariantKey (object with 32-bit "hi" and "lo" halves) into an
// object holding the decoded "chrom", the "pos" and the "ref"/"alt" pair
// produced by decodeRefAlt().
function reverseVariantKey(vk) {
    var refAlt = decodeRefAlt(extractVariantKeyRefAlt(vk));
    var rev = {};
    rev.chrom = decodeChrom(extractVariantKeyChrom(vk));
    rev.pos = extractVariantKeyPos(vk);
    rev.ref = refAlt.ref;
    rev.alt = refAlt.alt;
    return rev;
}

// Build a VariantKey from raw inputs: the chromosome and the REF/ALT pair are
// encoded first, then combined with the position by encodeVariantKey().
function variantKey(chrom, pos, ref, alt) {
    var chromCode = encodeChrom(chrom);
    var refAltCode = encodeRefAlt(ref, alt);
    return encodeVariantKey(chromCode, pos, refAltCode);
}
Expand Down Expand Up @@ -322,14 +332,90 @@ function parseHex(vs) {
};
}

function reverseVariantKey(dvk) {
var ra = decodeRefAlt(dvk.refalt);
// Encode a strand direction as a 2-bit code:
//   -1 -> 2, 0 -> 0, +1 -> 1.
// Only the two lowest bits of (strand + 1) are considered.
function encodeRegionStrand(strand) {
    switch ((+strand + 1) & 3) {
        case 0:
            return 2; // strand == -1
        case 2:
            return 1; // strand == +1
        default:
            return 0;
    }
}

// Decode a 2-bit strand code back to a direction:
//   1 -> +1, 2 -> -1, anything else (0 or 3) -> 0.
// Only the two lowest bits of the input are considered.
function decodeRegionStrand(strand) {
    var code = strand & 3;
    if (code === 1) {
        return 1;
    }
    if (code === 2) {
        return -1;
    }
    return 0;
}

function encodeRegionKey(chrom, startpos, endpos, strand) {
return {
"chrom": decodeChrom(dvk.chrom),
"pos": dvk.pos,
"ref": ra.ref,
"alt": ra.alt
}
"hi": ((((chrom >>> 0) << 27) | (startpos >>> 1)) >>> 0),
"lo": ((((startpos >>> 0) << 31) | ((endpos >>> 0) << 3) | ((strand >>> 0) << 1)) >>> 0)
};
}

// Return the chromosome code stored in the 5 most significant bits of rk.hi.
function extractRegionKeyChrom(rk) {
    var shifted = rk.hi >>> 27;
    return shifted & 0x1F;
}

// Return the start position: the 27 low bits of rk.hi followed by the most
// significant bit of rk.lo, as an unsigned number.
function extractRegionKeyStartPos(rk) {
    var upperBits = (rk.hi & 0x07FFFFFF) << 1;
    var lowestBit = rk.lo >>> 31;
    return (upperBits | lowestBit) >>> 0;
}

// Return the end position stored in bits 3..30 of rk.lo.
function extractRegionKeyEndPos(rk) {
    var shifted = rk.lo >>> 3;
    return shifted & 0x0FFFFFFF;
}

// Return the 2-bit strand code stored in bits 1..2 of rk.lo.
function extractRegionKeyStrand(rk) {
    var shifted = rk.lo >>> 1;
    return shifted & 3;
}

// Split a RegionKey into its four still-encoded sections:
// "chrom", "startpos", "endpos" and "strand".
function decodeRegionKey(rk) {
    var decoded = {};
    decoded.chrom = extractRegionKeyChrom(rk);
    decoded.startpos = extractRegionKeyStartPos(rk);
    decoded.endpos = extractRegionKeyEndPos(rk);
    decoded.strand = extractRegionKeyStrand(rk);
    return decoded;
}

// Decode a RegionKey into its original components: the chromosome and the
// strand are passed through their decoders, the positions are returned as
// extracted.
function reverseRegionKey(rk) {
    var rev = {};
    rev.chrom = decodeChrom(extractRegionKeyChrom(rk));
    rev.startpos = extractRegionKeyStartPos(rk);
    rev.endpos = extractRegionKeyEndPos(rk);
    rev.strand = decodeRegionStrand(extractRegionKeyStrand(rk));
    return rev;
}

// Build a RegionKey from raw inputs: the chromosome and the strand are
// encoded first, then combined with the two positions by encodeRegionKey().
function regionKey(chrom, startpos, endpos, strand) {
    var chromCode = encodeChrom(chrom);
    var strandCode = encodeRegionStrand(strand);
    return encodeRegionKey(chromCode, startpos, endpos, strandCode);
}

// Return the hexadecimal string form of a RegionKey: the "hi" half followed
// by the "lo" half, each passed through padL08().
function regionKeyString(rk) {
    var hiHex = padL08(rk.hi.toString(16));
    var loHex = padL08(rk.lo.toString(16));
    return hiHex + loHex;
}

// Return the VariantKey position plus the 4-bit value stored in bits 27..30
// of vk.lo.
// NOTE(review): that field is presumably the REF allele length - confirm
// against the VariantKey REF+ALT encoding.
function getVariantKeyEndPos(vk) {
    var startPos = extractVariantKeyPos(vk);
    var span = (vk.lo >>> 27) & 0x0F;
    return startPos + span;
}

// Return true when regions A and B are on the same chromosome and A starts
// before B ends while A ends after B starts (both comparisons are strict, so
// regions that merely touch do not overlap).
function areOverlappingRegions(a_chrom, a_startpos, a_endpos, b_chrom, b_startpos, b_endpos) {
    if (a_chrom != b_chrom) {
        return false;
    }
    if (!(a_startpos < b_endpos)) {
        return false;
    }
    return (a_endpos > b_startpos);
}

// Return true when the given region (encoded chrom, startpos, endpos)
// overlaps the region stored in the RegionKey rk. The comparisons are strict,
// so regions that merely touch do not overlap.
function areOverlappingRegionRegionKey(chrom, startpos, endpos, rk) {
    if (chrom != extractRegionKeyChrom(rk)) {
        return false;
    }
    if (!(startpos < extractRegionKeyEndPos(rk))) {
        return false;
    }
    return (endpos > extractRegionKeyStartPos(rk));
}

// Return true when the regions stored in the two RegionKeys share the same
// chromosome code and their [startpos, endpos) ranges strictly overlap.
function areOverlappingRegionKeys(rka, rkb) {
    if (extractRegionKeyChrom(rka) != extractRegionKeyChrom(rkb)) {
        return false;
    }
    if (!(extractRegionKeyStartPos(rka) < extractRegionKeyEndPos(rkb))) {
        return false;
    }
    return (extractRegionKeyEndPos(rka) > extractRegionKeyStartPos(rkb));
}

// Return true when the variant stored in vk and the region stored in rk share
// the same chromosome code, the variant position is before the region end,
// and the variant end position is after the region start.
function areOverlappingVariantKeyRegionKey(vk, rk) {
    if (extractVariantKeyChrom(vk) != extractRegionKeyChrom(rk)) {
        return false;
    }
    if (!(extractVariantKeyPos(vk) < extractRegionKeyEndPos(rk))) {
        return false;
    }
    return (getVariantKeyEndPos(vk) > extractRegionKeyStartPos(rk));
}

// Convert a VariantKey into a RegionKey: "hi" is copied as is; in "lo" only
// the top bit is kept and the variant end position is stored starting at
// bit 3. The three lowest bits of "lo" are left at zero.
function variantKeyToRegionKey(vk) {
    var hi = vk.hi;
    var topBit = vk.lo & 0x80000000;
    var endPosField = (getVariantKeyEndPos(vk) << 3) >>> 0;
    return {
        "hi": hi,
        "lo": (topBit | endPosField) >>> 0
    };
}

if (typeof(module) !== 'undefined') {
Expand All @@ -351,5 +437,22 @@ if (typeof(module) !== 'undefined') {
compareVariantKeyChromPos: compareVariantKeyChromPos,
variantKeyString: variantKeyString,
reverseVariantKey: reverseVariantKey,
encodeRegionStrand: encodeRegionStrand,
decodeRegionStrand: decodeRegionStrand,
encodeRegionKey: encodeRegionKey,
extractRegionKeyChrom: extractRegionKeyChrom,
extractRegionKeyStartPos: extractRegionKeyStartPos,
extractRegionKeyEndPos: extractRegionKeyEndPos,
extractRegionKeyStrand: extractRegionKeyStrand,
decodeRegionKey: decodeRegionKey,
reverseRegionKey: reverseRegionKey,
regionKey: regionKey,
regionKeyString: regionKeyString,
getVariantKeyEndPos: getVariantKeyEndPos,
areOverlappingRegions: areOverlappingRegions,
areOverlappingRegionRegionKey: areOverlappingRegionRegionKey,
areOverlappingRegionKeys: areOverlappingRegionKeys,
areOverlappingVariantKeyRegionKey: areOverlappingVariantKeyRegionKey,
variantKeyToRegionKey: variantKeyToRegionKey,
}
}

0 comments on commit 000280b

Please sign in to comment.