|
|
@@ -74,6 +74,60 @@ uint8_t rcCharToDna5[] = { |
|
|
/* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
};
|
|
|
|
|
|
/**
 * Lookup table: ASCII character -> 4-bit nucleotide mask.
 *
 * The single bases are A = 1, C = 2, G = 4 and T = 8; every ambiguity
 * letter holds the OR of the bases it covers (e.g. M = 3 = A|C, N = 15).
 * Upper- and lower-case letters map to the same mask.  Every other byte,
 * U/u included, maps to 0 (empty mask).
 */
uint8_t asc2dnamask[256] = {
    /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x30 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /*          @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */
    /* 0x40 */  0,  1, 14,  2, 13,  0,  0,  4, 11,  0,  0, 12,  0,  3, 15,  0,
    /*          P   Q   R   S   T   U   V   W   X   Y   Z                     */
    /* 0x50 */  0,  0,  5,  6,  8,  0,  7,  9,  0, 10,  0,  0,  0,  0,  0,  0,
    /*          `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o */
    /* 0x60 */  0,  1, 14,  2, 13,  0,  0,  4, 11,  0,  0, 12,  0,  3, 15,  0,
    /*          p   q   r   s   t   u   v   w   x   y   z                     */
    /* 0x70 */  0,  0,  5,  6,  8,  0,  7,  9,  0, 10,  0,  0,  0,  0,  0,  0,
    /* 0x80 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x90 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xA0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xB0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xC0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xD0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xE0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0xF0 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
};
|
|
|
+
|
|
|
/// Lookup table: ASCII character -> Dna5 code, where A=0, C=1, G=2,
/// T=3 and N=4.  Upper- and lower-case letters are treated alike.
/// Following the manual, every other byte -- the IUPAC ambiguity
/// letters, U/u and '-' included -- is converted to N (4).
uint8_t asc2dna[256] = {
    /* 0x00 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0x10 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0x20 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0x30 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /*          @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */
    /* 0x40 */  4,  0,  4,  1,  4,  4,  4,  2,  4,  4,  4,  4,  4,  4,  4,  4,
    /*          P   Q   R   S   T   U   V   W   X   Y   Z                     */
    /* 0x50 */  4,  4,  4,  4,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /*          `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o */
    /* 0x60 */  4,  0,  4,  1,  4,  4,  4,  2,  4,  4,  4,  4,  4,  4,  4,  4,
    /*          p   q   r   s   t   u   v   w   x   y   z                     */
    /* 0x70 */  4,  4,  4,  4,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0x80 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0x90 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0xA0 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0xB0 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0xC0 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0xD0 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0xE0 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
    /* 0xF0 */  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
};
|
|
|
+
|
|
|
/// For converting from ASCII to the Dna5 code where A=0, C=1, G=2,
|
|
|
/// T=3, N=4
|
|
|
uint8_t asc2col[] = {
|
|
|
@@ -133,32 +187,6 @@ uint8_t asc2dnacat[] = { |
|
|
/**
|
|
|
* Mapping from ASCII characters for ambiguous nucleotides into masks:
|
|
|
*/
|
|
|
-uint8_t asc2dnamask[] = {
|
|
|
- /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 64 */ 0, 1,14, 2,13, 0, 0, 4,11, 0, 0,12, 0, 3,15, 0,
|
|
|
- /* A B C D G H K M N */
|
|
|
- /* 80 */ 0, 0, 5, 6, 8, 0, 7, 9, 0,10, 0, 0, 0, 0, 0, 0,
|
|
|
- /* R S T V W Y */
|
|
|
- /* 96 */ 0, 1,14, 2,13, 0, 0, 4,11, 0, 0,12, 0, 3,15, 0,
|
|
|
- /* a b c d g h k m n */
|
|
|
- /* 112 */ 0, 0, 5, 6, 8, 0, 7, 9, 0,10, 0, 0, 0, 0, 0, 0,
|
|
|
- /* r s t v w y */
|
|
|
- /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
- /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
-};
|
|
|
-
|
|
|
-/**
|
|
|
- * Mapping from ASCII characters for ambiguous nucleotides into masks:
|
|
|
- */
|
|
|
char asc2dnacomp[] = {
|
|
|
/* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
/* 16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
@@ -237,3 +265,49 @@ uint8_t dinuc2color[5][5] = { |
|
|
/* T */ {3, 2, 1, 0, 4},
|
|
|
/* N */ {4, 4, 4, 4, 4}
|
|
|
};
|
|
|
+
|
|
|
/// Complement of an encoded DNA character, indexed by the code itself.
/// NOTE(review): index and value appear to use the 0..4 Dna5 code
/// (A=0, C=1, G=2, T=3, N=4) rather than a bit mask -- confirm with callers.
int dnacomp[5] = {
    3, // 0 (A) -> 3 (T)
    2, // 1 (C) -> 2 (G)
    1, // 2 (G) -> 1 (C)
    0, // 3 (T) -> 0 (A)
    4  // 4 (N) -> 4 (N)
};
|
|
|
+
|
|
|
/// IUPAC nucleotide letters ordered by 4-bit mask value: positions 0-15
/// hold the upper-case letters, positions 16-31 the same letters in lower
/// case.  '!' occupies the slot of the empty mask (0) in both halves.
const char *iupacs =
    "!ACMGRSVTWYHKDBN"   // masks 0..15, upper case
    "!acmgrsvtwyhkdbn";  // masks 0..15, lower case
|
|
|
+
|
|
|
/// Map a 4-bit nucleotide mask to its IUPAC letter.  Bit 0 (0001) is A,
/// bit 1 (0010) is C, bit 2 (0100) is G and bit 3 (1000) is T.  The empty
/// mask has no letter and yields the sentinel (char)-1.
char mask2iupac[16] = {
    // Explicit cast: a bare -1 here is a narrowing conversion in C++11
    // list-initialization on targets where plain char is unsigned, which
    // makes the build fail.  The stored value is unchanged.
    (char)-1, // 0000
    'A',      // 0001
    'C',      // 0010
    'M',      // 0011
    'G',      // 0100
    'R',      // 0101
    'S',      // 0110
    'V',      // 0111
    'T',      // 1000
    'W',      // 1001
    'Y',      // 1010
    'H',      // 1011
    'K',      // 1100
    'D',      // 1101
    'B',      // 1110
    'N',      // 1111
};
|
|
|
+
|
|
|
/// Complement of a 4-bit nucleotide mask, indexed by the mask itself.
/// With A = 0001, C = 0010, G = 0100 and T = 1000, swapping A<->T and
/// C<->G is the same as reversing the four bits, so each entry is the
/// bit-reversed index.  '!' denotes the empty mask.
int maskcomp[16] = {
    0x0, // !  0000  =>  0000  !
    0x8, // A  0001  =>  1000  T
    0x4, // C  0010  =>  0100  G
    0xC, // M  0011  =>  1100  K
    0x2, // G  0100  =>  0010  C
    0xA, // R  0101  =>  1010  Y
    0x6, // S  0110  =>  0110  S
    0xE, // V  0111  =>  1110  B
    0x1, // T  1000  =>  0001  A
    0x9, // W  1001  =>  1001  W
    0x5, // Y  1010  =>  0101  R
    0xD, // H  1011  =>  1101  D
    0x3, // K  1100  =>  0011  M
    0xB, // D  1101  =>  1011  H
    0x7, // B  1110  =>  0111  V
    0xF, // N  1111  =>  1111  N
};
|
|
|
+
|
0 comments on commit
64af9e5