-
Notifications
You must be signed in to change notification settings - Fork 1
/
.Rhistory
512 lines (512 loc) · 34.2 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
#' @param ... Additional arguments to be passed between methods.
#' @param trans_table The translation table to use for translating from nucleotides to amino acids.
#' Default is 0, which indicates that censored translation should be performed. If the taxonomy
#' of the sample is known, use the function which_trans_table() to determine the translation table to use.
#' @param frame_offset The offset to the reading frame to be applied for translation. By default the offset
#' is zero, so the first character in the framed sequence is considered the first nucleotide of the first codon.
#' Passing frame_offset = 1 would offset the sequence by one and therefore make the second character in the
#' framed sequence the first nucleotide of the first codon.
#'
#' @return An object of class \code{"coi5p"}
#' @seealso \code{\link{coi5p}}
#' @seealso \code{\link{frame}}
#' @seealso \code{\link{which_trans_table}}
#' @details
#' The translate function allows for the translation of framed sequences from nucleotides to amino acids, both
#' in instances when the correct genetic code corresponding to a sequence is known, and in instances when taxonomic
#' information is unavailable or unreliable.
#' @examples
#' #previously run functions:
#' dat = coi5p(example_nt_string )
#' dat = frame(dat)
#' #translate when the translation table is not known:
#' dat = translate(dat)
#' #translate when the translation table is known:
#' dat = translate(dat, trans_table = 5)
#' #additional components in output coi5p object:
#' dat$aaSeq
#'@name translate
translate = function(x, ...){
UseMethod("translate")
}
####
#' @rdname translate
#' @export
translate.coi5p = function(x, ..., trans_table = 0, frame_offset = 0){
if(is.null(x$framed)){
stop("translate function only accepts framed coi5p objects. See function: frame.")
}
if(trans_table == 0){
x$aaSeq = censored_translation(x$framed, reading_frame = (frame_offset+1))
}else{
#split the DNA string into a vector, all characters to lower case
dna_list = strsplit(gsub('-', 'n', as.character(tolower(x$framed))),"")
dna_vec = dna_list[[1]]
#translate using the designated numcode, returns a vector of AAs
aa_vec = seqinr::translate(dna_vec, frame = frame_offset, numcode=trans_table, ambiguous= TRUE, NAstring = '-')
x$aaSeq = paste(aa_vec, collapse= "")
}
return(x)
}
#' Check if a coi5p sequence likely contains an error.
#'
#' @param x A coi5p class object for which frame() and translate() have been run.
#' @param ... Additional arguments to be passed between methods.
#' @param indel_threshold The log likelihood threshold used to assess whether or not sequences
#' are likely to contain an indel. Default is -358.88. Values lower than this will be classified
#' as likely to contain an indel and values higher will be classified as not likely to contain an indel.
#' @param aa_PHMM The profile hidden Markov model against which the translated amino acid sequence should be compared.
#' Default is the full COI-5P amino acid PHMM (aa_coi_PHMM).
#'
#' @return An object of class \code{"coi5p"}
#' @seealso \code{\link{coi5p}}
#' @seealso \code{\link{frame}}
#' @seealso \code{\link{translate}}
#' @seealso \code{\link{subsetPHMM}}
#' @details
#' The indel check function analyzes the framed and translated DNA sequences in two ways in order to
#' allow users to make an informed decision about whether or not a DNA sequence contains a frameshift error.
#' This test is designed to detect insertion or deletion errors resulting from technical errors in DNA sequencing,
#' but can in some instances identify biological contaminants (i.e. if the contaminant sequence uses a different
#' genetic code than the target, or if the contaminants are things such as pseudogenes that possess sequences that
#' are highly divergent from animal COI-5P sequences).
#'
#' The two tests performed are: (1) a query for stop codons in the amino acid sequence and (2) an evaluation of the
#' log likelihood value resulting from the comparison of the framed coi5p amino acid sequence against the COI-5P
#' amino acid PHMM. The default likelihood value for identifying a sequence is likely erroneous is -358.88. Sequences with
#' likelihood values lower than this will receive an indel_likely value of TRUE. The threshold of -358.88 was experimentally
#' determined to be the optimal likelihood threshold for separating of full-length sequences with and without errors when
#' the censored translation option is used. Sequences will have higher likelihood values when a specific genetic code is used.
#' Sequences will have lower likelihood values when they are not complete barcode sequences (i.e. <500bp in length). For these
#' reasons the likelihood threshold is not a specific value but a parameter that can be altered based on the type of translation
#' and length of the sequences. Below are experimentally determined suggested values for different size and translation table
#' combinations.
#'
#' Short barcode sequences, known genetic code: indel_threshold = -354.44
#'
#' Short barcode sequences, unknown genetic code: indel_threshold = -440.24
#'
#' Full length barcode sequences, known genetic code: indel_threshold = -246.20
#'
#' Full length barcode sequences, unknown genetic code: indel_threshold = -358.88
#'
#' Source: Nugent et al. 2019 (doi: https://doi.org/10.1101/2019.12.12.865014).
#' @examples
#' #previously run functions:
#' dat = coi5p(example_nt_string)
#' dat = frame(dat)
#' dat = translate(dat)
#' #current function
#' dat = indel_check(dat)
#' #with custom indel threshold
#' dat = indel_check(dat, indel_threshold = -400)
#' #additional components in output coi5p object:
#' dat$stop_codons #Boolean - Indicates if there are stop codons in the amino acid sequence.
#' dat$indel_likely #Boolean - Indicates if the likelihood score below the specified indel_threshold.
#' dat$aaScore #view the amino acid log likelihood score
#' @name indel_check
indel_check = function(x, ...){
UseMethod("indel_check")
}
####
#' @rdname indel_check
#' @export
indel_check.coi5p = function(x, ..., indel_threshold = -358.88, aa_PHMM = aa_coi_PHMM){
if(is.null(x$framed)|is.null(x$aaSeq) ){
stop("indel_check function only accepts framed and translated coi5p objects. See functions: frame, translate.")
}
aaBin = individual_AAbin(x$aaSeq)
aaPHMMout = aphid::Viterbi(aa_PHMM, aaBin, odds = FALSE)
x$aaScore = aaPHMMout[['score']]
x$data$aaPath = aaPHMMout[['path']]
if(x$aaScore > indel_threshold){
x$indel_likely = FALSE
}else{
x$indel_likely = TRUE
}
if(grepl('\\*', x$aaSeq)){
x$stop_codons = TRUE
}else{
x$stop_codons = FALSE
}
return(x)
}
#' Run the entire coi5p pipeline for an input sequence.
#'
#' This function will take a raw DNA sequence string and run each of the coi5p methods in turn
#' (coi5p, frame, translate, indel_check). Note that if you are not interested in all components
#' of the output (i.e. only want sequences set in frame reading or translated), then the
#' coi5p analysis functions can be called individually to avoid unnecessary computation.
#'
#'
#' @param x A nucleotide string.
#' Valid characters within the nucleotide string are: 'a', 't', 'g', 'c', '-', and 'n'.
#' The nucleotide string can be input as upper case, but will be automatically converted to lower case.
#' @param ... Additional arguments to be passed between methods.
#' @param name An optional character string. Identifier for the sequence.
#' @param trans_table The translation table to use for translating from nucleotides to amino acids.
#' Default is 0, which indicates that censored translation should be performed. If the taxonomy
#' of the sample is known, use the function which_trans_table() to determine the translation table to use.
#' @param frame_offset The offset to the reading frame to be applied for translation. By default the offset
#' is zero, so the first character in the framed sequence is considered the first nucleotide of the first codon.
#' Passing frame_offset = 1 would make the second character in the framed sequence the first nucleotide of
#' the first codon.
#' @param indel_threshold The log likelihood threshold used to assess whether or not sequences
#' are likely to contain an indel. Default is -358.88. Values lower than this will be classified
#' as likely to contain an indel and values higher will be classified as not likely to contain an indel.
#' For recommendations on selecting a indel_threshold value, consult: Nugent et al. 2019 (doi: https://doi.org/10.1101/2019.12.12.865014).
#' @param nt_PHMM The profile hidden Markov model against which the raw sequence should be compared in the framing step.
#' Default is the full COI-5P nucleotide PHMM (nt_coi_PHMM).
#' @param aa_PHMM The profile hidden Markov model against which the translated amino acid sequence should be compared
#' in the indel_check step. Default is the full COI-5P amino acid PHMM (aa_coi_PHMM).
#'
#' @return An object of class \code{"coi5p"}
#' @examples
#' dat = coi5p_pipe(example_nt_string)
#' #full coi5p object can then be printed
#' dat
#' #components of output coi5p object can be called individually:
#' dat$raw #raw input sequence
#' dat$name #name that was passed
#' dat$framed #sequence in common reading frame
#' dat$aaSeq #sequence translated to amino acids (censored)
#' dat$indel_likely #whether an insertion or deletion likely exists in the sequence
#' dat$stop_codons #whether or not there are stop codons in the amino acid sequence
#' dat = coi5p_pipe(example_nt_string , trans_table = 5)
#' dat$aaSeq #sequence translated to amino acids using designated translation table
#' @seealso \code{\link{coi5p}}
#' @seealso \code{\link{frame}}
#' @seealso \code{\link{translate}}
#' @seealso \code{\link{indel_check}}
#' @seealso \code{\link{which_trans_table}}
#' @seealso \code{\link{subsetPHMM}}
#'
#' @name coi5p_pipe
#' @export
coi5p_pipe = function(x, ... ,
name = character(),
trans_table = 0,
frame_offset = 0,
nt_PHMM = coil::nt_coi_PHMM,
aa_PHMM = coil::aa_coi_PHMM,
indel_threshold = -358.88){
dat = coi5p(x, name=name)
dat = frame(dat , nt_PHMM = nt_PHMM)
dat = translate(dat, trans_table = trans_table, frame_offset = frame_offset)
dat = indel_check(dat, indel_threshold=indel_threshold, aa_PHMM = aa_PHMM)
return(dat)
}
#' Flatten a list of coi5p output objects into a dataframe.
#'
#' This helper function is designed to act upon a list of coi5p objects and extract the object components
#' that the user requires.
#' @param x A list of coi5p objects.
#' @param keep_cols The name of a coi5p object component, or a vector of components that should be turned into
#' dataframe columns. Available components are: name, raw, framed, aaSeq, aaScore, indel_likely, stop_codons.
#' @return A dataframe with the coi5p object information flattened into columns.
#' @examples
#' #create a list of coi5p objects
#' coi_output = lapply(example_barcode_data$sequence, function(x){
#' coi5p_pipe(x)
#' })
#' #flatten the list into a dataframe
#' coi_df = flatten_coi5p(coi_output)
#' #extract only a single column
#' coi_framed = flatten_coi5p(coi_output, keep_cols = "framed")
#' #or subset multiple columns
#' coi_framed = flatten_coi5p(coi_output, keep_cols = c("framed", "aaSeq"))
#' @seealso \code{\link{coi5p_pipe}}
#' @name flatten_coi5p
#' @export
flatten_coi5p = function(x, keep_cols = "all"){
if(length(keep_cols) == 1 && keep_cols == "all"){
vals = names(x[[1]])
vals = vals[vals != "data"]
} else{
vals = keep_cols
}
data_list = list()
for(v in vals){
if(v == "data"){
stop("flatten_coi5p is not designed to return the data component of the coi5p object, it is for internal use only.")
}
if(!v %in% names(x[[1]])){
stop(paste("The coi5p objects you are flattening do not contain the column:", v))
}
if(v == "name"){
id_col = sapply(x, function(i) i[["name"]])
for(i in 1:length(id_col)){
if(length(id_col[[i]]) == 0){
id_col[[i]] = NA
}
data_list[[v]] = unlist(id_col)
}
}else{
data_list[[v]] = sapply(x, function(i) i[[v]])
}
}
return(as.data.frame(data_list, stringsAsFactors = FALSE))
}
seq_outlie = "TATGCTTTATTTTATTTTTGCTACCTGATCTGGAATGGTGGCTACAGGTTTAAGAGTTCTAATTCGAATTGAGCTAAGCGTTGCTACAGGCTGAATAGGAGACGATCAGCTTTACAACGTAATTGTTACGGCTCACGCTTTAATTATGTTATTTTTCTTTCTAATGCCTTTCCTTATGGGAGGATTTGGTAATACTCTTGTTCCTCTTATGATTGGAGCTCCAGACATGGCGTTCCCTCGAATGAACAACATGAGATTCTGAATGCTTCCCCCTTCTATGACACTTCTTCTAACATCTGCCCTAATTGAAAGAGGGGCAGGTACAGGATGGACTGTTTACCCTCCGCTATCAGGGATTGTATCCCATGCTGGTGGAAGGGTAGACTTGGCGATTTTTTCGTTACACCTTTCCGGTGCGTCTTCAATTTTAGGTACTGTAAATTTTCTTGCCACAGTGTTTAATATGCGAGGGCCTGGAATCACTTTCGAGCGAACCCCTCTATTTGTATGAGCTATGGTAGTTACAGTTGTTCTGTTACTTTTATCCCTTCCGGTATTTGCTGGTGGGATTACTATGCTACTTACAGATCGAAACTTCAATACTAGATTTTTCGATCCTGCTGGGGGTGGTGATCCTATTTTATTCCAGCACTTATTT"
dat = coi5p(seq_outlie)
dat = frame(dat)
dat$framed
processed_outlie = coi5p_pipe(seq_outlie)
processed_outlie
library(coil)
library(coil)
dat$framed
adj_for_dels
path_out
seq_outlie = "TATGCTTTATTTTATTTTTGCTACCTGATCTGGAATGGTGGCTACAGGTTTAAGAGTTCTAATTCGAATTGAGCTAAGCGTTGCTACAGGCTGAATAGGAGACGATCAGCTTTACAACGTAATTGTTACGGCTCACGCTTTAATTATGTTATTTTTCTTTCTAATGCCTTTCCTTATGGGAGGATTTGGTAATACTCTTGTTCCTCTTATGATTGGAGCTCCAGACATGGCGTTCCCTCGAATGAACAACATGAGATTCTGAATGCTTCCCCCTTCTATGACACTTCTTCTAACATCTGCCCTAATTGAAAGAGGGGCAGGTACAGGATGGACTGTTTACCCTCCGCTATCAGGGATTGTATCCCATGCTGGTGGAAGGGTAGACTTGGCGATTTTTTCGTTACACCTTTCCGGTGCGTCTTCAATTTTAGGTACTGTAAATTTTCTTGCCACAGTGTTTAATATGCGAGGGCCTGGAATCACTTTCGAGCGAACCCCTCTATTTGTATGAGCTATGGTAGTTACAGTTGTTCTGTTACTTTTATCCCTTCCGGTATTTGCTGGTGGGATTACTATGCTACTTACAGATCGAAACTTCAATACTAGATTTTTCGATCCTGCTGGGGGTGGTGATCCTATTTTATTCCAGCACTTATTT"
dat = coi5p(seq_outlie)
ntBin = individual_DNAbin(x$raw)
ntPHMMout = aphid::Viterbi(nt_PHMM, ntBin, odds = FALSE)
org_seq = x$raw
org_seq
ntPHMMout
ntPHMMout$path
org_seq_vec = strsplit(tolower(org_seq), split='')[[1]]
adj_for_dels = ins_front_trim(path_out)
adj_for_dels
add_org_drop = 0
#this line accounts for 1s and twos in the leading drop section, need to trim those out of original seq as well
add_org_drop = sum(path_out[1:(adj_for_dels-1)] == 2) + sum(path_out[1:(adj_for_dels-1)] == 1)
path_out
path_out = ntPHMMout$path
path_out
#this line accounts for 1s and twos in the leading drop section, need to trim those out of original seq as well
add_org_drop = sum(path_out[1:(adj_for_dels-1)] == 2) + sum(path_out[1:(adj_for_dels-1)] == 1)
path_out
path_out[1:(adj_for_dels-1)] == 2
adj_for_dels-1
adj_for_dels = ins_front_trim(path_out)
adj_for_dels
#this line accounts for 1s and twos in the leading drop section, need to trim those out of original seq as well
add_org_drop = sum(path_out[1:(adj_for_dels-1)] == 2) + sum(path_out[1:(adj_for_dels-1)] == 1)
add_org_drop
org_buff = sum(path_out[1:(adj_for_dels-1)] == 0)
org_buff
add_org_drop
0*org_buff
front = c(0*org_buff)
front
front = c()
org_buff = 1
1:org_buff
library(coil)
seq_outlie = "TATGCTTTATTTTATTTTTGCTACCTGATCTGGAATGGTGGCTACAGGTTTAAGAGTTCTAATTCGAATTGAGCTAAGCGTTGCTACAGGCTGAATAGGAGACGATCAGCTTTACAACGTAATTGTTACGGCTCACGCTTTAATTATGTTATTTTTCTTTCTAATGCCTTTCCTTATGGGAGGATTTGGTAATACTCTTGTTCCTCTTATGATTGGAGCTCCAGACATGGCGTTCCCTCGAATGAACAACATGAGATTCTGAATGCTTCCCCCTTCTATGACACTTCTTCTAACATCTGCCCTAATTGAAAGAGGGGCAGGTACAGGATGGACTGTTTACCCTCCGCTATCAGGGATTGTATCCCATGCTGGTGGAAGGGTAGACTTGGCGATTTTTTCGTTACACCTTTCCGGTGCGTCTTCAATTTTAGGTACTGTAAATTTTCTTGCCACAGTGTTTAATATGCGAGGGCCTGGAATCACTTTCGAGCGAACCCCTCTATTTGTATGAGCTATGGTAGTTACAGTTGTTCTGTTACTTTTATCCCTTCCGGTATTTGCTGGTGGGATTACTATGCTACTTACAGATCGAAACTTCAATACTAGATTTTTCGATCCTGCTGGGGGTGGTGATCCTATTTTATTCCAGCACTTATTT"
dat = coi5p(seq_outlie)
dat = frame(dat)
dat
library(coil)
seq_outlie = "TATGCTTTATTTTATTTTTGCTACCTGATCTGGAATGGTGGCTACAGGTTTAAGAGTTCTAATTCGAATTGAGCTAAGCGTTGCTACAGGCTGAATAGGAGACGATCAGCTTTACAACGTAATTGTTACGGCTCACGCTTTAATTATGTTATTTTTCTTTCTAATGCCTTTCCTTATGGGAGGATTTGGTAATACTCTTGTTCCTCTTATGATTGGAGCTCCAGACATGGCGTTCCCTCGAATGAACAACATGAGATTCTGAATGCTTCCCCCTTCTATGACACTTCTTCTAACATCTGCCCTAATTGAAAGAGGGGCAGGTACAGGATGGACTGTTTACCCTCCGCTATCAGGGATTGTATCCCATGCTGGTGGAAGGGTAGACTTGGCGATTTTTTCGTTACACCTTTCCGGTGCGTCTTCAATTTTAGGTACTGTAAATTTTCTTGCCACAGTGTTTAATATGCGAGGGCCTGGAATCACTTTCGAGCGAACCCCTCTATTTGTATGAGCTATGGTAGTTACAGTTGTTCTGTTACTTTTATCCCTTCCGGTATTTGCTGGTGGGATTACTATGCTACTTACAGATCGAAACTTCAATACTAGATTTTTCGATCCTGCTGGGGGTGGTGATCCTATTTTATTCCAGCACTTATTT"
dat = coi5p(seq_outlie)
dat = frame(dat)
dat$framed
dat
add_org_drop
folmer_start
front
library(coil)
library(coil)
seq_outlie = "TATGCTTTATTTTATTTTTGCTACCTGATCTGGAATGGTGGCTACAGGTTTAAGAGTTCTAATTCGAATTGAGCTAAGCGTTGCTACAGGCTGAATAGGAGACGATCAGCTTTACAACGTAATTGTTACGGCTCACGCTTTAATTATGTTATTTTTCTTTCTAATGCCTTTCCTTATGGGAGGATTTGGTAATACTCTTGTTCCTCTTATGATTGGAGCTCCAGACATGGCGTTCCCTCGAATGAACAACATGAGATTCTGAATGCTTCCCCCTTCTATGACACTTCTTCTAACATCTGCCCTAATTGAAAGAGGGGCAGGTACAGGATGGACTGTTTACCCTCCGCTATCAGGGATTGTATCCCATGCTGGTGGAAGGGTAGACTTGGCGATTTTTTCGTTACACCTTTCCGGTGCGTCTTCAATTTTAGGTACTGTAAATTTTCTTGCCACAGTGTTTAATATGCGAGGGCCTGGAATCACTTTCGAGCGAACCCCTCTATTTGTATGAGCTATGGTAGTTACAGTTGTTCTGTTACTTTTATCCCTTCCGGTATTTGCTGGTGGGATTACTATGCTACTTACAGATCGAAACTTCAATACTAGATTTTTCGATCCTGCTGGGGGTGGTGATCCTATTTTATTCCAGCACTTATTT"
processed_outlie = coi5p_pipe(seq_outlie)
processed_outlie
library(coil)
seq_outlie = "TATGCTTTATTTTATTTTTGCTACCTGATCTGGAATGGTGGCTACAGGTTTAAGAGTTCTAATTCGAATTGAGCTAAGCGTTGCTACAGGCTGAATAGGAGACGATCAGCTTTACAACGTAATTGTTACGGCTCACGCTTTAATTATGTTATTTTTCTTTCTAATGCCTTTCCTTATGGGAGGATTTGGTAATACTCTTGTTCCTCTTATGATTGGAGCTCCAGACATGGCGTTCCCTCGAATGAACAACATGAGATTCTGAATGCTTCCCCCTTCTATGACACTTCTTCTAACATCTGCCCTAATTGAAAGAGGGGCAGGTACAGGATGGACTGTTTACCCTCCGCTATCAGGGATTGTATCCCATGCTGGTGGAAGGGTAGACTTGGCGATTTTTTCGTTACACCTTTCCGGTGCGTCTTCAATTTTAGGTACTGTAAATTTTCTTGCCACAGTGTTTAATATGCGAGGGCCTGGAATCACTTTCGAGCGAACCCCTCTATTTGTATGAGCTATGGTAGTTACAGTTGTTCTGTTACTTTTATCCCTTCCGGTATTTGCTGGTGGGATTACTATGCTACTTACAGATCGAAACTTCAATACTAGATTTTTCGATCCTGCTGGGGGTGGTGATCCTATTTTATTCCAGCACTTATTT"
processed_outlie = coi5p_pipe(seq_outlie)
processed_outlie
#can the program pull off the folmer primers?
f_front = 'GGTCAACAAATCATAAAGATATTGG'
ex_seq = "ctctacttgatttttggtgcatgagcaggaatagttggaatagctttaagtttactaattcgcgctgaactaggtcaacccggatctcttttaggggatgatcagatttataatgtgatcgtaaccgcccatgcctttgtaataatcttttttatggttatacctgtaataattggtggctttggcaattgacttgttcctttaataattggtgcaccagatatagcattccctcgaataaataatataagtttctggcttcttcctccttcgttcttacttctcctggcctccgcaggagtagaagctggagcaggaaccggatgaactgtatatcctcctttagcaggtaatttagcacatgctggcccctctgttgatttagccatcttttcccttcatttggccggtatctcatcaattttagcctctattaattttattacaactattattaatataaaacccccaactatttctcaatatcaaacaccattatttgtttgatctattcttatcaccactgttcttctactccttgctctccctgttcttgcagccggaattacaatattattaacagaccgcaacctcaacactacattctttgaccccgcagggggaggggacccaattctctatcaacactta"
f_rear = 'TAAACTTCAGGGTGACCAAAAAATCA'
folmer_test = coi5p_pipe(paste0(f_front,ex_seq, f_rear ), name = 'primer_test')
library(coil)
f_front = 'GGTCAACAAATCATAAAGATATTGG'
ex_seq = "ctctacttgatttttggtgcatgagcaggaatagttggaatagctttaagtttactaattcgcgctgaactaggtcaacccggatctcttttaggggatgatcagatttataatgtgatcgtaaccgcccatgcctttgtaataatcttttttatggttatacctgtaataattggtggctttggcaattgacttgttcctttaataattggtgcaccagatatagcattccctcgaataaataatataagtttctggcttcttcctccttcgttcttacttctcctggcctccgcaggagtagaagctggagcaggaaccggatgaactgtatatcctcctttagcaggtaatttagcacatgctggcccctctgttgatttagccatcttttcccttcatttggccggtatctcatcaattttagcctctattaattttattacaactattattaatataaaacccccaactatttctcaatatcaaacaccattatttgtttgatctattcttatcaccactgttcttctactccttgctctccctgttcttgcagccggaattacaatattattaacagaccgcaacctcaacactacattctttgaccccgcagggggaggggacccaattctctatcaacactta"
f_rear = 'TAAACTTCAGGGTGACCAAAAAATCA'
folmer_test = coi5p_pipe(paste0(f_front,ex_seq, f_rear ), name = 'primer_test')
folmer_test
folmer_test$data$raw_start
folmer_test$raw
folmer_test$framed
folmer_test
folmer_test$raw
x =
'GGTCAACAAATCATAAAGATATTGG'
nchar(x)
folmer_test$data$raw_start
folmer_test$data$ntPath
folmer_test$data$aaPath
folmer_test
GGTCAACAAATCATAAAGATATTGG
x ='GGTCAACAAATCATAAAGATATTGG''
GGTCAACAAATCATAAAGATATTGG
'''
x ='GGTCAACAAATCATAAAGATATTGG'
nchar(x)
coi_output = lapply(example_barcode_data$sequence, function(x){
coi5p_pipe(x)
})
out_df = flatten_coi5p(coi_output)
out_df = flatten_coi5p(coi_output, keep_cols = c("name", "raw", "stop_codons"))
out_df$stop_codon
out_df
example_barcode_data
x = example_barcode_data$sequence[[6]]
x
y = coi5p_pipe(x)
y
y$data$ntPath
y$align_report
#can the program pull off the folmer primers?
f_front = 'GGTCAACAAATCATAAAGATATTGG'
ex_seq = "ctctacttgatttttggtgcatgagcaggaatagttggaatagctttaagtttactaattcgcgctgaactaggtcaacccggatctcttttaggggatgatcagatttataatgtgatcgtaaccgcccatgcctttgtaataatcttttttatggttatacctgtaataattggtggctttggcaattgacttgttcctttaataattggtgcaccagatatagcattccctcgaataaataatataagtttctggcttcttcctccttcgttcttacttctcctggcctccgcaggagtagaagctggagcaggaaccggatgaactgtatatcctcctttagcaggtaatttagcacatgctggcccctctgttgatttagccatcttttcccttcatttggccggtatctcatcaattttagcctctattaattttattacaactattattaatataaaacccccaactatttctcaatatcaaacaccattatttgtttgatctattcttatcaccactgttcttctactccttgctctccctgttcttgcagccggaattacaatattattaacagaccgcaacctcaacactacattctttgaccccgcagggggaggggacccaattctctatcaacactta"
f_rear = 'TAAACTTCAGGGTGACCAAAAAATCA'
folmer_test = coi5p_pipe(paste0(f_front,ex_seq, f_rear ), name = 'primer_test')
folmer_test$data$raw_start
folmer_test$framed
folmer_test$data$ntPath
y = example_barcode_data$sequence[[6]]
x = coi5p_pipe(y)
x$data$ntPath
path_out = x$data$ntPath
path_out
path_out[1:2]
path_out[1:2] == c(1,2)
all.equal(path_out[1:2],c(1,2))
all.equal(path_out[1:2],c(1,2))
path_out[1] = 2
path_out[2] = 1
path_out
out_df = flatten_coi5p(coi_output)
f_front = 'GGTCAACAAATCATAAAGATATTGG'
ex_seq = "ctctacttgatttttggtgcatgagcaggaatagttggaatagctttaagtttactaattcgcgctgaactaggtcaacccggatctcttttaggggatgatcagatttataatgtgatcgtaaccgcccatgcctttgtaataatcttttttatggttatacctgtaataattggtggctttggcaattgacttgttcctttaataattggtgcaccagatatagcattccctcgaataaataatataagtttctggcttcttcctccttcgttcttacttctcctggcctccgcaggagtagaagctggagcaggaaccggatgaactgtatatcctcctttagcaggtaatttagcacatgctggcccctctgttgatttagccatcttttcccttcatttggccggtatctcatcaattttagcctctattaattttattacaactattattaatataaaacccccaactatttctcaatatcaaacaccattatttgtttgatctattcttatcaccactgttcttctactccttgctctccctgttcttgcagccggaattacaatattattaacagaccgcaacctcaacactacattctttgaccccgcagggggaggggacccaattctctatcaacactta"
f_rear = 'TAAACTTCAGGGTGACCAAAAAATCA'
folmer_test = coi5p_pipe(paste0(f_front,ex_seq, f_rear ), name = 'primer_test')
#26th bp is bp 1, the 25 primer bp are removed
expect_equal(folmer_test$data$raw_start, 26)
folmer_test
x = c(1,2,3,4,5,6,7,8)
x[1:7]
long_sequence = 'aaccgctgattattttcaaccaaccacaaagatatcggcaaactttatattttatttttggagcttgagctggaatagttggaacatctttaagaattttaattcgagctgaattaggacatcctggagcattaattggagatgatcaaatttataatgtaattgtaactgcacatgcttttattataattttttttatggttatacctattataattggtggatttggaaattgattagtgcctttaatattaggtgctcctgatatagcattcccacgaataaataatataagattttgactactacctcctgctctttctttactattagtaagtagaatagttgaaaatggagctggaacaggatgaactgtttatccacctttatccgctggaattgctcatggtggagcttcagttgatttagctattttttctctacatttagcagggatttcttcaattttaggagctctaaattttattacaactgtaattaatatacgatcaacaggaatttcattagatcgtatacctttatttgtttgatcagtagttattactgctttattattgttattatcacttccagtactagcaggagctattactatattattaacagatcgaaatttaaatacatcattttttgacccagcgggaggaggagatcctattttatatcaacatttattatttattta'
long_sequence_framed = 'actttatattttatttttggagcttgagctggaatagttggaacatctttaagaattttaattcgagctgaattaggacatcctggagcattaattggagatgatcaaatttataatgtaattgtaactgcacatgcttttattataattttttttatggttatacctattataattggtggatttggaaattgattagtgcctttaatattaggtgctcctgatatagcattcccacgaataaataatataagattttgactactacctcctgctctttctttactattagtaagtagaatagttgaaaatggagctggaacaggatgaactgtttatccacctttatccgctggaattgctcatggtggagcttcagttgatttagctattttttctctacatttagcagggatttcttcaattttaggagctctaaattttattacaactgtaattaatatacgatcaacaggaatttcattagatcgtatacctttatttgtttgatcagtagttattactgctttattattgttattatcacttccagtactagcaggagctattactatattattaacagatcgaaatttaaatacatcattttttgacccagcgggaggaggagatcctattttatatcaacattta'
#expected:
#42 leading bp trimmed and there is a leading dash
x_long = 42
y_long = 1
long_desc = paste0("Base pair ", x_long, " of the raw sequence is base pair ", y_long, " of the COI-5P region.")
long_desc
library(coil)
#expected:
#42 leading bp trimmed and there is a leading dash
x_long = 42
y_long = 1
long_desc = paste0("Base pair ", x_long, " of the raw sequence is base pair ", y_long, " of the COI-5P region.")
long_desc
long_dat = coi5p(long_sequence, name = 'long_frame_report')
long_dat = frame(long_dat)
long_dat
long_dat$data$ntPath
long_sequence = 'aaccgctgattattttcaaccaaccacaaagatatcggcaaactttatattttatttttggagcttgagctggaatagttggaacatctttaagaattttaattcgagctgaattaggacatcctggagcattaattggagatgatcaaatttataatgtaattgtaactgcacatgcttttattataattttttttatggttatacctattataattggtggatttggaaattgattagtgcctttaatattaggtgctcctgatatagcattcccacgaataaataatataagattttgactactacctcctgctctttctttactattagtaagtagaatagttgaaaatggagctggaacaggatgaactgtttatccacctttatccgctggaattgctcatggtggagcttcagttgatttagctattttttctctacatttagcagggatttcttcaattttaggagctctaaattttattacaactgtaattaatatacgatcaacaggaatttcattagatcgtatacctttatttgtttgatcagtagttattactgctttattattgttattatcacttccagtactagcaggagctattactatattattaacagatcgaaatttaaatacatcattttttgacccagcgggaggaggagatcctattttatatcaacatttattatttattta'
long_dat = coi5p(long_sequence, name = 'long_frame_report')
long_dat = frame(long_dat)
long_dat
library(coil)
library(coil)
long_sequence = 'aaccgctgattattttcaaccaaccacaaagatatcggcaaactttatattttatttttggagcttgagctggaatagttggaacatctttaagaattttaattcgagctgaattaggacatcctggagcattaattggagatgatcaaatttataatgtaattgtaactgcacatgcttttattataattttttttatggttatacctattataattggtggatttggaaattgattagtgcctttaatattaggtgctcctgatatagcattcccacgaataaataatataagattttgactactacctcctgctctttctttactattagtaagtagaatagttgaaaatggagctggaacaggatgaactgtttatccacctttatccgctggaattgctcatggtggagcttcagttgatttagctattttttctctacatttagcagggatttcttcaattttaggagctctaaattttattacaactgtaattaatatacgatcaacaggaatttcattagatcgtatacctttatttgtttgatcagtagttattactgctttattattgttattatcacttccagtactagcaggagctattactatattattaacagatcgaaatttaaatacatcattttttgacccagcgggaggaggagatcctattttatatcaacatttattatttattta'
long_sequence_framed = '-ctttatattttatttttggagcttgagctggaatagttggaacatctttaagaattttaattcgagctgaattaggacatcctggagcattaattggagatgatcaaatttataatgtaattgtaactgcacatgcttttattataattttttttatggttatacctattataattggtggatttggaaattgattagtgcctttaatattaggtgctcctgatatagcattcccacgaataaataatataagattttgactactacctcctgctctttctttactattagtaagtagaatagttgaaaatggagctggaacaggatgaactgtttatccacctttatccgctggaattgctcatggtggagcttcagttgatttagctattttttctctacatttagcagggatttcttcaattttaggagctctaaattttattacaactgtaattaatatacgatcaacaggaatttcattagatcgtatacctttatttgtttgatcagtagttattactgctttattattgttattatcacttccagtactagcaggagctattactatattattaacagatcgaaatttaaatacatcattttttgacccagcgggaggaggagatcctattttatatcaacattta'
#expected:
#42 leading bp trimmed and there is a leading dash
x_long = 42
y_long = 1
long_desc = paste0("Base pair ", x_long, " of the raw sequence is base pair ", y_long, " of the COI-5P region.")
long_dat = coi5p(long_sequence, name = 'long_frame_report')
long_dat = frame(long_dat)
long_dat$data$raw_int_trim
long_dat$data$raw_start
long_dat$data$folmer_start
long_dat$align_report
long_dat$was_trimmed
long_dat$framed
library(coil)
f_front = 'GGTCAACAAATCATAAAGATATTGG'
ex_seq = "ctctacttgatttttggtgcatgagcaggaatagttggaatagctttaagtttactaattcgcgctgaactaggtcaacccggatctcttttaggggatgatcagatttataatgtgatcgtaaccgcccatgcctttgtaataatcttttttatggttatacctgtaataattggtggctttggcaattgacttgttcctttaataattggtgcaccagatatagcattccctcgaataaataatataagtttctggcttcttcctccttcgttcttacttctcctggcctccgcaggagtagaagctggagcaggaaccggatgaactgtatatcctcctttagcaggtaatttagcacatgctggcccctctgttgatttagccatcttttcccttcatttggccggtatctcatcaattttagcctctattaattttattacaactattattaatataaaacccccaactatttctcaatatcaaacaccattatttgtttgatctattcttatcaccactgttcttctactccttgctctccctgttcttgcagccggaattacaatattattaacagaccgcaacctcaacactacattctttgaccccgcagggggaggggacccaattctctatcaacactta"
f_rear = 'TAAACTTCAGGGTGACCAAAAAATCA'
y = paste0(f_front,ex_seq, f_rear )
x = coi5p(y)
x
ntBin = individual_DNAbin(x$raw)
ntBin
ntPHMMout = aphid::Viterbi(nt_PHMM, ntBin, odds = FALSE)
ntPHMMout
ntPHMMout[['path']]
ntPHMMout = aphid::Viterbi(nt_PHMM, ntBin, odds = FALSE)
nt_PHMM = nt_coi_PHMM
ntPHMMout = aphid::Viterbi(nt_PHMM, ntBin, odds = FALSE)
ntPHMMout[['path']]
#install.packages('coil')
library(coil)
output = coi5p_pipe(example_nt_string)
output
library(coil)
library(devtools)
devtools::install_github('CNuge/coil')
sequence = 'ctttacctgatttttggtgcatgagcaggtatagttggaacagccctaagtctcctaattcgagctgaacttgggcaacctggatcacttttaggagatgatcagatttataatgtaatcgtaaccgcccacgcttttgtaataatctttttcatggttataccaattataattggtggtttcggaaattgattagttcctttaataattggagcgccagatatagccttcccacgaataaataacataagtttctgacttcttccaccatcatttcttcttctcctcgcctctgctggagtagaagctggagcaggtactggttgaacagtttatcctccattagctagcaatctagcacatgctggaccatctgttgatttagctattttttctcttcacttagccggtgtttcatcaattttagcttcaattaattttatcacaaccattattaatataaaaccaccagctatttcccaatatcaaacaccattatttgtttgatctattcttgtaaccactattcttcttctcctctcacttccagttcttgcagcaggaattacaatattacttacagatcgtaaccttaatactacattctttgaccctgcaggtggaggagacccaatcctttatcaacattta'
sequence_framed = '---ctttacctgatttttggtgcatgagcaggtatagttggaacagccctaagtctcctaattcgagctgaacttgggcaacctggatcacttttaggagatgatcagatttataatgtaatcgtaaccgcccacgcttttgtaataatctttttcatggttataccaattataattggtggtttcggaaattgattagttcctttaataattggagcgccagatatagccttcccacgaataaataacataagtttctgacttcttccaccatcatttcttcttctcctcgcctctgctggagtagaagctggagcaggtactggttgaacagtttatcctccattagctagcaatctagcacatgctggaccatctgttgatttagctattttttctcttcacttagccggtgtttcatcaattttagcttcaattaattttatcacaaccattattaatataaaaccaccagctatttcccaatatcaaacaccattatttgtttgatctattcttgtaaccactattcttcttctcctctcacttccagttcttgcagcaggaattacaatattacttacagatcgtaaccttaatactacattctttgaccctgcaggtggaggagacccaatcctttatcaacattta'
seqname = 'test_seq1'
sequence_AAcensored = "-LYLIFGAWAG?VGTALSLLIRAELGQPGSLLGDDQIYNVIVTAHAFV?IFFMV?PI?IGGFGNWLVPL?IGAPD?AFPR?NN?SFWLLPPSFLLLLASAGVEAGAGTGWTVYPPLASNLAHAGPSVDLAIFSLHLAGVSSILASINFITTIIN??PPAISQYQTPLFVWSILVTTILLLLSLPVLAAGIT?LLTDRNLNTTFFDPAGGGDPILYQHL"
sequence_AA5 ="-LYLIFGAWAGMVGTALSLLIRAELGQPGSLLGDDQIYNVIVTAHAFVMIFFMVMPIMIGGFGNWLVPLMIGAPDMAFPRMNNMSFWLLPPSFLLLLASAGVEAGAGTGWTVYPPLASNLAHAGPSVDLAIFSLHLAGVSSILASINFITTIINMKPPAISQYQTPLFVWSILVTTILLLLSLPVLAAGITMLLTDRNLNTTFFDPAGGGDPILYQHL"
dat = coi5p(sequence, name = seqname)
expect_equal(dat$raw, sequence)
expect_equal(dat$name, seqname)
library(testthat)
library coil
library(coil)
sequence = 'ctttacctgatttttggtgcatgagcaggtatagttggaacagccctaagtctcctaattcgagctgaacttgggcaacctggatcacttttaggagatgatcagatttataatgtaatcgtaaccgcccacgcttttgtaataatctttttcatggttataccaattataattggtggtttcggaaattgattagttcctttaataattggagcgccagatatagccttcccacgaataaataacataagtttctgacttcttccaccatcatttcttcttctcctcgcctctgctggagtagaagctggagcaggtactggttgaacagtttatcctccattagctagcaatctagcacatgctggaccatctgttgatttagctattttttctcttcacttagccggtgtttcatcaattttagcttcaattaattttatcacaaccattattaatataaaaccaccagctatttcccaatatcaaacaccattatttgtttgatctattcttgtaaccactattcttcttctcctctcacttccagttcttgcagcaggaattacaatattacttacagatcgtaaccttaatactacattctttgaccctgcaggtggaggagacccaatcctttatcaacattta'
sequence_framed = '---ctttacctgatttttggtgcatgagcaggtatagttggaacagccctaagtctcctaattcgagctgaacttgggcaacctggatcacttttaggagatgatcagatttataatgtaatcgtaaccgcccacgcttttgtaataatctttttcatggttataccaattataattggtggtttcggaaattgattagttcctttaataattggagcgccagatatagccttcccacgaataaataacataagtttctgacttcttccaccatcatttcttcttctcctcgcctctgctggagtagaagctggagcaggtactggttgaacagtttatcctccattagctagcaatctagcacatgctggaccatctgttgatttagctattttttctcttcacttagccggtgtttcatcaattttagcttcaattaattttatcacaaccattattaatataaaaccaccagctatttcccaatatcaaacaccattatttgtttgatctattcttgtaaccactattcttcttctcctctcacttccagttcttgcagcaggaattacaatattacttacagatcgtaaccttaatactacattctttgaccctgcaggtggaggagacccaatcctttatcaacattta'
seqname = 'test_seq1'
sequence_AAcensored = "-LYLIFGAWAG?VGTALSLLIRAELGQPGSLLGDDQIYNVIVTAHAFV?IFFMV?PI?IGGFGNWLVPL?IGAPD?AFPR?NN?SFWLLPPSFLLLLASAGVEAGAGTGWTVYPPLASNLAHAGPSVDLAIFSLHLAGVSSILASINFITTIIN??PPAISQYQTPLFVWSILVTTILLLLSLPVLAAGIT?LLTDRNLNTTFFDPAGGGDPILYQHL"
sequence_AA5 ="-LYLIFGAWAGMVGTALSLLIRAELGQPGSLLGDDQIYNVIVTAHAFVMIFFMVMPIMIGGFGNWLVPLMIGAPDMAFPRMNNMSFWLLPPSFLLLLASAGVEAGAGTGWTVYPPLASNLAHAGPSVDLAIFSLHLAGVSSILASINFITTIINMKPPAISQYQTPLFVWSILVTTILLLLSLPVLAAGITMLLTDRNLNTTFFDPAGGGDPILYQHL"
dat = coi5p(sequence, name = seqname)
expect_equal(dat$raw, sequence)
expect_equal(dat$name, seqname)
coi_output = lapply(example_barcode_data$sequence, function(x){
coi5p_pipe(x)
})
out_df = flatten_coi5p(coi_output)
out_df = flatten_coi5p(coi_output, keep_cols = c("name", "raw", "stop_codons"))
out_df$stop_codons
library(coil)
sequence = 'ctttacctgatttttggtgcatgagcaggtatagttggaacagccctaagtctcctaattcgagctgaacttgggcaacctggatcacttttaggagatgatcagatttataatgtaatcgtaaccgcccacgcttttgtaataatctttttcatggttataccaattataattggtggtttcggaaattgattagttcctttaataattggagcgccagatatagccttcccacgaataaataacataagtttctgacttcttccaccatcatttcttcttctcctcgcctctgctggagtagaagctggagcaggtactggttgaacagtttatcctccattagctagcaatctagcacatgctggaccatctgttgatttagctattttttctcttcacttagccggtgtttcatcaattttagcttcaattaattttatcacaaccattattaatataaaaccaccagctatttcccaatatcaaacaccattatttgtttgatctattcttgtaaccactattcttcttctcctctcacttccagttcttgcagcaggaattacaatattacttacagatcgtaaccttaatactacattctttgaccctgcaggtggaggagacccaatcctttatcaacattta'
sequence_framed = '---ctttacctgatttttggtgcatgagcaggtatagttggaacagccctaagtctcctaattcgagctgaacttgggcaacctggatcacttttaggagatgatcagatttataatgtaatcgtaaccgcccacgcttttgtaataatctttttcatggttataccaattataattggtggtttcggaaattgattagttcctttaataattggagcgccagatatagccttcccacgaataaataacataagtttctgacttcttccaccatcatttcttcttctcctcgcctctgctggagtagaagctggagcaggtactggttgaacagtttatcctccattagctagcaatctagcacatgctggaccatctgttgatttagctattttttctcttcacttagccggtgtttcatcaattttagcttcaattaattttatcacaaccattattaatataaaaccaccagctatttcccaatatcaaacaccattatttgtttgatctattcttgtaaccactattcttcttctcctctcacttccagttcttgcagcaggaattacaatattacttacagatcgtaaccttaatactacattctttgaccctgcaggtggaggagacccaatcctttatcaacattta'
seqname = 'test_seq1'
sequence_AAcensored = "-LYLIFGAWAG?VGTALSLLIRAELGQPGSLLGDDQIYNVIVTAHAFV?IFFMV?PI?IGGFGNWLVPL?IGAPD?AFPR?NN?SFWLLPPSFLLLLASAGVEAGAGTGWTVYPPLASNLAHAGPSVDLAIFSLHLAGVSSILASINFITTIIN??PPAISQYQTPLFVWSILVTTILLLLSLPVLAAGIT?LLTDRNLNTTFFDPAGGGDPILYQHL"
sequence_AA5 ="-LYLIFGAWAGMVGTALSLLIRAELGQPGSLLGDDQIYNVIVTAHAFVMIFFMVMPIMIGGFGNWLVPLMIGAPDMAFPRMNNMSFWLLPPSFLLLLASAGVEAGAGTGWTVYPPLASNLAHAGPSVDLAIFSLHLAGVSSILASINFITTIINMKPPAISQYQTPLFVWSILVTTILLLLSLPVLAAGITMLLTDRNLNTTFFDPAGGGDPILYQHL"
dat = coi5p(sequence, name = seqname)
expect_equal(dat$raw, sequence)
expect_equal(dat$name, seqname)
data_full = coi5p_pipe(sequence, name = seqname, trans_table = 5)
expect_equal(data_full$raw, sequence)
expect_equal(data_full$name, seqname)
expect_equal(data_full$framed, sequence_framed)
expect_equal(data_full$aaSeq, sequence_AA5)
expect_equal(data_full$indel_likely, FALSE)
expect_equal(data_full$stop_codons, FALSE)
coi_output = lapply(example_barcode_data$sequence, function(x){
coi5p_pipe(x)
})
out_df = flatten_coi5p(coi_output)
out_df = flatten_coi5p(coi_output, keep_cols = c("name", "raw", "stop_codons"))
expect_equal(out_df$stop_codons, c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE))
out_df$raw
out_df$stop_codons
out_df
example_barcode_data
out_df$raw[[6]]
out_df$framed[[6]]
names(out_df)
out_df
out_df = flatten_coi5p(coi_output)
out_df