-
Notifications
You must be signed in to change notification settings - Fork 25
/
primers_interpreter.rb
113 lines (95 loc) · 3.1 KB
/
primers_interpreter.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
module BatchLoad
  # Batch-load interpreter that reads primer rows from a CSV and builds one
  # unsaved Sequence per distinct sequence string, attaching repeated
  # sequences as alternate names of the first row that introduced them.
  class Import::Sequences::PrimersInterpreter < BatchLoad::Import

    # @param [Hash] args keyword options, forwarded unchanged to the parent
    def initialize(**args)
      @sequences = {}
      # Forward as keywords rather than as a single positional Hash: since
      # Ruby 3.0 a positional Hash is no longer converted to keyword
      # arguments. NOTE(review): the parent initializer is not visible here;
      # this assumes it accepts keyword arguments - confirm.
      super(**args)
    end

    # Walks every CSV row, registers a RowParse for it in @processed_rows,
    # and then attaches a new (unsaved) Sequence to the RowParse of each row
    # that introduced a distinct sequence.
    #
    # Per-row rules:
    # * '?' in the sequence becomes 'N' and every ';' is removed.
    # * Rows whose name is already an official name, or whose sequence is
    #   blank after clean-up, are skipped and not counted as data lines.
    # * A row whose sequence was already seen under another name adds its
    #   name as an alternate name of that first (official) name.
    # * Otherwise the row's name becomes the official name for the sequence.
    #
    # Sets @total_data_lines (rows not skipped) and @total_lines (all rows).
    #
    # @return [Hash] the working hash keyed by official name (the value
    #   returned by the final each_value call)
    def build_sequences
      @total_data_lines = 0
      sequences = {}        # official name => data collected for that name
      sequence_values = {}  # cleaned sequence string => official name
      i = 0

      csv.each do |row|
        i += 1
        parse_result = BatchLoad::RowParse.new
        parse_result.objects[:sequence] = []
        @processed_rows[i] = parse_result

        name = row['name']
        # Non-mutating clean-up so the String held by the CSV row is left
        # untouched (the in-place variant would rewrite the row's own field).
        sequence = (row['sequence'] || '').tr('?', 'N').delete(';')

        next if sequences.key?(name) || sequence.blank?

        if sequence_values.key?(sequence)
          alternate_names = sequences[sequence_values[sequence]][:alternate_names]
          # The same alternate name may appear on several rows; record it once
          # so no duplicate alternate value is built for the Sequence.
          alternate_names.push(name) unless alternate_names.include?(name)
        else
          sequence_values[sequence] = name
          sequences[name] = {
            official_name: name,
            alternate_names: [],
            type: row['type'],
            gene_name: row['gene_name'],
            sequence: sequence,
            index: i
          }
        end

        @total_data_lines += 1
      end

      @total_lines = i

      sequences.each_value do |sequence_obj|
        alternate_values = sequence_obj[:alternate_names].map do |alternate_name|
          {
            type: 'AlternateValue::AlternateSpelling',
            alternate_value_object_attribute: 'name',
            value: alternate_name
          }
        end

        # Gene name and type from the CSV are kept as import attributes.
        data_attributes = [
          { type: 'ImportAttribute', import_predicate: 'GeneName', value: sequence_obj[:gene_name] },
          { type: 'ImportAttribute', import_predicate: 'Type', value: sequence_obj[:type] }
        ]

        sequence_attributes = {
          name: sequence_obj[:official_name],
          sequence_type: 'DNA',
          sequence: sequence_obj[:sequence],
          alternate_values_attributes: alternate_values,
          data_attributes_attributes: data_attributes
        }

        # The Sequence is attached to the row that first introduced it.
        @processed_rows[sequence_obj[:index]].objects[:sequence].push(Sequence.new(sequence_attributes))
      end
    end

    # Builds the sequences when the import is valid and marks it processed.
    #
    # @return [true, nil] true once processed, nil when the import is not valid
    def build
      return unless valid?

      build_sequences
      @processed = true
    end
  end
end