-
Notifications
You must be signed in to change notification settings - Fork 0
/
DataTable.tcl
194 lines (165 loc) · 5.31 KB
/
DataTable.tcl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# spinout
# Copyright 2020 Mark Hubbard, a.k.a. "TheMarkitecht"
# http://www.TheMarkitecht.com
#
# Project home: http://github.com/TheMarkitecht/spinout
# spinout is a superb pinout creation, maintenance, and conversion tool
# for FPGA developers.
#
# This file is part of spinout.
#
# spinout is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# spinout is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with spinout. If not, see <https://www.gnu.org/licenses/>.
package provide DataTable 1.0
###### classes modeling data files ####################
# DataTable models a table of data: a set of columns plus a list of rows.
# colms maps a column name to its DataColm object.
# colmOrder lists the DataColm objects (not the bare names) in the order the columns appear in the file.
# both methods of finding a column are required: by name (colms) and by position (colmOrder).
# rows holds the row objects added through addRow.
# NOTE(review): the 'r' prefix presumably declares a field that also gets a reader method of the
# same name (other code here calls e.g. [$table colms] and [$c name]) - confirm against the class framework.
class DataTable {
r colms {}
r colmOrder {}
r rows {}
}
DataTable method fromColmNames {colmNames} {
    # append one DataColm per given name, in the order given.
    # each new column takes the next free position (the current length of colmOrder) as its idx,
    # and is registered both by name (colms) and by position (colmOrder).
    foreach colmName $colmNames {
        set position [llength $colmOrder]
        set colm [DataColm new set name $colmName idx $position]
        set colms($colmName) $colm
        lappend colmOrder $colm
    }
}
DataTable method colmExists {colmName} {
    # return 1 if a column with the given name is registered in colms, otherwise 0.
    return [dict exists $colms $colmName]
}
DataTable method colmByName {colmName} {
    # return the DataColm object registered under the given name.
    # throws an error if no such column is registered.
    set colm $colms($colmName)
    return $colm
}
DataTable method colmByIdx {colmIdx} {
    # return the DataColm object at the given position in colmOrder.
    # lindex yields an empty string when the position is out of range.
    set colm [lindex $colmOrder $colmIdx]
    return $colm
}
DataTable method addRow {row} {
    # attach the given row object to this table: tell the row which table owns it,
    # then append it to the end of rows.  the result is the updated rows list.
    $row setTable $self
    return [lappend rows $row]
}
# DataColm describes one column of a DataTable.
# name is the column's header text.
# idx is the column's zero-based position; it is used to index into a row's value list.
class DataColm {
r name {}
r idx 0
}
# DataRow holds the values of one row of a DataTable.
# table is the table object that owns this row (set by fromValueList or setTable).
# vList is the list of values, indexed by column position.
# vDic is an optional dictionary form of the same values; it stays empty until updateDic is called.
class DataRow {
r table {}
r vList {}
r vDic {}
}
DataRow method fromValueList {tbl valueList} {
    # initialize this row from an owning table object and a ready-made list of values.
    # the result is the stored value list.
    set table $tbl
    return [set vList $valueList]
}
DataRow method setTable {tbl} {
    # record which table object owns this row.  the result is that table object.
    return [set table $tbl]
}
DataRow method updateDic {row} {
    # map this row's list of values into a dictionary keyed by column header name. memorize it in vDic.
    # this costs extra time on thousands of rows, so don't do it if not needed.
    #
    # the 'row' parameter is not used; it is kept only so existing callers keep working.
    #
    # the columns come from the owning table (the 'table' field).  the dictionary is driven by the
    # table's columns rather than by the values, so a row with fewer values than columns gets
    # empty strings for the missing ones, and surplus values beyond the last column are ignored.
    set vDic [dict create]
    foreach colm [$table colmOrder] {
        dict set vDic [$colm name] [lindex $vList [$colm idx]]
    }
}
DataRow method byIdx {colmIdx} {
    # look up this row's value by column position.
    # lindex yields an empty string when the position is beyond the end of the row.
    set value [lindex $vList $colmIdx]
    return $value
}
DataRow method byName {colmName} {
    # look up this row's value by column name.
    # the owning table resolves the name to a column; an unknown name raises an error there.
    # vDic is deliberately not consulted, so it never has to be built just for this lookup.
    set colm [$table colmByName $colmName]
    return [lindex $vList [$colm idx]]
}
DataRow method byName? {colmName} {
    # lenient variant of byName: an unknown column name yields an empty string instead of an error.
    if {[dict exists [$table colms] $colmName]} {
        set colm [$table colmByName $colmName]
        return [lindex $vList [$colm idx]]
    }
    return {}
}
# CsvFile is a specialization (subclass) of DataTable.
# fn holds the name of the disk file most recently loaded by fromFile.
# NOTE(review): the 'p' prefix presumably declares a field without a public reader method - confirm
# against the class framework.
class CsvFile DataTable {
p fn {}
}
# load a CsvFile object graph into memory from
# an ordinary .CSV disk file (comma-separated values).
# csvFn is the path of the file to read; it is remembered in fn.
# the first line of the file is taken as the header row; each later line becomes a CsvRow.
# throws an error if the file cannot be opened or read.
CsvFile method fromFile {csvFn} {
    set fn $csvFn
    set f [open $fn r]
    # make sure the handle is released even when reading fails.
    try {
        # carriage returns are dropped so CRLF files split cleanly on newlines.
        set raw [string map [list \r {}] [read $f]]
    } finally {
        close $f
    }
    set dataLines [lassign [split $raw \n ] headerLine]
    unset raw
    # parse header line into DataColm objects, registered by name and by position.
    set headers [[CsvRow new fromLine $headerLine] vList]
    # note that if the file was exported from Notion, it might contain nonprintable characters,
    # especially at the start of the file. those can prevent a naive script from recognizing the header row.
    # here shave off characters to prevent that problem.
    # this only takes effect when the first header ends in 'Signal'.
    lassign $headers hdr0
    if {[string range $hdr0 end-5 end] eq {Signal} } {
        set headers [lreplace $headers 0 0 Signal]
    }
    set idx -1
    foreach h $headers {
        set h [string trim $h]
        set colm [DataColm new set name $h idx [incr idx]]
        lappend colmOrder $colm
        set colms($h) $colm
    }
    # parse data rows into objects.
    foreach ln $dataLines {
        set row [CsvRow new fromLine $ln]
        # skip blank rows.  a row counts as blank when its first value is empty.
        if {[$row byIdx 0] eq {} } continue
        $self addRow $row
    }
}
CsvFile method save {csvFn} {
    # write this table to the given file path as CSV text: one header line made of the
    # column names in colmOrder, then one line per row.
    # throws an error if the file cannot be opened or written.
    set f [open $csvFn w]
    # make sure the handle is released even when formatting or writing a line fails.
    try {
        set cNames [lmap c $colmOrder {$c name}]
        puts $f [join [lmap n $cNames {CsvRow quoteForFile $n}] , ]
        foreach row $rows {
            puts $f [$row toFile]
        }
    } finally {
        close $f
    }
}
# CsvRow is a specialization (subclass) of DataRow.  it declares no fields of its own;
# it adds conversion between a row's values and a line of CSV text.
class CsvRow DataRow {
}
# break one raw line of CSV text into data values, appending each to this row's vList.
CsvRow method fromLine {rawTextLine} {
    # every match of itemRe yields six strings: the whole match, then the five capture groups.
    set pieces [regexp -all -inline $::CsvRow::itemRe $rawTextLine]
    foreach {whole plain sep1 inQuotes sep2 sep3} $pieces {
        # a quoted item (quoted due to embedded commas) is taken without its surrounding quotes;
        # otherwise the unquoted text is taken, which is empty for a bare delimiter.
        set item $plain
        if {$inQuotes ne {}} {
            set item $inQuotes
        }
        lappend vList [string trim $item]
    }
}
# itemRe matches one item of a CSV line.  the string map strips every space and newline from the
# braced text below, so the pattern can be laid out on several lines.  its three alternatives are:
#   an unquoted item: one or more characters that are neither a quote nor a comma, then a comma or end of line.
#   a quoted item: one or more non-quote characters between double quotes, then a comma or end of line.
#   a delimiter alone (an empty item).
# the five capture groups are, in order: unquoted text, its delimiter, quoted text, its delimiter,
# and the lone delimiter.
# the quoted alternative needs at least one character between the quotes and allows no embedded quote.
set ::CsvRow::itemRe [string map [list { } {} \n {}] {
([^",]+?) (,|$) |
["] ([^"]+?) ["] (,|$) |
(,|$)
}]
# oneWordRe matches a value made only of letters, digits and underscores (including the empty string).
set ::CsvRow::oneWordRe {^[a-zA-Z0-9_]*$}
CsvRow classProc quoteForFile {dataValue} {
    # prepare one data value for writing to a CSV file.
    # a value made only of letters, digits and underscores (or an empty value) is returned unchanged;
    # any other value is returned wrapped in double quotes.
    if { ! [regexp $::CsvRow::oneWordRe $dataValue]} {
        return "\"$dataValue\""
    }
    return $dataValue
}
CsvRow method toFile {} {
    # render this row as one line of CSV text: each value passed through quoteForFile,
    # joined with commas.  no line terminator is included.
    set cells {}
    foreach v $vList {
        lappend cells [CsvRow quoteForFile $v]
    }
    return [join $cells ,]
}