-
-
Notifications
You must be signed in to change notification settings - Fork 31
/
CSVImporter.swift
366 lines (311 loc) · 16 KB
/
CSVImporter.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
//
// CSVImporter.swift
// CSVImporter
//
// Created by Cihat Gündüz on 13.01.16.
// Copyright © 2016 Flinesoft. All rights reserved.
//
import Foundation
import HandySwift
/// The line terminators a CSV file may use.
///
/// Each case's raw value is the literal terminator string.
public enum LineEnding: String {
    /// Line feed (`\n`).
    case newLine = "\n"

    /// Carriage return (`\r`).
    case carriageReturn = "\r"

    /// Carriage return followed by line feed (`\r\n`).
    case carriageReturnLineFeed = "\r\n"

    /// No terminator specified; this is the default the `CSVImporter` initializers use
    /// when the line ending should be determined automatically.
    case unknown = ""
}
/// Importer for CSV files that maps your lines to a specified data structure.
///
/// Lines are pulled from a `Source`, split into fields by `readValuesInLine(_:)` and handed to a
/// caller-supplied mapper. Imports can run asynchronously (`startImportingRecords`) with
/// progress / finish / fail callbacks, or synchronously (`importRecords`).
public class CSVImporter<T> {
    // MARK: - Stored Instance Properties

    /// Provider of the raw lines to import.
    let source: Source

    /// Separator between the fields of a line.
    let delimiter: String

    /// Time of the most recent progress callback, or `nil` if none was sent yet.
    var lastProgressReport: Date?

    var progressClosure: ((_ importedDataLinesCount: Int) -> Void)?
    var finishClosure: ((_ importedRecords: [T]) -> Void)?
    var failClosure: (() -> Void)?

    /// QoS class of the global queue the asynchronous import runs on.
    let workQosClass: DispatchQoS.QoSClass

    /// QoS class of the global queue the callbacks run on, or `nil` for the main queue.
    let callbacksQosClass: DispatchQoS.QoSClass?

    // MARK: - Computed Instance Properties

    /// `true` when a progress closure is set and either no report was sent yet or the last one
    /// was sent more than 0.1 seconds ago.
    var shouldReportProgress: Bool {
        guard progressClosure != nil else { return false }
        guard let lastProgressReport = lastProgressReport else { return true }
        return Date().timeIntervalSince(lastProgressReport) > 0.1
    }

    /// The global queue (for `workQosClass`) used for the asynchronous import.
    var workDispatchQueue: DispatchQueue {
        return DispatchQueue.global(qos: workQosClass)
    }

    /// The queue callbacks are dispatched to: the main queue when `callbacksQosClass` is `nil`,
    /// otherwise the global queue for that QoS class.
    var callbacksDispatchQueue: DispatchQueue {
        guard let callbacksQosClass = callbacksQosClass else { return DispatchQueue.main }
        return DispatchQueue.global(qos: callbacksQosClass)
    }

    // MARK: - Initializers

    /// Designated initializer shared by all public convenience initializers to prevent duplicate code.
    ///
    /// Besides storing the configuration it precomputes the delimiter/quote combinations
    /// that `readValuesInLine(_:)` searches for in every line.
    private init(source: Source, delimiter: String, workQosClass: DispatchQoS.QoSClass, callbacksQosClass: DispatchQoS.QoSClass?) {
        self.source = source
        self.delimiter = delimiter
        self.workQosClass = workQosClass
        self.callbacksQosClass = callbacksQosClass

        delimiterQuoteDelimiter = "\(delimiter)\"\"\(delimiter)"
        delimiterDelimiter = delimiter + delimiter
        quoteDelimiter = "\"\"\(delimiter)"
        delimiterQuote = "\(delimiter)\"\""
    }

    /// Creates a `CSVImporter` object with required configuration options.
    ///
    /// - Parameters:
    ///   - path: The path to the CSV file to import.
    ///   - delimiter: The delimiter used within the CSV file for separating fields. Defaults to ",".
    ///   - lineEnding: The lineEnding used in the file. If not specified will be determined automatically.
    ///   - encoding: The encoding the file is read with. Defaults to `.utf8`.
    ///   - workQosClass: The QOS class of the background queue to run the heavy work in. Defaults to `.utility`.
    ///   - callbacksQosClass: The QOS class of the background queue to run the callbacks in or `nil` for the main queue. Defaults to `nil`.
    public convenience init(path: String, delimiter: String = ",", lineEnding: LineEnding = .unknown, encoding: String.Encoding = .utf8,
                            workQosClass: DispatchQoS.QoSClass = .utility, callbacksQosClass: DispatchQoS.QoSClass? = nil) {
        let textFile = TextFile(path: path, encoding: encoding)
        let fileSource = FileSource(textFile: textFile, encoding: encoding, lineEnding: lineEnding)
        self.init(source: fileSource, delimiter: delimiter, workQosClass: workQosClass, callbacksQosClass: callbacksQosClass)
    }

    /// Creates a `CSVImporter` object with required configuration options.
    ///
    /// Fails (returns `nil`) when `url` is not a file URL.
    ///
    /// - Parameters:
    ///   - url: File URL for the CSV file to import.
    ///   - delimiter: The delimiter used within the CSV file for separating fields. Defaults to ",".
    ///   - lineEnding: The lineEnding used in the file. If not specified will be determined automatically.
    ///   - encoding: The encoding the file is read with. Defaults to `.utf8`.
    ///   - workQosClass: The QOS class of the background queue to run the heavy work in. Defaults to `.utility`.
    ///   - callbacksQosClass: The QOS class of the background queue to run the callbacks in or `nil` for the main queue. Defaults to `nil`.
    public convenience init?(url: URL, delimiter: String = ",", lineEnding: LineEnding = .unknown, encoding: String.Encoding = .utf8,
                             workQosClass: DispatchQoS.QoSClass = .utility, callbacksQosClass: DispatchQoS.QoSClass? = nil) {
        guard url.isFileURL else { return nil }
        self.init(path: url.path, delimiter: delimiter, lineEnding: lineEnding, encoding: encoding, workQosClass: workQosClass, callbacksQosClass: callbacksQosClass)
    }

    /// Creates a `CSVImporter` object with required configuration options.
    ///
    /// NOTE: This initializer doesn't save any memory as the given String is already loaded into memory.
    ///       Don't use this if you are working with a large file which you could refer to with a path also.
    ///
    /// - Parameters:
    ///   - contentString: The string which contains the content of a CSV file.
    ///   - delimiter: The delimiter used within the CSV file for separating fields. Defaults to ",".
    ///   - lineEnding: The lineEnding used in the file. If not specified will be determined automatically.
    ///   - workQosClass: The QOS class of the background queue to run the heavy work in. Defaults to `.utility`.
    ///   - callbacksQosClass: The QOS class of the background queue to run the callbacks in or `nil` for the main queue. Defaults to `nil`.
    public convenience init(contentString: String, delimiter: String = ",", lineEnding: LineEnding = .unknown,
                            workQosClass: DispatchQoS.QoSClass = .utility, callbacksQosClass: DispatchQoS.QoSClass? = nil) {
        let stringSource = StringSource(contentString: contentString, lineEnding: lineEnding)
        self.init(source: stringSource, delimiter: delimiter, workQosClass: workQosClass, callbacksQosClass: callbacksQosClass)
    }

    // MARK: - Instance Methods

    /// Starts importing the records within the CSV file line by line.
    ///
    /// The work is dispatched to `workDispatchQueue`; afterwards either the finish or the fail
    /// callback is reported, depending on the result of `importLines(_:)`.
    ///
    /// - Parameters:
    ///   - mapper: A closure to map the data received in a line to your data structure.
    /// - Returns: `self` to enable consecutive method calls (e.g. `importer.startImportingRecords {...}.onProgress {...}`).
    public func startImportingRecords(mapper closure: @escaping (_ recordValues: [String]) -> T) -> Self {
        workDispatchQueue.async {
            var importedRecords = [T]()

            let importedLinesWithSuccess = self.importLines { valuesInLine in
                let newRecord = closure(valuesInLine)
                importedRecords.append(newRecord)
                self.reportProgressIfNeeded(importedRecords)
            }

            if importedLinesWithSuccess {
                self.reportFinish(importedRecords)
            } else {
                self.reportFail()
            }
        }

        return self
    }

    /// Starts importing the records within the CSV file line by line interpreting the first line as the data structure.
    ///
    /// Lines that cannot be combined with the header into a dictionary are skipped with a printed warning.
    ///
    /// - Parameters:
    ///   - structure: A closure for doing something with the found structure within the first line of the CSV file.
    ///   - recordMapper: A closure to map the dictionary data interpreted from a line to your data structure.
    /// - Returns: `self` to enable consecutive method calls (e.g. `importer.startImportingRecords {...}.onProgress {...}`).
    public func startImportingRecords(structure structureClosure: @escaping (_ headerValues: [String]) -> Void,
                                      recordMapper closure: @escaping (_ recordValues: [String: String]) -> T) -> Self {
        workDispatchQueue.async {
            var recordStructure: [String]?
            var importedRecords = [T]()

            let importedLinesWithSuccess = self.importLines { valuesInLine in
                // The first line delivered is the header; every later line is a record.
                guard let structure = recordStructure else {
                    recordStructure = valuesInLine
                    structureClosure(valuesInLine)
                    return
                }

                // The failable dictionary initializer is not defined in this file
                // (presumably provided by HandySwift — confirm).
                if let structuredValuesInLine = [String: String](keys: structure, values: valuesInLine) {
                    let newRecord = closure(structuredValuesInLine)
                    importedRecords.append(newRecord)
                    self.reportProgressIfNeeded(importedRecords)
                } else {
                    print("Warning: Couldn't structurize line.")
                }
            }

            if importedLinesWithSuccess {
                self.reportFinish(importedRecords)
            } else {
                self.reportFail()
            }
        }

        return self
    }

    /// Synchronously imports all records and provides the end result only.
    ///
    /// Use the `startImportingRecords` method for an asynchronous import with progress, fail and finish callbacks.
    /// The success flag of `importLines(_:)` is ignored here, so the result is simply empty when no line was read.
    ///
    /// - Parameters:
    ///   - mapper: A closure to map the data received in a line to your data structure.
    /// - Returns: The imported records array.
    public func importRecords(mapper closure: @escaping (_ recordValues: [String]) -> T) -> [T] {
        var importedRecords = [T]()

        _ = importLines { valuesInLine in
            let newRecord = closure(valuesInLine)
            importedRecords.append(newRecord)
        }

        return importedRecords
    }

    /// Synchronously imports all records and provides the end result only.
    ///
    /// Use the `startImportingRecords` method for an asynchronous import with progress, fail and finish callbacks.
    /// Lines that cannot be combined with the header into a dictionary are skipped with a printed warning.
    ///
    /// - Parameters:
    ///   - structure: A closure for doing something with the found structure within the first line of the CSV file.
    ///   - recordMapper: A closure to map the dictionary data interpreted from a line to your data structure.
    /// - Returns: The imported records array.
    public func importRecords(structure structureClosure: @escaping (_ headerValues: [String]) -> Void,
                              recordMapper closure: @escaping (_ recordValues: [String: String]) -> T) -> [T] {
        var recordStructure: [String]?
        var importedRecords = [T]()

        _ = importLines { valuesInLine in
            // The first line delivered is the header; every later line is a record.
            guard let structure = recordStructure else {
                recordStructure = valuesInLine
                structureClosure(valuesInLine)
                return
            }

            if let structuredValuesInLine = [String: String](keys: structure, values: valuesInLine) {
                let newRecord = closure(structuredValuesInLine)
                importedRecords.append(newRecord)
            } else {
                print("CSVImporter – Warning: Couldn't structurize line.")
            }
        }

        return importedRecords
    }

    /// Imports all lines one by one and passes the values found in each line to the given closure.
    ///
    /// - Parameters:
    ///   - closure: Called once per line with the values found within that line.
    /// - Returns: `true` if the source delivered at least one line, `false` otherwise
    ///            (e.g. the file can't be read; a source without any line also yields `false`).
    func importLines(_ closure: (_ valuesInLine: [String]) -> Void) -> Bool {
        var anyLine = false

        source.forEach { line in
            anyLine = true

            // Drain temporaries created per line so memory stays flat on big files.
            autoreleasepool {
                let valuesInLine = readValuesInLine(line)
                closure(valuesInLine)
            }
        }

        return anyLine
    }

    // Various private constants used for reading lines

    /// Matches a fragment that opens a quoted field: starts with `"` and contains no further quote.
    private let startPartRegex = try! NSRegularExpression(pattern: "\\A\"[^\"]*\\z", options: .caseInsensitive) // swiftlint:disable:this force_try

    /// Matches a fragment inside a quoted field: contains no quote at all.
    private let middlePartRegex = try! NSRegularExpression(pattern: "\\A[^\"]*\\z", options: .caseInsensitive) // swiftlint:disable:this force_try

    /// Matches a fragment that closes a quoted field: its only quote is the last character.
    private let endPartRegex = try! NSRegularExpression(pattern: "\\A[^\"]*\"\\z", options: .caseInsensitive) // swiftlint:disable:this force_try

    /// Placeholder (ASCII SUB) standing in for an escaped quote (`""`) while a line is being split.
    private let substitute = "\u{001a}"

    /// `<delimiter>""<delimiter>` – an empty quoted field between two delimiters.
    private let delimiterQuoteDelimiter: String

    /// `<delimiter><delimiter>` – an empty unquoted field between two delimiters.
    private let delimiterDelimiter: String

    /// `""<delimiter>` – an empty quoted field at the start of a line.
    private let quoteDelimiter: String

    /// `<delimiter>""` – an empty quoted field at the end of a line.
    private let delimiterQuote: String

    /// Returns whether `regex` matches `text` (all patterns used here are anchored to both ends).
    private func isFullMatch(_ regex: NSRegularExpression, _ text: String) -> Bool {
        return regex.firstMatch(in: text, options: .anchored, range: text.fullRange) != nil
    }

    /// Reads the line and returns the fields found. Handles double quotes according to RFC 4180.
    ///
    /// A line whose quoted field is never closed does not crash: a warning is printed and the
    /// fragments consumed while looking for the closing quote are dropped from the result.
    ///
    /// - Parameters:
    ///   - line: The line to read values from.
    /// - Returns: An array of values found in line.
    func readValuesInLine(_ line: String) -> [String] {
        // Empty quoted fields (`""`) must be removed up front, otherwise they would be
        // mistaken for escaped quotes by the substitution below.
        var correctedLine = line.replacingOccurrences(of: delimiterQuoteDelimiter, with: delimiterDelimiter)

        if correctedLine.hasPrefix(quoteDelimiter) {
            correctedLine = String(correctedLine.dropFirst(2))
        }

        if correctedLine.hasSuffix(delimiterQuote) {
            // Drop by Character count: `String.Index` offsets count Characters, so offsetting by a
            // UTF-16 length would run past the end for lines containing e.g. emoji.
            correctedLine = String(correctedLine.dropLast(2))
        }

        // Protect escaped quotes so the remaining quotes are purely field enclosures.
        correctedLine = correctedLine.replacingOccurrences(of: "\"\"", with: substitute)

        var components = correctedLine.components(separatedBy: delimiter)

        // Re-join fragments of quoted fields that contained the delimiter.
        var index = 0
        while index < components.count {
            let element = components[index]

            if index < components.count - 1 && isFullMatch(startPartRegex, element) {
                var elementsToMerge = [element]

                // Bounds are checked on every look-ahead: an unclosed quote can consume all
                // remaining fragments.
                while index + 1 < components.count && isFullMatch(middlePartRegex, components[index + 1]) {
                    elementsToMerge.append(components.remove(at: index + 1))
                }

                if index + 1 < components.count && isFullMatch(endPartRegex, components[index + 1]) {
                    elementsToMerge.append(components.remove(at: index + 1))
                    components[index] = elementsToMerge.joined(separator: delimiter)
                } else {
                    print("Invalid CSV format in line, opening \" must be closed – line: \(line).")
                }
            }

            index += 1
        }

        // Strip the enclosing quotes, then turn the placeholders back into literal quotes.
        components = components.map { $0.replacingOccurrences(of: "\"", with: "") }
        components = components.map { $0.replacingOccurrences(of: substitute, with: "\"") }

        return components
    }

    /// Defines callback to be called in case reading the CSV file fails.
    ///
    /// - Parameters:
    ///   - closure: The closure to be called on failure.
    /// - Returns: `self` to enable consecutive method calls (e.g. `importer.startImportingRecords {...}.onProgress {...}`).
    public func onFail(_ closure: @escaping () -> Void) -> Self {
        failClosure = closure
        return self
    }

    /// Defines callback to be called from time to time.
    /// Use this to indicate progress to a user when importing bigger files.
    ///
    /// - Parameters:
    ///   - closure: The closure to be called on progress. Takes the current count of imported lines as argument.
    /// - Returns: `self` to enable consecutive method calls (e.g. `importer.startImportingRecords {...}.onProgress {...}`).
    public func onProgress(_ closure: @escaping (_ importedDataLinesCount: Int) -> Void) -> Self {
        progressClosure = closure
        return self
    }

    /// Defines callback to be called when the import finishes.
    ///
    /// - Parameters:
    ///   - closure: The closure to be called on finish. Takes the array of all imported records mapped to as its argument.
    public func onFinish(_ closure: @escaping (_ importedRecords: [T]) -> Void) {
        finishClosure = closure
    }

    /// Dispatches the fail closure (if any) to the callbacks queue.
    func reportFail() {
        if let failClosure = failClosure {
            callbacksDispatchQueue.async {
                failClosure()
            }
        }
    }

    /// Dispatches the progress closure with the current record count to the callbacks queue,
    /// but only when `shouldReportProgress` allows it; also records the report time.
    func reportProgressIfNeeded(_ importedRecords: [T]) {
        if shouldReportProgress {
            lastProgressReport = Date()

            if let progressClosure = progressClosure {
                callbacksDispatchQueue.async {
                    progressClosure(importedRecords.count)
                }
            }
        }
    }

    /// Dispatches the finish closure (if any) with all imported records to the callbacks queue.
    func reportFinish(_ importedRecords: [T]) {
        if let finishClosure = finishClosure {
            callbacksDispatchQueue.async {
                finishClosure(importedRecords)
            }
        }
    }
}
// MARK: - Helpers
extension String {
    /// The `NSRange` spanning the entire string, measured in UTF-16 code units
    /// (the unit `NSRegularExpression` and other Foundation APIs expect).
    var fullRange: NSRange {
        let wholeString = startIndex..<endIndex
        return NSRange(wholeString, in: self)
    }
}