diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json
index 2f1f6c53..a335ae8d 100644
--- a/dev/.documenter-siteinfo.json
+++ b/dev/.documenter-siteinfo.json
@@ -1 +1 @@
-{"documenter":{"julia_version":"1.10.1","generation_timestamp":"2024-03-02T21:44:37","documenter_version":"1.3.0"}}
\ No newline at end of file
+{"documenter":{"julia_version":"1.10.2","generation_timestamp":"2024-03-08T00:12:18","documenter_version":"1.3.0"}}
\ No newline at end of file
diff --git a/dev/examples.html b/dev/examples.html
index 998d4b7b..db05e92f 100644
--- a/dev/examples.html
+++ b/dev/examples.html
@@ -538,4 +538,4 @@
 8CD2E,GC
 &quot;&quot;&quot;
 
-file = CSV.File(IOBuffer(data); pool=(0.5, 2))</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="writing.html">« Writing</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Saturday 2 March 2024 21:44">Saturday 2 March 2024</span>. Using Julia version 1.10.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+file = CSV.File(IOBuffer(data); pool=(0.5, 2))</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="writing.html">« Writing</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Friday 8 March 2024 00:12">Friday 8 March 2024</span>. Using Julia version 1.10.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/index.html b/dev/index.html
index c6a97237..88210c76 100644
--- a/dev/index.html
+++ b/dev/index.html
@@ -1,2 +1,2 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Home · CSV.jl</title><meta name="title" content="Home · CSV.jl"/><meta property="og:title" content="Home · CSV.jl"/><meta property="twitter:title" content="Home · CSV.jl"/><meta name="description" content="Documentation for CSV.jl."/><meta property="og:description" content="Documentation for CSV.jl."/><meta property="twitter:description" content="Documentation for CSV.jl."/><script data-outdated-warner src="assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="search_index.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script></head><body><div id="documenter"><nav 
class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit"><a href="index.html">CSV.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li class="is-active"><a class="tocitem" href="index.html">Home</a><ul class="internal"><li><a class="tocitem" href="#Installation"><span>Installation</span></a></li><li><a class="tocitem" href="#Overview"><span>Overview</span></a></li></ul></li><li><a class="tocitem" href="reading.html">Reading</a></li><li><a class="tocitem" href="writing.html">Writing</a></li><li><a class="tocitem" href="examples.html">Examples</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href="index.html">Home</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href="index.html">Home</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/CSV.jl/blob/main/docs/src/index.md#" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 
id="CSV.jl-Documentation"><a class="docs-heading-anchor" href="#CSV.jl-Documentation">CSV.jl Documentation</a><a id="CSV.jl-Documentation-1"></a><a class="docs-heading-anchor-permalink" href="#CSV.jl-Documentation" title="Permalink"></a></h1><p>GitHub Repo: <a href="https://github.com/JuliaData/CSV.jl">https://github.com/JuliaData/CSV.jl</a></p><p>Welcome to CSV.jl! A pure-Julia package for handling delimited text data, be it comma-delimited (csv), tab-delimited (tsv), or otherwise.</p><h2 id="Installation"><a class="docs-heading-anchor" href="#Installation">Installation</a><a id="Installation-1"></a><a class="docs-heading-anchor-permalink" href="#Installation" title="Permalink"></a></h2><p>You can install CSV by typing the following in the Julia REPL:</p><pre><code class="language-julia hljs">] add CSV </code></pre><p>followed by </p><pre><code class="language-julia hljs">using CSV</code></pre><p>to load the package.</p><h2 id="Overview"><a class="docs-heading-anchor" href="#Overview">Overview</a><a id="Overview-1"></a><a class="docs-heading-anchor-permalink" href="#Overview" title="Permalink"></a></h2><p>To start out, let&#39;s discuss the high-level functionality provided by the package, which hopefully will help direct you to more specific documentation for your use-case:</p><ul><li><a href="reading.html#CSV.File"><code>CSV.File</code></a>: the most commonly used function for ingesting delimited data; will read an entire data input or vector of data inputs, detecting number of columns and rows, along with the type of data for each column. Returns a <code>CSV.File</code> object, which is like a lightweight table/DataFrame. Assuming <code>file</code> is a variable of a <code>CSV.File</code> object, individual columns can be accessed like <code>file.col1</code>, <code>file[:col1]</code>, or <code>file[&quot;col&quot;]</code>. You can see parsed column names via <code>file.names</code>. 
A <code>CSV.File</code> can also be iterated, where a <code>CSV.Row</code> is produced on each iteration, which allows access to each value in the row via <code>row.col1</code>, <code>row[:col1]</code>, or <code>row[1]</code>. You can also index a <code>CSV.File</code> directly, like <code>file[1]</code> to return the entire <code>CSV.Row</code> at the provided index/row number. Multiple threads will be used while parsing the input data if the input is large enough, and full return column buffers to hold the parsed data will be allocated. <code>CSV.File</code> satisfies the <a href="https://github.com/JuliaData/Tables.jl">Tables.jl</a> &quot;source&quot; interface, and so can be passed to valid sink functions like <code>DataFrame</code>, <code>SQLite.load!</code>, <code>Arrow.write</code>, etc. Supports a number of keyword arguments to control parsing, column type, and other file metadata options.</li><li><a href="reading.html#CSV.read"><code>CSV.read</code></a>: a convenience function identical to <code>CSV.File</code>, but used when a <code>CSV.File</code> will be passed directly to a sink function, like a <code>DataFrame</code>. In some cases, sinks may make copies of incoming data for their own safety; by calling <code>CSV.read(file, DataFrame)</code>, no copies of the parsed <code>CSV.File</code> will be made, and the <code>DataFrame</code> will take direct ownership of the <code>CSV.File</code>&#39;s columns, which is more efficient than doing <code>CSV.File(file) |&gt; DataFrame</code> which will result in an extra copy of each column being made. Keyword arguments are identical to <code>CSV.File</code>. Any valid Tables.jl sink function/table type can be passed as the 2nd argument. Like <code>CSV.File</code>, a vector of data inputs can be passed as the 1st argument, which will result in a single &quot;long&quot; table of all the inputs vertically concatenated. 
Each input must have identical schemas (column names and types).</li><li><a href="reading.html#CSV.Rows"><code>CSV.Rows</code></a>: an alternative approach for consuming delimited data, where the input is only consumed one row at a time, which allows &quot;streaming&quot; the data with a lower memory footprint than <code>CSV.File</code>. Supports many of the same options as <code>CSV.File</code>, except column type handling is a little different. By default, every column type will be essentially <code>Union{Missing, String}</code>, i.e. no automatic type detection is done, but column types can be provided manually. Multithreading is not used while parsing. After constructing a <code>CSV.Rows</code> object, rows can be &quot;streamed&quot; by iterating, where each iteration produces a <code>CSV.Row2</code> object, which operates similar to <code>CSV.File</code>&#39;s <code>CSV.Row</code> type where individual row values can be accessed via <code>row.col1</code>, <code>row[:col1]</code>, or <code>row[1]</code>. If each row is processed individually, additional memory can be saved by passing <code>reusebuffer=true</code>, which means a single buffer will be allocated to hold the values of only the currently iterated row. <code>CSV.Rows</code> also supports the Tables.jl interface and can also be passed to valid sink functions.</li><li><a href="reading.html#CSV.Chunks"><code>CSV.Chunks</code></a>: similar to <code>CSV.File</code>, but allows passing a <code>ntasks::Integer</code> keyword argument which will cause the input file to be &quot;chunked&quot; up into <code>ntasks</code> number of chunks. After constructing a <code>CSV.Chunks</code> object, each iteration of the object will return a <code>CSV.File</code> of the next parsed chunk. Useful for processing extremely large files in &quot;chunks&quot;. 
Because each iterated element is a valid Tables.jl &quot;source&quot;, <code>CSV.Chunks</code> satisfies the <code>Tables.partitions</code> interface, so sinks that can process input partitions can operate by passing <code>CSV.Chunks</code> as the &quot;source&quot;.</li><li><a href="writing.html#CSV.write"><code>CSV.write</code></a>: A valid Tables.jl &quot;sink&quot; function for writing any valid input table out in a delimited text format. Supports many options for controlling the output like delimiter, quote characters, etc. Writes data to an internal buffer, which is flushed out when full, buffer size is configurable. Also supports writing out partitioned inputs as separate output files, one file per input partition. To write out a <code>DataFrame</code>, for example, it&#39;s simply <code>CSV.write(&quot;data.csv&quot;, df)</code>, or to write out a matrix, it&#39;s <code>using Tables; CSV.write(&quot;data.csv&quot;, Tables.table(mat))</code></li><li><a href="writing.html#CSV.RowWriter"><code>CSV.RowWriter</code></a>: An alternative way to produce csv output; takes any valid Tables.jl input, and on each iteration, produces a single csv-formatted string from the input table&#39;s row.</li></ul><p>That&#39;s quite a bit! Let&#39;s boil down a TL;DR:</p><ul><li>Just want to read a delimited file or collection of files and do basic stuff with data? Use <a href="reading.html#CSV.File"><code>CSV.File(file)</code></a> or <a href="reading.html#CSV.read"><code>CSV.read(file, DataFrame)</code></a></li><li>Don&#39;t need the data as a whole or want to stream through a large file row-by-row? Use <a href="reading.html#CSV.Rows"><code>CSV.Rows</code></a>.</li><li>Want to process a large file in &quot;batches&quot;/chunks? Use <a href="reading.html#CSV.Chunks"><code>CSV.Chunks</code></a>.</li><li>Need to produce a csv? Use <a href="writing.html#CSV.write"><code>CSV.write</code></a>.</li><li>Want to iterate an input table and produce a single csv string per row? 
<a href="writing.html#CSV.RowWriter"><code>CSV.RowWriter</code></a>.</li></ul><p>For the rest of the manual, we&#39;re going to have two big sections, <em><a href="reading.html#Reading">Reading</a></em> and <em><a href="writing.html#Writing">Writing</a></em> where we&#39;ll walk through the various options to <code>CSV.File</code>/<code>CSV.read</code>/<code>CSV.Rows</code>/<code>CSV.Chunks</code> and <code>CSV.write</code>/<code>CSV.RowWriter</code>.</p><ul><li><a href="reading.html#Reading">Reading</a></li><li class="no-marker"><ul><li><a href="reading.html#input"><code>input</code></a></li><li><a href="reading.html#header"><code>header</code></a></li><li><a href="reading.html#normalizenames"><code>normalizenames</code></a></li><li><a href="reading.html#skipto"><code>skipto</code></a></li><li><a href="reading.html#footerskip"><code>footerskip</code></a></li><li><a href="reading.html#transpose"><code>transpose</code></a></li><li><a href="reading.html#comment"><code>comment</code></a></li><li><a href="reading.html#ignoreemptyrows"><code>ignoreemptyrows</code></a></li><li><a href="reading.html#select"><code>select</code> / <code>drop</code></a></li><li><a href="reading.html#limit"><code>limit</code></a></li><li><a href="reading.html#ntasks"><code>ntasks</code></a></li><li><a href="reading.html#rows_to_check"><code>rows_to_check</code></a></li><li><a href="reading.html#source"><code>source</code></a></li><li><a href="reading.html#missingstring"><code>missingstring</code></a></li><li><a href="reading.html#delim"><code>delim</code></a></li><li><a href="reading.html#ignorerepeated"><code>ignorerepeated</code></a></li><li><a href="reading.html#quoted"><code>quoted</code></a></li><li><a href="reading.html#quotechar"><code>quotechar</code> / <code>openquotechar</code> / <code>closequotechar</code></a></li><li><a href="reading.html#escapechar"><code>escapechar</code></a></li><li><a href="reading.html#dateformat"><code>dateformat</code></a></li><li><a 
href="reading.html#decimal"><code>decimal</code></a></li><li><a href="reading.html#groupmark"><code>groupmark</code> / thousands separator</a></li><li><a href="reading.html#truestrings"><code>truestrings</code> / <code>falsestrings</code></a></li><li><a href="reading.html#types"><code>types</code></a></li><li><a href="reading.html#typemap"><code>typemap</code></a></li><li><a href="reading.html#pool"><code>pool</code></a></li><li><a href="reading.html#downcast"><code>downcast</code></a></li><li><a href="reading.html#stringtype"><code>stringtype</code></a></li><li><a href="reading.html#strict"><code>strict</code> / <code>silencewarnings</code> / <code>maxwarnings</code></a></li><li><a href="reading.html#debug"><code>debug</code></a></li><li><a href="reading.html#API-Reference">API Reference</a></li><li><a href="reading.html#Common-terms">Common terms</a></li></ul></li><li><a href="writing.html#Writing">Writing</a></li><li><a href="examples.html#Examples">Examples</a></li><li class="no-marker"><ul><li><a href="examples.html#stringencodings">Non-UTF-8 character encodings</a></li><li><a href="examples.html#vectorinputs">Concatenate multiple inputs at once</a></li><li><a href="examples.html#gzipped_input">Gzipped input</a></li><li><a href="examples.html#csv_string">Delimited data in a string</a></li><li><a href="examples.html#http">Data from the web/a url</a></li><li><a href="examples.html#zip_example">Reading from a zip file</a></li><li><a href="examples.html#second_row_header">Column names on 2nd row</a></li><li><a href="examples.html#no_header">No column names in data</a></li><li><a href="examples.html#manual_header">Manually provide column names</a></li><li><a href="examples.html#multi_row_header">Multi-row column names</a></li><li><a href="examples.html#normalize_header">Normalizing column names</a></li><li><a href="examples.html#skipto_example">Skip to specific row where data starts</a></li><li><a href="examples.html#footerskip_example">Skipping trailing useless 
rows</a></li><li><a href="examples.html#transpose_example">Reading transposed data</a></li><li><a href="examples.html#comment_example">Ignoring commented rows</a></li><li><a href="examples.html#ignoreemptyrows_example">Ignoring empty rows</a></li><li><a href="examples.html#select_example">Including/excluding columns</a></li><li><a href="examples.html#limit_example">Limiting number of rows from data</a></li><li><a href="examples.html#missing_string_example">Specifying custom missing strings</a></li><li><a href="examples.html#string_delim">String delimiter</a></li><li><a href="examples.html#ignorerepeated_example">Fixed width files</a></li><li><a href="examples.html#quoted_example">Turning off quoted cell parsing</a></li><li><a href="examples.html#quotechar_example">Quoted &amp; escaped fields</a></li><li><a href="examples.html#dateformat_example">DateFormat</a></li><li><a href="examples.html#decimal_example">Custom decimal separator</a></li><li><a href="examples.html#thousands_example">Thousands separator</a></li><li><a href="examples.html#groupmark_example">Custom groupmarks</a></li><li><a href="examples.html#truestrings_example">Custom bool strings</a></li><li><a href="examples.html#matrix_example">Matrix-like Data</a></li><li><a href="examples.html#types_example">Providing types</a></li><li><a href="examples.html#typemap_example">Typemap</a></li><li><a href="examples.html#pool_example">Pooled values</a></li><li><a href="examples.html#nonstring_pool_example">Non-string pooled values</a></li><li><a href="examples.html#pool_absolute_threshold">Pool with absolute threshold</a></li></ul></li></ul></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="reading.html">Reading »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div 
class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Saturday 2 March 2024 21:44">Saturday 2 March 2024</span>. Using Julia version 1.10.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Home · CSV.jl</title><meta name="title" content="Home · CSV.jl"/><meta property="og:title" content="Home · CSV.jl"/><meta property="twitter:title" content="Home · CSV.jl"/><meta name="description" content="Documentation for CSV.jl."/><meta property="og:description" content="Documentation for CSV.jl."/><meta property="twitter:description" content="Documentation for CSV.jl."/><script data-outdated-warner src="assets/warner.js"></script><link href="https://cdnjs.cloudflare.com/ajax/libs/lato-font/3.0.0/css/lato-font.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/juliamono/0.050/juliamono.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.16.8/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="search_index.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark" data-theme-primary-dark/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script></head><body><div id="documenter"><nav 
class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit"><a href="index.html">CSV.jl</a></span></div><button class="docs-search-query input is-rounded is-small is-clickable my-2 mx-auto py-1 px-2" id="documenter-search-query">Search docs (Ctrl + /)</button><ul class="docs-menu"><li class="is-active"><a class="tocitem" href="index.html">Home</a><ul class="internal"><li><a class="tocitem" href="#Installation"><span>Installation</span></a></li><li><a class="tocitem" href="#Overview"><span>Overview</span></a></li></ul></li><li><a class="tocitem" href="reading.html">Reading</a></li><li><a class="tocitem" href="writing.html">Writing</a></li><li><a class="tocitem" href="examples.html">Examples</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><a class="docs-sidebar-button docs-navbar-link fa-solid fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href="index.html">Home</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href="index.html">Home</a></li></ul></nav><div class="docs-right"><a class="docs-navbar-link" href="https://github.com/JuliaData/CSV.jl/blob/main/docs/src/index.md#" title="Edit source on GitHub"><span class="docs-icon fa-solid"></span></a><a class="docs-settings-button docs-navbar-link fa-solid fa-gear" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-article-toggle-button fa-solid fa-chevron-up" id="documenter-article-toggle-button" href="javascript:;" title="Collapse all docstrings"></a></div></header><article class="content" id="documenter-page"><h1 
id="CSV.jl-Documentation"><a class="docs-heading-anchor" href="#CSV.jl-Documentation">CSV.jl Documentation</a><a id="CSV.jl-Documentation-1"></a><a class="docs-heading-anchor-permalink" href="#CSV.jl-Documentation" title="Permalink"></a></h1><p>GitHub Repo: <a href="https://github.com/JuliaData/CSV.jl">https://github.com/JuliaData/CSV.jl</a></p><p>Welcome to CSV.jl! A pure-Julia package for handling delimited text data, be it comma-delimited (csv), tab-delimited (tsv), or otherwise.</p><h2 id="Installation"><a class="docs-heading-anchor" href="#Installation">Installation</a><a id="Installation-1"></a><a class="docs-heading-anchor-permalink" href="#Installation" title="Permalink"></a></h2><p>You can install CSV by typing the following in the Julia REPL:</p><pre><code class="language-julia hljs">] add CSV </code></pre><p>followed by </p><pre><code class="language-julia hljs">using CSV</code></pre><p>to load the package.</p><h2 id="Overview"><a class="docs-heading-anchor" href="#Overview">Overview</a><a id="Overview-1"></a><a class="docs-heading-anchor-permalink" href="#Overview" title="Permalink"></a></h2><p>To start out, let&#39;s discuss the high-level functionality provided by the package, which hopefully will help direct you to more specific documentation for your use-case:</p><ul><li><a href="reading.html#CSV.File"><code>CSV.File</code></a>: the most commonly used function for ingesting delimited data; will read an entire data input or vector of data inputs, detecting number of columns and rows, along with the type of data for each column. Returns a <code>CSV.File</code> object, which is like a lightweight table/DataFrame. Assuming <code>file</code> is a variable of a <code>CSV.File</code> object, individual columns can be accessed like <code>file.col1</code>, <code>file[:col1]</code>, or <code>file[&quot;col&quot;]</code>. You can see parsed column names via <code>file.names</code>. 
A <code>CSV.File</code> can also be iterated, where a <code>CSV.Row</code> is produced on each iteration, which allows access to each value in the row via <code>row.col1</code>, <code>row[:col1]</code>, or <code>row[1]</code>. You can also index a <code>CSV.File</code> directly, like <code>file[1]</code> to return the entire <code>CSV.Row</code> at the provided index/row number. Multiple threads will be used while parsing the input data if the input is large enough, and full return column buffers to hold the parsed data will be allocated. <code>CSV.File</code> satisfies the <a href="https://github.com/JuliaData/Tables.jl">Tables.jl</a> &quot;source&quot; interface, and so can be passed to valid sink functions like <code>DataFrame</code>, <code>SQLite.load!</code>, <code>Arrow.write</code>, etc. Supports a number of keyword arguments to control parsing, column type, and other file metadata options.</li><li><a href="reading.html#CSV.read"><code>CSV.read</code></a>: a convenience function identical to <code>CSV.File</code>, but used when a <code>CSV.File</code> will be passed directly to a sink function, like a <code>DataFrame</code>. In some cases, sinks may make copies of incoming data for their own safety; by calling <code>CSV.read(file, DataFrame)</code>, no copies of the parsed <code>CSV.File</code> will be made, and the <code>DataFrame</code> will take direct ownership of the <code>CSV.File</code>&#39;s columns, which is more efficient than doing <code>CSV.File(file) |&gt; DataFrame</code> which will result in an extra copy of each column being made. Keyword arguments are identical to <code>CSV.File</code>. Any valid Tables.jl sink function/table type can be passed as the 2nd argument. Like <code>CSV.File</code>, a vector of data inputs can be passed as the 1st argument, which will result in a single &quot;long&quot; table of all the inputs vertically concatenated. 
Each input must have identical schemas (column names and types).</li><li><a href="reading.html#CSV.Rows"><code>CSV.Rows</code></a>: an alternative approach for consuming delimited data, where the input is only consumed one row at a time, which allows &quot;streaming&quot; the data with a lower memory footprint than <code>CSV.File</code>. Supports many of the same options as <code>CSV.File</code>, except column type handling is a little different. By default, every column type will be essentially <code>Union{Missing, String}</code>, i.e. no automatic type detection is done, but column types can be provided manually. Multithreading is not used while parsing. After constructing a <code>CSV.Rows</code> object, rows can be &quot;streamed&quot; by iterating, where each iteration produces a <code>CSV.Row2</code> object, which operates similar to <code>CSV.File</code>&#39;s <code>CSV.Row</code> type where individual row values can be accessed via <code>row.col1</code>, <code>row[:col1]</code>, or <code>row[1]</code>. If each row is processed individually, additional memory can be saved by passing <code>reusebuffer=true</code>, which means a single buffer will be allocated to hold the values of only the currently iterated row. <code>CSV.Rows</code> also supports the Tables.jl interface and can also be passed to valid sink functions.</li><li><a href="reading.html#CSV.Chunks"><code>CSV.Chunks</code></a>: similar to <code>CSV.File</code>, but allows passing a <code>ntasks::Integer</code> keyword argument which will cause the input file to be &quot;chunked&quot; up into <code>ntasks</code> number of chunks. After constructing a <code>CSV.Chunks</code> object, each iteration of the object will return a <code>CSV.File</code> of the next parsed chunk. Useful for processing extremely large files in &quot;chunks&quot;. 
Because each iterated element is a valid Tables.jl &quot;source&quot;, <code>CSV.Chunks</code> satisfies the <code>Tables.partitions</code> interface, so sinks that can process input partitions can operate by passing <code>CSV.Chunks</code> as the &quot;source&quot;.</li><li><a href="writing.html#CSV.write"><code>CSV.write</code></a>: A valid Tables.jl &quot;sink&quot; function for writing any valid input table out in a delimited text format. Supports many options for controlling the output like delimiter, quote characters, etc. Writes data to an internal buffer, which is flushed out when full, buffer size is configurable. Also supports writing out partitioned inputs as separate output files, one file per input partition. To write out a <code>DataFrame</code>, for example, it&#39;s simply <code>CSV.write(&quot;data.csv&quot;, df)</code>, or to write out a matrix, it&#39;s <code>using Tables; CSV.write(&quot;data.csv&quot;, Tables.table(mat))</code></li><li><a href="writing.html#CSV.RowWriter"><code>CSV.RowWriter</code></a>: An alternative way to produce csv output; takes any valid Tables.jl input, and on each iteration, produces a single csv-formatted string from the input table&#39;s row.</li></ul><p>That&#39;s quite a bit! Let&#39;s boil down a TL;DR:</p><ul><li>Just want to read a delimited file or collection of files and do basic stuff with data? Use <a href="reading.html#CSV.File"><code>CSV.File(file)</code></a> or <a href="reading.html#CSV.read"><code>CSV.read(file, DataFrame)</code></a></li><li>Don&#39;t need the data as a whole or want to stream through a large file row-by-row? Use <a href="reading.html#CSV.Rows"><code>CSV.Rows</code></a>.</li><li>Want to process a large file in &quot;batches&quot;/chunks? Use <a href="reading.html#CSV.Chunks"><code>CSV.Chunks</code></a>.</li><li>Need to produce a csv? Use <a href="writing.html#CSV.write"><code>CSV.write</code></a>.</li><li>Want to iterate an input table and produce a single csv string per row? 
Use <a href="writing.html#CSV.RowWriter"><code>CSV.RowWriter</code></a>.</li></ul><p>For the rest of the manual, we&#39;re going to have two big sections, <em><a href="reading.html#Reading">Reading</a></em> and <em><a href="writing.html#Writing">Writing</a></em> where we&#39;ll walk through the various options to <code>CSV.File</code>/<code>CSV.read</code>/<code>CSV.Rows</code>/<code>CSV.Chunks</code> and <code>CSV.write</code>/<code>CSV.RowWriter</code>.</p><ul><li><a href="reading.html#Reading">Reading</a></li><li class="no-marker"><ul><li><a href="reading.html#input"><code>input</code></a></li><li><a href="reading.html#header"><code>header</code></a></li><li><a href="reading.html#normalizenames"><code>normalizenames</code></a></li><li><a href="reading.html#skipto"><code>skipto</code></a></li><li><a href="reading.html#footerskip"><code>footerskip</code></a></li><li><a href="reading.html#transpose"><code>transpose</code></a></li><li><a href="reading.html#comment"><code>comment</code></a></li><li><a href="reading.html#ignoreemptyrows"><code>ignoreemptyrows</code></a></li><li><a href="reading.html#select"><code>select</code> / <code>drop</code></a></li><li><a href="reading.html#limit"><code>limit</code></a></li><li><a href="reading.html#ntasks"><code>ntasks</code></a></li><li><a href="reading.html#rows_to_check"><code>rows_to_check</code></a></li><li><a href="reading.html#source"><code>source</code></a></li><li><a href="reading.html#missingstring"><code>missingstring</code></a></li><li><a href="reading.html#delim"><code>delim</code></a></li><li><a href="reading.html#ignorerepeated"><code>ignorerepeated</code></a></li><li><a href="reading.html#quoted"><code>quoted</code></a></li><li><a href="reading.html#quotechar"><code>quotechar</code> / <code>openquotechar</code> / <code>closequotechar</code></a></li><li><a href="reading.html#escapechar"><code>escapechar</code></a></li><li><a href="reading.html#dateformat"><code>dateformat</code></a></li><li><a 
href="reading.html#decimal"><code>decimal</code></a></li><li><a href="reading.html#groupmark"><code>groupmark</code> / thousands separator</a></li><li><a href="reading.html#truestrings"><code>truestrings</code> / <code>falsestrings</code></a></li><li><a href="reading.html#types"><code>types</code></a></li><li><a href="reading.html#typemap"><code>typemap</code></a></li><li><a href="reading.html#pool"><code>pool</code></a></li><li><a href="reading.html#downcast"><code>downcast</code></a></li><li><a href="reading.html#stringtype"><code>stringtype</code></a></li><li><a href="reading.html#strict"><code>strict</code> / <code>silencewarnings</code> / <code>maxwarnings</code></a></li><li><a href="reading.html#debug"><code>debug</code></a></li><li><a href="reading.html#API-Reference">API Reference</a></li><li><a href="reading.html#Common-terms">Common terms</a></li></ul></li><li><a href="writing.html#Writing">Writing</a></li><li><a href="examples.html#Examples">Examples</a></li><li class="no-marker"><ul><li><a href="examples.html#stringencodings">Non-UTF-8 character encodings</a></li><li><a href="examples.html#vectorinputs">Concatenate multiple inputs at once</a></li><li><a href="examples.html#gzipped_input">Gzipped input</a></li><li><a href="examples.html#csv_string">Delimited data in a string</a></li><li><a href="examples.html#http">Data from the web/a url</a></li><li><a href="examples.html#zip_example">Reading from a zip file</a></li><li><a href="examples.html#second_row_header">Column names on 2nd row</a></li><li><a href="examples.html#no_header">No column names in data</a></li><li><a href="examples.html#manual_header">Manually provide column names</a></li><li><a href="examples.html#multi_row_header">Multi-row column names</a></li><li><a href="examples.html#normalize_header">Normalizing column names</a></li><li><a href="examples.html#skipto_example">Skip to specific row where data starts</a></li><li><a href="examples.html#footerskip_example">Skipping trailing useless 
rows</a></li><li><a href="examples.html#transpose_example">Reading transposed data</a></li><li><a href="examples.html#comment_example">Ignoring commented rows</a></li><li><a href="examples.html#ignoreemptyrows_example">Ignoring empty rows</a></li><li><a href="examples.html#select_example">Including/excluding columns</a></li><li><a href="examples.html#limit_example">Limiting number of rows from data</a></li><li><a href="examples.html#missing_string_example">Specifying custom missing strings</a></li><li><a href="examples.html#string_delim">String delimiter</a></li><li><a href="examples.html#ignorerepeated_example">Fixed width files</a></li><li><a href="examples.html#quoted_example">Turning off quoted cell parsing</a></li><li><a href="examples.html#quotechar_example">Quoted &amp; escaped fields</a></li><li><a href="examples.html#dateformat_example">DateFormat</a></li><li><a href="examples.html#decimal_example">Custom decimal separator</a></li><li><a href="examples.html#thousands_example">Thousands separator</a></li><li><a href="examples.html#groupmark_example">Custom groupmarks</a></li><li><a href="examples.html#truestrings_example">Custom bool strings</a></li><li><a href="examples.html#matrix_example">Matrix-like Data</a></li><li><a href="examples.html#types_example">Providing types</a></li><li><a href="examples.html#typemap_example">Typemap</a></li><li><a href="examples.html#pool_example">Pooled values</a></li><li><a href="examples.html#nonstring_pool_example">Non-string pooled values</a></li><li><a href="examples.html#pool_absolute_threshold">Pool with absolute threshold</a></li></ul></li></ul></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="reading.html">Reading »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div 
class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Friday 8 March 2024 00:12">Friday 8 March 2024</span>. Using Julia version 1.10.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/reading.html b/dev/reading.html
index fddfa084..b3226ba3 100644
--- a/dev/reading.html
+++ b/dev/reading.html
@@ -18,7 +18,7 @@
      │ String1  String1  String1
 ─────┼───────────────────────────
    1 │ a        b        c
-   2 │ 1        2        3</code></pre><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. 
Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>lines_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that inputs values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/ba1f4d2e2b51017dbf7133e29b1105fd3c8116f2/src/CSV.jl#L81-L112">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.File" href="#CSV.File"><code>CSV.File</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.File(input; kwargs...) =&gt; CSV.File</code></pre><p>Read a UTF-8 CSV input and return a <code>CSV.File</code> object, which is like a lightweight table/dataframe, allowing dot-access to columns and iterating rows. Satisfies the Tables.jl interface, so can be passed to any valid sink, yet to avoid unnecessary copies of data, use <code>CSV.read(input, sink; kwargs...)</code> instead if the <code>CSV.File</code> intermediate object isn&#39;t needed.</p><p>The <a href="reading.html#input"><code>input</code></a> argument can be one of:</p><ul><li>filename given as a string or FilePaths.jl type</li><li>a <code>Vector{UInt8}</code> or <code>SubArray{UInt8, 1, Vector{UInt8}}</code> byte buffer</li><li>a <code>CodeUnits</code> object, which wraps a <code>String</code>, like <code>codeunits(str)</code></li><li>a csv-formatted string can also be passed like <code>IOBuffer(str)</code></li><li>a <code>Cmd</code> or other <code>IO</code></li><li>a gzipped file (or gzipped data in any of the above), which will automatically be decompressed for parsing</li><li>a <code>Vector</code> of any of the above, which will parse and vertically concatenate each source, returning a single, &quot;long&quot; <code>CSV.File</code></li></ul><p>To read a csv file from a url, use the Downloads.jl stdlib or HTTP.jl package, 
where the resulting downloaded tempfile or <code>HTTP.Response</code> body can be passed like:</p><pre><code class="language-julia hljs">using Downloads, CSV
+   2 │ 1        2        3</code></pre><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. 
Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will be ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows that are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>rows_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that input&#39;s values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/acd36a6df5677c159d569a4c799f88a52b9886b5/src/CSV.jl#L81-L112">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.File" href="#CSV.File"><code>CSV.File</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.File(input; kwargs...) =&gt; CSV.File</code></pre><p>Read a UTF-8 CSV input and return a <code>CSV.File</code> object, which is like a lightweight table/dataframe, allowing dot-access to columns and iterating rows. Satisfies the Tables.jl interface, so can be passed to any valid sink, yet to avoid unnecessary copies of data, use <code>CSV.read(input, sink; kwargs...)</code> instead if the <code>CSV.File</code> intermediate object isn&#39;t needed.</p><p>The <a href="reading.html#input"><code>input</code></a> argument can be one of:</p><ul><li>filename given as a string or FilePaths.jl type</li><li>a <code>Vector{UInt8}</code> or <code>SubArray{UInt8, 1, Vector{UInt8}}</code> byte buffer</li><li>a <code>CodeUnits</code> object, which wraps a <code>String</code>, like <code>codeunits(str)</code></li><li>a csv-formatted string can also be passed like <code>IOBuffer(str)</code></li><li>a <code>Cmd</code> or other <code>IO</code></li><li>a gzipped file (or gzipped data in any of the above), which will automatically be decompressed for parsing</li><li>a <code>Vector</code> of any of the above, which will parse and vertically concatenate each source, returning a single, &quot;long&quot; <code>CSV.File</code></li></ul><p>To read a csv file from a url, use the Downloads.jl stdlib or HTTP.jl package, 
where the resulting downloaded tempfile or <code>HTTP.Response</code> body can be passed like:</p><pre><code class="language-julia hljs">using Downloads, CSV
 f = CSV.File(Downloads.download(url))
 
 # or
@@ -34,6 +34,6 @@
 
 # load a csv file directly into an sqlite database table
 db = SQLite.DB()
-tbl = CSV.File(file) |&gt; SQLite.load!(db, &quot;sqlite_table&quot;)</code></pre><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. 
Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will be ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows that are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>rows_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that inputs values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/ba1f4d2e2b51017dbf7133e29b1105fd3c8116f2/src/file.jl#L34-L104">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.Chunks" href="#CSV.Chunks"><code>CSV.Chunks</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.Chunks(source; ntasks::Integer=Threads.nthreads(), kwargs...) =&gt; CSV.Chunks</code></pre><p>Returns a file &quot;chunk&quot; iterator. Accepts all the same inputs and keyword arguments as <a href="reading.html#CSV.File"><code>CSV.File</code></a>, see those docs for explanations of each keyword argument.</p><p>The <code>ntasks</code> keyword argument specifies how many chunks a file should be split up into, defaulting to the # of threads available to Julia (i.e. <code>JULIA_NUM_THREADS</code> environment variable) or 8 if Julia is run single-threaded.</p><p>Each iteration of <code>CSV.Chunks</code> produces the next chunk of a file as a <code>CSV.File</code>. While initial file metadata detection is done only once (to determine # of columns, column names, etc), each iteration does independent type inference on columns. This is significant as different chunks may end up with different column types than previous chunks as new values are encountered in the file. 
Note that, as with <code>CSV.File</code>, types may be passed manually via the <code>type</code> or <code>types</code> keyword arguments.</p><p>This functionality is new and thus considered experimental; please <a href="https://github.com/JuliaData/CSV.jl/issues/new">open an issue</a> if you run into any problems/bugs.</p><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. 
If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will be ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows that are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>rows_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that inputs values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/ba1f4d2e2b51017dbf7133e29b1105fd3c8116f2/src/chunks.jl#L6-L26">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.Rows" href="#CSV.Rows"><code>CSV.Rows</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.Rows(source; kwargs...) =&gt; CSV.Rows</code></pre><p>Read a csv input returning a <code>CSV.Rows</code> object.</p><p>The <a href="reading.html#input"><code>input</code></a> argument can be one of:</p><ul><li>filename given as a string or FilePaths.jl type</li><li>a <code>Vector{UInt8}</code> or <code>SubArray{UInt8, 1, Vector{UInt8}}</code> byte buffer</li><li>a <code>CodeUnits</code> object, which wraps a <code>String</code>, like <code>codeunits(str)</code></li><li>a csv-formatted string can also be passed like <code>IOBuffer(str)</code></li><li>a <code>Cmd</code> or other <code>IO</code></li><li>a gzipped file (or gzipped data in any of the above), which will automatically be decompressed for parsing</li></ul><p>To read a csv file from a url, use the HTTP.jl package, where the <code>HTTP.Response</code> body can be passed like:</p><pre><code class="language-julia hljs">f = CSV.Rows(HTTP.get(url).body)</code></pre><p>For other <code>IO</code> or <code>Cmd</code> inputs, you can pass them like: <code>f = CSV.Rows(read(obj))</code>.</p><p>While similar to <a href="reading.html#CSV.File"><code>CSV.File</code></a>, <code>CSV.Rows</code> provides a slightly different interface, the tradeoffs including:</p><ul><li>Very minimal memory footprint; while iterating, only the current row values 
are buffered</li><li>Only provides row access via iteration; to access columns, one can stream the rows into a table type</li><li>Performs no type inference; each column/cell is essentially treated as <code>Union{String, Missing}</code>, users can utilize the performant <code>Parsers.parse(T, str)</code> to convert values to a more specific type if needed, or pass types upon construction using the <code>type</code> or <code>types</code> keyword arguments</li></ul><p>Opens the file and uses passed arguments to detect the number of columns, <strong><em>but not</em></strong> column types (column types default to <code>String</code> unless otherwise manually provided). The returned <code>CSV.Rows</code> object supports the <a href="https://github.com/JuliaData/Tables.jl">Tables.jl</a> interface and can iterate rows. Each row object supports <code>propertynames</code>, <code>getproperty</code>, and <code>getindex</code> to access individual row values. Note that duplicate column names will be detected and adjusted to ensure uniqueness (duplicate column name <code>a</code> will become <code>a_1</code>). For example, one could iterate over a csv file with column names <code>a</code>, <code>b</code>, and <code>c</code> by doing:</p><pre><code class="language-julia hljs">for row in CSV.Rows(file)
+tbl = CSV.File(file) |&gt; SQLite.load!(db, &quot;sqlite_table&quot;)</code></pre><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. 
Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will be ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows that are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>rows_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that inputs values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/acd36a6df5677c159d569a4c799f88a52b9886b5/src/file.jl#L34-L104">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.Chunks" href="#CSV.Chunks"><code>CSV.Chunks</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.Chunks(source; ntasks::Integer=Threads.nthreads(), kwargs...) =&gt; CSV.Chunks</code></pre><p>Returns a file &quot;chunk&quot; iterator. Accepts all the same inputs and keyword arguments as <a href="reading.html#CSV.File"><code>CSV.File</code></a>, see those docs for explanations of each keyword argument.</p><p>The <code>ntasks</code> keyword argument specifies how many chunks a file should be split up into, defaulting to the # of threads available to Julia (i.e. <code>JULIA_NUM_THREADS</code> environment variable) or 8 if Julia is run single-threaded.</p><p>Each iteration of <code>CSV.Chunks</code> produces the next chunk of a file as a <code>CSV.File</code>. While initial file metadata detection is done only once (to determine # of columns, column names, etc), each iteration does independent type inference on columns. This is significant as different chunks may end up with different column types than previous chunks as new values are encountered in the file. 
Note that, as with <code>CSV.File</code>, types may be passed manually via the <code>type</code> or <code>types</code> keyword arguments.</p><p>This functionality is new and thus considered experimental; please <a href="https://github.com/JuliaData/CSV.jl/issues/new">open an issue</a> if you run into any problems/bugs.</p><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. 
If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will be ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows that are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>rows_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that inputs values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/acd36a6df5677c159d569a4c799f88a52b9886b5/src/chunks.jl#L6-L26">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.Rows" href="#CSV.Rows"><code>CSV.Rows</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.Rows(source; kwargs...) =&gt; CSV.Rows</code></pre><p>Read a csv input returning a <code>CSV.Rows</code> object.</p><p>The <a href="reading.html#input"><code>input</code></a> argument can be one of:</p><ul><li>filename given as a string or FilePaths.jl type</li><li>a <code>Vector{UInt8}</code> or <code>SubArray{UInt8, 1, Vector{UInt8}}</code> byte buffer</li><li>a <code>CodeUnits</code> object, which wraps a <code>String</code>, like <code>codeunits(str)</code></li><li>a csv-formatted string can also be passed like <code>IOBuffer(str)</code></li><li>a <code>Cmd</code> or other <code>IO</code></li><li>a gzipped file (or gzipped data in any of the above), which will automatically be decompressed for parsing</li></ul><p>To read a csv file from a url, use the HTTP.jl package, where the <code>HTTP.Response</code> body can be passed like:</p><pre><code class="language-julia hljs">f = CSV.Rows(HTTP.get(url).body)</code></pre><p>For other <code>IO</code> or <code>Cmd</code> inputs, you can pass them like: <code>f = CSV.Rows(read(obj))</code>.</p><p>While similar to <a href="reading.html#CSV.File"><code>CSV.File</code></a>, <code>CSV.Rows</code> provides a slightly different interface, the tradeoffs including:</p><ul><li>Very minimal memory footprint; while iterating, only the current row values 
are buffered</li><li>Only provides row access via iteration; to access columns, one can stream the rows into a table type</li><li>Performs no type inference; each column/cell is essentially treated as <code>Union{String, Missing}</code>, users can utilize the performant <code>Parsers.parse(T, str)</code> to convert values to a more specific type if needed, or pass types upon construction using the <code>type</code> or <code>types</code> keyword arguments</li></ul><p>Opens the file and uses passed arguments to detect the number of columns, <strong><em>but not</em></strong> column types (column types default to <code>String</code> unless otherwise manually provided). The returned <code>CSV.Rows</code> object supports the <a href="https://github.com/JuliaData/Tables.jl">Tables.jl</a> interface and can iterate rows. Each row object supports <code>propertynames</code>, <code>getproperty</code>, and <code>getindex</code> to access individual row values. Note that duplicate column names will be detected and adjusted to ensure uniqueness (duplicate column name <code>a</code> will become <code>a_1</code>). For example, one could iterate over a csv file with column names <code>a</code>, <code>b</code>, and <code>c</code> by doing:</p><pre><code class="language-julia hljs">for row in CSV.Rows(file)
     println(&quot;a=$(row.a), b=$(row.b), c=$(row.c)&quot;)
-end</code></pre><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. 
Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will be ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows that are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>rows_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that inputs values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/ba1f4d2e2b51017dbf7133e29b1105fd3c8116f2/src/rows.jl#L35-L74">source</a></section></article><h3 id="Utilities"><a class="docs-heading-anchor" href="#Utilities">Utilities</a><a id="Utilities-1"></a><a class="docs-heading-anchor-permalink" href="#Utilities" title="Permalink"></a></h3><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.detect" href="#CSV.detect"><code>CSV.detect</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia hljs">CSV.detect(str::String)</code></pre><p>Use the same logic used by <code>CSV.File</code> to detect column types, to parse a value from a plain string. This can be useful in conjunction with the <code>CSV.Rows</code> type, which returns each cell of a file as a String. The order of types attempted is: <code>Int</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Bool</code>, and if all fail, the input String is returned. No errors are thrown. 
For advanced usage, you can pass your own <code>Parsers.Options</code> type as a keyword argument <code>option=ops</code> for sentinel value detection.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/ba1f4d2e2b51017dbf7133e29b1105fd3c8116f2/src/utils.jl#L440-L448">source</a></section></article><h2 id="Common-terms"><a class="docs-heading-anchor" href="#Common-terms">Common terms</a><a id="Common-terms-1"></a><a class="docs-heading-anchor-permalink" href="#Common-terms" title="Permalink"></a></h2><h3 id="Standard-types"><a class="docs-heading-anchor" href="#Standard-types">Standard types</a><a id="Standard-types-1"></a><a class="docs-heading-anchor-permalink" href="#Standard-types" title="Permalink"></a></h3><p>The types that are detected by default when column types are not provided by the user otherwise. They include: <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, <code>Bool</code>, and <code>String</code>.</p><h3 id="newlines"><a class="docs-heading-anchor" href="#newlines">Newlines</a><a id="newlines-1"></a><a class="docs-heading-anchor-permalink" href="#newlines" title="Permalink"></a></h3><p>For all parsing functionality, newlines are detected/parsed automatically, regardless if they&#39;re present in the data as a single newline character (<code>&#39;\n&#39;</code>), single return character (&#39;<code>\r&#39;</code>), or full CRLF sequence (<code>&quot;\r\n&quot;</code>).</p><h3 id="Cardinality"><a class="docs-heading-anchor" href="#Cardinality">Cardinality</a><a id="Cardinality-1"></a><a class="docs-heading-anchor-permalink" href="#Cardinality" title="Permalink"></a></h3><p>Refers to the ratio of unique values to total number of values in a column. Columns with &quot;low cardinality&quot; have a low % of unique values, or put another way, there are only a few unique values for the entire column of data where unique values are repeated many times. 
Columns with &quot;high cardinality&quot; have a high % of unique values relative to total number of values. Think of these as &quot;id-like&quot; columns where each or almost each value is a unique identifier with no (or few) repeated values.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="index.html">« Home</a><a class="docs-footer-nextpage" href="writing.html">Writing »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Saturday 2 March 2024 21:44">Saturday 2 March 2024</span>. Using Julia version 1.10.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+end</code></pre><p><strong>Arguments</strong></p><p><strong>File layout options:</strong></p><ul><li><code>header=1</code>: how column names should be determined; if given as an <code>Integer</code>, indicates the row to parse for column names; as an <code>AbstractVector{&lt;:Integer}</code>, indicates a set of rows to be concatenated together as column names; <code>Vector{Symbol}</code> or <code>Vector{String}</code> give column names explicitly (should match # of columns in dataset); if a dataset doesn&#39;t have column names, either provide them as a <code>Vector</code>, or set <code>header=0</code> or <code>header=false</code> and column names will be auto-generated (<code>Column1</code>, <code>Column2</code>, etc.). Note that if a row number header and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the header row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header row will actually be the next non-commented row.</li><li><code>normalizenames::Bool=false</code>: whether column names should be &quot;normalized&quot; into valid Julia identifier symbols; useful when using the <code>tbl.col1</code> <code>getproperty</code> syntax or iterating rows and accessing column values of a row via <code>getproperty</code> (e.g. <code>row.col1</code>)</li><li><code>skipto::Integer</code>: specifies the row where the data starts in the csv file; by default, the next row after the <code>header</code> row(s) is used. If <code>header=0</code>, then the 1st row is assumed to be the start of data; providing a <code>skipto</code> argument does <em>not</em> affect the <code>header</code> argument. 
Note that if a row number <code>skipto</code> and <code>comment</code> or <code>ignoreemptyrows</code> are provided, the data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the data row will actually be the next non-commented row.</li><li><code>footerskip::Integer</code>: number of rows at the end of a file to skip parsing.  Do note that commented rows (see the <code>comment</code> keyword argument) <em>do not</em> count towards the row number provided for <code>footerskip</code>, they are completely ignored by the parser</li><li><code>transpose::Bool</code>: read a csv file &quot;transposed&quot;, i.e. each column is parsed as a row</li><li><code>comment::String</code>: string that will cause rows that begin with it to be skipped while parsing. Note that if a row number header or <code>skipto</code> and <code>comment</code> are provided, the header/data row will be the first non-commented/non-empty row <em>after</em> the row number, meaning if the provided row number is a commented row, the header/data row will actually be the next non-commented row.</li><li><code>ignoreemptyrows::Bool=true</code>: whether empty rows in a file should be ignored (if <code>false</code>, each column will be assigned <code>missing</code> for that empty row)</li><li><code>select</code>: an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;selector&quot; function of the form <code>(i, name) -&gt; keep::Bool</code>; only columns in the collection or for which the selector function returns <code>true</code> will be parsed and accessible in the resulting <code>CSV.File</code>. 
Invalid values in <code>select</code> are ignored.</li><li><code>drop</code>: inverse of <code>select</code>; an <code>AbstractVector</code> of <code>Integer</code>, <code>Symbol</code>, <code>String</code>, or <code>Bool</code>, or a &quot;drop&quot; function of the form <code>(i, name) -&gt; drop::Bool</code>; columns in the collection or for which the drop function returns <code>true</code> will be ignored in the resulting <code>CSV.File</code>. Invalid values in <code>drop</code> are ignored.</li><li><code>limit</code>: an <code>Integer</code> to indicate a limited number of rows to parse in a csv file; use in combination with <code>skipto</code> to read a specific, contiguous chunk within a file; note for large files when multiple threads are used for parsing, the <code>limit</code> argument may not result in an exact # of rows parsed; use <code>ntasks=1</code> to ensure an exact limit if necessary</li><li><code>buffer_in_memory</code>: a <code>Bool</code>, default <code>false</code>, which controls whether a <code>Cmd</code>, <code>IO</code>, or gzipped source will be read/decompressed in memory vs. using a temporary file.</li><li><code>ntasks::Integer=Threads.nthreads()</code>: [not applicable to <code>CSV.Rows</code>] for multithreaded parsed files, this controls the number of tasks spawned to read a file in concurrent chunks; defaults to the # of threads Julia was started with (i.e. 
<code>JULIA_NUM_THREADS</code> environment variable or <code>julia -t N</code>); setting <code>ntasks=1</code> will avoid any calls to <code>Threads.@spawn</code> and just read the file serially on the main thread; a single thread will also be used for smaller files by default (&lt; 5_000 cells)</li><li><code>rows_to_check::Integer=30</code>: [not applicable to <code>CSV.Rows</code>] a multithreaded parsed file will be split up into <code>ntasks</code> # of equal chunks; <code>rows_to_check</code> controls the # of rows that are checked to ensure parsing correctly found valid rows; for certain files with very large quoted text fields, <code>rows_to_check</code> may need to be higher (10, 30, etc.) to ensure parsing correctly finds these rows</li><li><code>source</code>: [only applicable for vector of inputs to <code>CSV.File</code>] a <code>Symbol</code>, <code>String</code>, or <code>Pair</code> of <code>Symbol</code> or <code>String</code> to <code>Vector</code>. As a single <code>Symbol</code> or <code>String</code>, provides the column name that will be added to the parsed columns, the values of the column will be the input &quot;name&quot; (usually file name) of the input from whence the value was parsed. 
As a <code>Pair</code>, the 2nd part of the pair should be a <code>Vector</code> of values matching the length of the # of inputs, where each value will be used instead of the input name for that inputs values in the auto-added column.</li></ul><p><strong>Parsing options:</strong></p><ul><li><code>missingstring</code>: either a <code>nothing</code>, <code>String</code>, or <code>Vector{String}</code> to use as sentinel values that will be parsed as <code>missing</code>; if <code>nothing</code> is passed, no sentinel/missing values will be parsed; by default, <code>missingstring=&quot;&quot;</code>, which means only an empty field (two consecutive delimiters) is considered <code>missing</code></li><li><code>delim=&#39;,&#39;</code>: a <code>Char</code> or <code>String</code> that indicates how columns are delimited in a file; if no argument is provided, parsing will try to detect the most consistent delimiter on the first 10 rows of the file</li><li><code>ignorerepeated::Bool=false</code>: whether repeated (consecutive/sequential) delimiters should be ignored while parsing; useful for fixed-width files with delimiter padding between cells</li><li><code>quoted::Bool=true</code>: whether parsing should check for <code>quotechar</code> at the start/end of cells</li><li><code>quotechar=&#39;&quot;&#39;</code>, <code>openquotechar</code>, <code>closequotechar</code>: a <code>Char</code> (or different start and end characters) that indicate a quoted field which may contain textual delimiters or newline characters</li><li><code>escapechar=&#39;&quot;&#39;</code>: the <code>Char</code> used to escape quote characters in a quoted field</li><li><code>dateformat::Union{String, Dates.DateFormat, Nothing, AbstractDict}</code>: a date format string to indicate how Date/DateTime columns are formatted for the entire file; if given as an <code>AbstractDict</code>, date format strings to indicate how the Date/DateTime columns corresponding to the keys are formatted. 
The Dict can map column index <code>Int</code>, or name <code>Symbol</code> or <code>String</code> to the format string for that column.</li><li><code>decimal=&#39;.&#39;</code>: a <code>Char</code> indicating how decimals are separated in floats, i.e. <code>3.14</code> uses <code>&#39;.&#39;</code>, or <code>3,14</code> uses a comma <code>&#39;,&#39;</code></li><li><code>groupmark=nothing</code>: optionally specify a single-byte character denoting the number grouping mark, this allows parsing of numbers that have, e.g., thousand separators (<code>1,000.00</code>).</li><li><code>truestrings</code>, <code>falsestrings</code>: <code>Vector{String}</code>s that indicate how <code>true</code> or <code>false</code> values are represented; by default <code>&quot;true&quot;, &quot;True&quot;, &quot;TRUE&quot;, &quot;T&quot;, &quot;1&quot;</code> are used to detect <code>true</code> and <code>&quot;false&quot;, &quot;False&quot;, &quot;FALSE&quot;, &quot;F&quot;, &quot;0&quot;</code> are used to detect <code>false</code>; note that columns with only <code>1</code> and <code>0</code> values will default to <code>Int64</code> column type unless explicitly requested to be <code>Bool</code> via <code>types</code> keyword argument</li><li><code>stripwhitespace=false</code>: if true, leading and trailing whitespace are stripped from string values, including column names</li></ul><p><strong>Column Type Options:</strong></p><ul><li><code>types</code>: a single <code>Type</code>, <code>AbstractVector</code> or <code>AbstractDict</code> of types, or a function of the form <code>(i, name) -&gt; Union{T, Nothing}</code> to be used for column types; if a single <code>Type</code> is provided, <em>all</em> columns will be parsed with that single type; an <code>AbstractDict</code> can map column index <code>Integer</code>, or name <code>Symbol</code> or <code>String</code> to type for a column, i.e. 
<code>Dict(1=&gt;Float64)</code> will set the first column as a <code>Float64</code>, <code>Dict(:column1=&gt;Float64)</code> will set the column named <code>column1</code> to <code>Float64</code> and, <code>Dict(&quot;column1&quot;=&gt;Float64)</code> will set the <code>column1</code> to <code>Float64</code>; if a <code>Vector</code> is provided, it must match the # of columns provided or detected in <code>header</code>. If a function is provided, it takes a column index and name as arguments, and should return the desired column type for the column, or <code>nothing</code> to signal the column&#39;s type should be detected while parsing.</li><li><code>typemap::IdDict{Type, Type}</code>: a mapping of a type that should be replaced in every instance with another type, i.e. <code>Dict(Float64=&gt;String)</code> would change every detected <code>Float64</code> column to be parsed as <code>String</code>; only &quot;standard&quot; types are allowed to be mapped to another type, i.e. <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, and <code>Bool</code>. If a column of one of those types is &quot;detected&quot;, it will be mapped to the specified type.</li><li><code>pool::Union{Bool, Real, AbstractVector, AbstractDict, Function, Tuple{Float64, Int}}=(0.2, 500)</code>: [not supported by <code>CSV.Rows</code>] controls whether columns will be built as <code>PooledArray</code>; if <code>true</code>, all columns detected as <code>String</code> will be pooled; alternatively, the proportion of unique values below which <code>String</code> columns should be pooled (meaning that if the # of unique strings in a column is under 25%, <code>pool=0.25</code>, it will be pooled). 
If provided as a <code>Tuple{Float64, Int}</code> like <code>(0.2, 500)</code>, it represents the percent cardinality threshold as the 1st tuple element (<code>0.2</code>), and an upper limit for the # of unique values (<code>500</code>), under which the column will be pooled; this is the default (<code>pool=(0.2, 500)</code>). If an <code>AbstractVector</code>, each element should be <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> and the # of elements should match the # of columns in the dataset; if an <code>AbstractDict</code>, a <code>Bool</code>, <code>Real</code>, or <code>Tuple{Float64, Int}</code> value can be provided for individual columns where the dict key is given as column index <code>Integer</code>, or column name as <code>Symbol</code> or <code>String</code>. If a function is provided, it should take a column index and name as 2 arguments, and return a <code>Bool</code>, <code>Real</code>, <code>Tuple{Float64, Int}</code>, or <code>nothing</code> for each column.</li><li><code>downcast::Bool=false</code>: controls whether columns detected as <code>Int64</code> will be &quot;downcast&quot; to the smallest possible integer type like <code>Int8</code>, <code>Int16</code>, <code>Int32</code>, etc.</li><li><code>stringtype=InlineStrings.InlineString</code>: controls how detected string columns will ultimately be returned; default is <code>InlineString</code>, which stores string data in a fixed-size primitive type that helps avoid excessive heap memory usage; if a column has values longer than 32 bytes, it will default to <code>String</code>. If <code>String</code> is passed, all string columns will just be normal <code>String</code> values. If <code>PosLenString</code> is passed, string columns will be returned as <code>PosLenStringVector</code>, which is a special &quot;lazy&quot; <code>AbstractVector</code> that acts as a &quot;view&quot; into the original file data. 
This can lead to the most efficient parsing times, but note that the &quot;view&quot; nature of <code>PosLenStringVector</code> makes it read-only, so operations like <code>push!</code>, <code>append!</code>, or <code>setindex!</code> are not supported. It also keeps a reference to the entire input dataset source, so trying to modify or delete the underlying file, for example, may fail</li><li><code>strict::Bool=false</code>: whether invalid values should throw a parsing error or be replaced with <code>missing</code></li><li><code>silencewarnings::Bool=false</code>: if <code>strict=false</code>, whether invalid value warnings should be silenced</li><li><code>maxwarnings::Int=100</code>: if more than <code>maxwarnings</code> number of warnings are printed while parsing, further warnings will be silenced by default; for multithreaded parsing, each parsing task will print up to <code>maxwarnings</code></li><li><code>debug::Bool=false</code>: passing <code>true</code> will result in many informational prints while a dataset is parsed; can be useful when reporting issues or figuring out what is going on internally while a dataset is parsed</li><li><code>validate::Bool=true</code>: whether or not to validate that columns specified in the <code>types</code>, <code>dateformat</code> and <code>pool</code> keywords are actually found in the data. If <code>false</code> no validation is done, meaning no error will be thrown if <code>types</code>/<code>dateformat</code>/<code>pool</code> specify settings for columns not actually found in the data.</li></ul><p><strong>Iteration options:</strong></p><ul><li><code>reusebuffer=false</code>: [only supported by <code>CSV.Rows</code>] while iterating, whether a single row buffer should be allocated and reused on each iteration; only use if each row will be iterated once and not re-used (e.g. 
it&#39;s not safe to use this option if doing <code>collect(CSV.Rows(file))</code> because only current iterated row is &quot;valid&quot;)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/acd36a6df5677c159d569a4c799f88a52b9886b5/src/rows.jl#L35-L74">source</a></section></article><h3 id="Utilities"><a class="docs-heading-anchor" href="#Utilities">Utilities</a><a id="Utilities-1"></a><a class="docs-heading-anchor-permalink" href="#Utilities" title="Permalink"></a></h3><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.detect" href="#CSV.detect"><code>CSV.detect</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia hljs">CSV.detect(str::String)</code></pre><p>Use the same logic used by <code>CSV.File</code> to detect column types, to parse a value from a plain string. This can be useful in conjunction with the <code>CSV.Rows</code> type, which returns each cell of a file as a String. The order of types attempted is: <code>Int</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Bool</code>, and if all fail, the input String is returned. No errors are thrown. 
For advanced usage, you can pass your own <code>Parsers.Options</code> type as a keyword argument <code>option=ops</code> for sentinel value detection.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/acd36a6df5677c159d569a4c799f88a52b9886b5/src/utils.jl#L440-L448">source</a></section></article><h2 id="Common-terms"><a class="docs-heading-anchor" href="#Common-terms">Common terms</a><a id="Common-terms-1"></a><a class="docs-heading-anchor-permalink" href="#Common-terms" title="Permalink"></a></h2><h3 id="Standard-types"><a class="docs-heading-anchor" href="#Standard-types">Standard types</a><a id="Standard-types-1"></a><a class="docs-heading-anchor-permalink" href="#Standard-types" title="Permalink"></a></h3><p>The types that are detected by default when column types are not provided by the user otherwise. They include: <code>Int64</code>, <code>Float64</code>, <code>Date</code>, <code>DateTime</code>, <code>Time</code>, <code>Bool</code>, and <code>String</code>.</p><h3 id="newlines"><a class="docs-heading-anchor" href="#newlines">Newlines</a><a id="newlines-1"></a><a class="docs-heading-anchor-permalink" href="#newlines" title="Permalink"></a></h3><p>For all parsing functionality, newlines are detected/parsed automatically, regardless if they&#39;re present in the data as a single newline character (<code>&#39;\n&#39;</code>), single return character (<code>&#39;\r&#39;</code>), or full CRLF sequence (<code>&quot;\r\n&quot;</code>).</p><h3 id="Cardinality"><a class="docs-heading-anchor" href="#Cardinality">Cardinality</a><a id="Cardinality-1"></a><a class="docs-heading-anchor-permalink" href="#Cardinality" title="Permalink"></a></h3><p>Refers to the ratio of unique values to total number of values in a column. Columns with &quot;low cardinality&quot; have a low % of unique values, or put another way, there are only a few unique values for the entire column of data where unique values are repeated many times. 
Columns with &quot;high cardinality&quot; have a high % of unique values relative to total number of values. Think of these as &quot;id-like&quot; columns where each or almost each value is a unique identifier with no (or few) repeated values.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="index.html">« Home</a><a class="docs-footer-nextpage" href="writing.html">Writing »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Friday 8 March 2024 00:12">Friday 8 March 2024</span>. Using Julia version 1.10.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/writing.html b/dev/writing.html
index 612161de..c96e0942 100644
--- a/dev/writing.html
+++ b/dev/writing.html
@@ -9,4 +9,4 @@
 # write a matrix to an in-memory IOBuffer
 io = IOBuffer()
 mat = rand(10, 10)
-CSV.write(io, Tables.table(mat))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/ba1f4d2e2b51017dbf7133e29b1105fd3c8116f2/src/write.jl#L3-L43">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.RowWriter" href="#CSV.RowWriter"><code>CSV.RowWriter</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.RowWriter(table; kwargs...)</code></pre><p>Creates an iterator that produces csv-formatted strings for each row in the input table.</p><p>Supported keyword arguments include:</p><ul><li><code>bufsize::Int=2^22</code>: The length of the buffer to use when writing each csv-formatted row; default 4MB; if a row is larger than the <code>bufsize</code> an error is thrown</li><li><code>delim::Union{Char, String}=&#39;,&#39;</code>: a character or string to print out as the file&#39;s delimiter</li><li><code>quotechar::Char=&#39;&quot;&#39;</code>: ascii character to use for quoting text fields that may contain delimiters or newlines</li><li><code>openquotechar::Char</code>: instead of <code>quotechar</code>, use <code>openquotechar</code> and <code>closequotechar</code> to support different starting and ending quote characters</li><li><code>escapechar::Char=&#39;&quot;&#39;</code>: ascii character used to escape quote characters in a text field</li><li><code>missingstring::String=&quot;&quot;</code>: string to print for <code>missing</code> values</li><li><code>dateformat=Dates.default_format(T)</code>: the date format string to use for printing out <code>Date</code> &amp; <code>DateTime</code> columns</li><li><code>header</code>: pass a list of column names (Symbols or Strings) to use instead of the column names of the input table</li><li><code>newline=&#39;\n&#39;</code>: 
character or string to use to separate rows (lines in the csv file)</li><li><code>quotestrings=false</code>: whether to force all strings to be quoted or not</li><li><code>decimal=&#39;.&#39;</code>: character to use as the decimal point when writing floating point numbers</li><li><code>transform=(col,val)-&gt;val</code>: a function that is applied to every cell e.g. we can transform all <code>nothing</code> values to <code>missing</code> using <code>(col, val) -&gt; something(val, missing)</code></li><li><code>bom=false</code>: whether to write a UTF-8 BOM header (0xEF 0xBB 0xBF) or not</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/ba1f4d2e2b51017dbf7133e29b1105fd3c8116f2/src/write.jl#L87-L106">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="reading.html">« Reading</a><a class="docs-footer-nextpage" href="examples.html">Examples »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Saturday 2 March 2024 21:44">Saturday 2 March 2024</span>. 
Using Julia version 1.10.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+CSV.write(io, Tables.table(mat))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/acd36a6df5677c159d569a4c799f88a52b9886b5/src/write.jl#L3-L43">source</a></section></article><article class="docstring"><header><a class="docstring-article-toggle-button fa-solid fa-chevron-down" href="javascript:;" title="Collapse docstring"></a><a class="docstring-binding" id="CSV.RowWriter" href="#CSV.RowWriter"><code>CSV.RowWriter</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia hljs">CSV.RowWriter(table; kwargs...)</code></pre><p>Creates an iterator that produces csv-formatted strings for each row in the input table.</p><p>Supported keyword arguments include:</p><ul><li><code>bufsize::Int=2^22</code>: The length of the buffer to use when writing each csv-formatted row; default 4MB; if a row is larger than the <code>bufsize</code> an error is thrown</li><li><code>delim::Union{Char, String}=&#39;,&#39;</code>: a character or string to print out as the file&#39;s delimiter</li><li><code>quotechar::Char=&#39;&quot;&#39;</code>: ascii character to use for quoting text fields that may contain delimiters or newlines</li><li><code>openquotechar::Char</code>: instead of <code>quotechar</code>, use <code>openquotechar</code> and <code>closequotechar</code> to support different starting and ending quote characters</li><li><code>escapechar::Char=&#39;&quot;&#39;</code>: ascii character used to escape quote characters in a text field</li><li><code>missingstring::String=&quot;&quot;</code>: string to print for <code>missing</code> values</li><li><code>dateformat=Dates.default_format(T)</code>: the date format string to use for printing out <code>Date</code> &amp; <code>DateTime</code> columns</li><li><code>header</code>: pass a list of column names (Symbols or Strings) to use instead of the column names of the input table</li><li><code>newline=&#39;\n&#39;</code>: 
character or string to use to separate rows (lines in the csv file)</li><li><code>quotestrings=false</code>: whether to force all strings to be quoted or not</li><li><code>decimal=&#39;.&#39;</code>: character to use as the decimal point when writing floating point numbers</li><li><code>transform=(col,val)-&gt;val</code>: a function that is applied to every cell e.g. we can transform all <code>nothing</code> values to <code>missing</code> using <code>(col, val) -&gt; something(val, missing)</code></li><li><code>bom=false</code>: whether to write a UTF-8 BOM header (0xEF 0xBB 0xBF) or not</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/JuliaData/CSV.jl/blob/acd36a6df5677c159d569a4c799f88a52b9886b5/src/write.jl#L87-L106">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="reading.html">« Reading</a><a class="docs-footer-nextpage" href="examples.html">Examples »</a><div class="flexbox-break"></div><p class="footer-message">Powered by <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> and the <a href="https://julialang.org/">Julia Programming Language</a>.</p></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="auto">Automatic (OS)</option><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> version 1.3.0 on <span class="colophon-date" title="Friday 8 March 2024 00:12">Friday 8 March 2024</span>. 
Using Julia version 1.10.2.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>