Commit 37a1c86: Clean up whitespace

garborg committed Aug 2, 2014
1 parent 4e4f055 commit 37a1c86

Showing 23 changed files with 169 additions and 169 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
@@ -57,7 +57,7 @@ DataFrames v0.5.0 Release Notes
===============================

Improved I/O and more-Julian idioms.

New features
------------
* Write HTML tables via writemime ([#433])
2 changes: 1 addition & 1 deletion README.md
@@ -5,7 +5,7 @@ DataFrames.jl
[![Coverage Status](https://coveralls.io/repos/JuliaStats/DataFrames.jl/badge.png?branch=master)](https://coveralls.io/r/JuliaStats/DataFrames.jl?branch=master)
[![Package Evaluator](http://iainnz.github.io/packages.julialang.org/badges/DataFrames_0.3.svg)](http://iainnz.github.io/packages.julialang.org/?pkg=DataFrames&ver=0.3)

-Tools for working with data in Julia.
+Tools for working with data in Julia.

**Installation**: at the Julia REPL: `Pkg.add("DataFrames")`

6 changes: 3 additions & 3 deletions benchmark/datamatrix.jl
@@ -14,15 +14,15 @@ f2() = *(dm_a, dm_b)
f3() = *(dm_a_na, dm_b_na)

df1 = benchmark(f1,
"Linear Algebra",
"Linear Algebra",
"Matrix Multiplication w/ No NA's",
1_000)
df2 = benchmark(f2,
"Linear Algebra",
"Linear Algebra",
"DataMatrix Multiplication w/ No NA's",
1_000)
df3 = benchmark(f3,
"Linear Algebra",
"Linear Algebra",
"DataMatrix Multiplication w/ NA's",
1_000)

2 changes: 1 addition & 1 deletion benchmark/io.jl
@@ -2,7 +2,7 @@ N = 10
folders = ["definedtypes", "factors", "newlines", "quoting",
"scaling", "separators", "typeinference"]

-cd(Pkg.dir("DataFrames", "test", "data")) do
+cd(Pkg.dir("DataFrames", "test", "data")) do
for folder in folders
for filename in readdir(folder)
relpath = joinpath(folder, filename)
6 changes: 3 additions & 3 deletions doc/other/03_design_details.md
@@ -212,8 +212,8 @@ A `PooledDataArray` can be constructed from an `Array` or `DataArray`, and as wi

By default, the index into the pool of values is a Uint32, allowing 2^32 possible pool values. If you know that you will only have a much smaller number of unique values, you can specify a smaller reference index type, to save space:

-pda5 = PooledDataArray(String, Uint8, 5000, 2) # Create a 5000x2 array of String values,
-                                               # initialized to NA,
+pda5 = PooledDataArray(String, Uint8, 5000, 2) # Create a 5000x2 array of String values,
+                                               # initialized to NA,
# with at most 2^8=256 unique values

`PooledDataVector`s can be used as columns in DataFrames.
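
For a concrete sense of the savings, here is a minimal sketch; it peeks at the `refs` field of the era's implementation, which is an internal detail:

    pda8  = PooledDataArray(String, Uint8,  10_000, 1)  # 1-byte references
    pda32 = PooledDataArray(String, Uint32, 10_000, 1)  # 4-byte references
    sizeof(pda8.refs)   # => 10000 bytes for the reference index
    sizeof(pda32.refs)  # => 40000 bytes for the same index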
@@ -557,7 +557,7 @@ The basic mechanism for splitting data is the `groupby()` function, which will pr
end

The `|>` (pipe) operator for `GroupedDataFrame` allows you to run simple functions on the columns of the induced `DataFrame`. You pass a simple function by producing a symbol with its name:

groupby(iris, "Species") |> :mean

Another simple way to split-and-apply (without clear combining) is to use the `map()` function:
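
A sketch of both styles side by side (assuming an `iris` frame; the `"PetalLength"` column name is illustrative):

    groupby(iris, "Species") |> :mean                            # mean of every column, per group
    map(d -> mean(d["PetalLength"]), groupby(iris, "Species"))   # one column, per group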
24 changes: 12 additions & 12 deletions doc/other/05_function_reference_guide.md
@@ -30,7 +30,7 @@ A deep copy of `df`. Copies of each column are made.

#### `similar(df::DataFrame, nrow)`

-A new DataFrame with `nrow` rows and the same column names and types as `df`.
+A new DataFrame with `nrow` rows and the same column names and types as `df`.


### Basics
@@ -64,11 +64,11 @@ Number of columns in `df`.

Whether the number of columns equals zero.

-#### `head(df::AbstractDataFrame)` and `head(df::AbstractDataFrame, i::Int)`
+#### `head(df::AbstractDataFrame)` and `head(df::AbstractDataFrame, i::Int)`

First `i` rows of `df`. Defaults to 6.

-#### `tail(df::AbstractDataFrame)` and `tail(df::AbstractDataFrame, i::Int)`
+#### `tail(df::AbstractDataFrame)` and `tail(df::AbstractDataFrame, i::Int)`

Last `i` rows of `df`. Defaults to 6.
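
A quick sketch of both (illustrative data):

    df = DataFrame(x = 1:10)
    head(df, 3)  # rows 1-3
    tail(df)     # rows 5-10, the default 6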

@@ -107,7 +107,7 @@ with one argument normally indexes by columns: `df["col"]`,
`df[["col1","col3"]]` or `df[i]`. With two arguments, rows and columns
are selected. Indexing along rows works like Matrix indexing. Indexing
along columns works like Matrix indexing with the addition of column
-name access.
+name access.

#### `getindex(df::DataFrame, ind)` or `df[ind]`

@@ -134,7 +134,7 @@ Returns the column `Index` for `df`.

See the Indexing section for these operations on column indexes.

-#### `colnames(df::DataFrame)` or `names(df::DataFrame)`
+#### `colnames(df::DataFrame)` or `names(df::DataFrame)`

The column names as an `Array{ByteString}`.

@@ -167,11 +167,11 @@ Nondestructive version. Return a DataFrame based on the columns in

Delete rows at positions specified by `inds` from the given DataFrame.

-#### `cbind(df1, df2, ...)` or `hcat(df1, df2, ...)` or `[df1 df2 ...]`
+#### `cbind(df1, df2, ...)` or `hcat(df1, df2, ...)` or `[df1 df2 ...]`

Concatenate columns. Duplicated column names are adjusted.

-#### `rbind(df1, df2, ...)` or `vcat(df1, df2, ...)` or `[df1, df2, ...]`
+#### `rbind(df1, df2, ...)` or `vcat(df1, df2, ...)` or `[df1, df2, ...]`

Concatenate rows.
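
A minimal sketch of both directions (illustrative data):

    df1 = DataFrame(a = 1:2, b = ["x", "y"])
    df2 = DataFrame(a = 3:4, b = ["z", "w"])
    [df1  df2]   # hcat: 2x4, duplicate names adjusted
    [df1, df2]   # vcat: 4x2, rows stacked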

@@ -332,7 +332,7 @@ ByteStrings and Vector{ByteStrings} to Indices.

Normal meanings.

-#### `del(x::Index, idx::Integer)`, `del(x::Index, s::ByteString)`,
+#### `del(x::Index, idx::Integer)`, `del(x::Index, s::ByteString)`,

Delete the name `s` or name at position `idx` in `x`.

@@ -361,7 +361,7 @@ Vector{Bool}, AbstractDataVector{Bool}, or AbstractDataVector{Int}.
#### `set_group(idx::Index, newgroup, names::Vector{ByteString})`

Add a group to `idx` with name `newgroup` that includes the names in
-the vector `names`.
+the vector `names`.

#### `get_groups(idx::Index)`

@@ -375,20 +375,20 @@ Set groups in `idx` based on the mapping given by `gr`.
## Missing Values

Missing value behavior is implemented by instantiations of the `AbstractDataVector`
-abstract type.
+abstract type.

#### `NA`

A constant indicating a missing value.

#### `isna(x)`

Return a `Bool` or `Array{Bool}` (if `x` is an `AbstractDataVector`)
that is `true` for elements with missing values.

#### `nafilter(x)`

-Return a copy of `x` after removing missing values.
+Return a copy of `x` after removing missing values.
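
Taken together, a short sketch (assuming the era's `DataArray` constructor):

    dv = DataArray([1, 2, 3])
    dv[2] = NA
    isna(dv)      # => [false, true, false]
    nafilter(dv)  # => the two non-missing values, 1 and 3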

#### `nareplace(x, val)`

2 changes: 1 addition & 1 deletion doc/sections/02_getting_started.md
@@ -92,7 +92,7 @@ We can also look at small subsets of the data in a couple of different ways:

head(df)
tail(df)

df[1:3, :]

Having seen what some of the rows look like, we can try to summarize the
30 changes: 15 additions & 15 deletions prototypes/benchmark_datastream.jl
@@ -3,19 +3,19 @@ filename = Pkg.dir("DataFrames", "test", "data", "big_data.csv")
minibatch_sizes = [1, 5, 25, 100, 1_000, 10_000]

for f in (colmeans, colvars, cor)
-    for minibatch_size in minibatch_sizes
-        ds = DataStream(filename, minibatch_size)
-        N = 3
-        df = benchmark(() -> apply(f, (ds,)),
-                       "DataStream Functions",
-                       join({
-                         string(f),
-                         "w/ minibatches of",
-                         minibatch_size,
-                         "rows"
-                       }, " "),
-                       N)
-        # TODO: Keep permanent record
-        printtable(df, header=false)
-    end
+    for minibatch_size in minibatch_sizes
+        ds = DataStream(filename, minibatch_size)
+        N = 3
+        df = benchmark(() -> apply(f, (ds,)),
+                       "DataStream Functions",
+                       join({
+                         string(f),
+                         "w/ minibatches of",
+                         minibatch_size,
+                         "rows"
+                       }, " "),
+                       N)
+        # TODO: Keep permanent record
+        printtable(df, header=false)
+    end
end
34 changes: 17 additions & 17 deletions prototypes/dataframe_blocks.jl
@@ -17,8 +17,8 @@ end

Base.show(io::IO, dt::DDataFrame) = println("$(nrow(dt))x$(ncol(dt)) DDataFrame. $(length(dt.rrefs)) blocks over $(length(union(dt.procs))) processors")

-gather(dt::DDataFrame) = reduce((x,y)->vcat(fetch(x), fetch(y)), dt.rrefs)
-#convert(::Type{DataFrame}, dt::DDataFrame) = reduce((x,y)->vcat(fetch(x), fetch(y)), dt.rrefs)
+gather(dt::DDataFrame) = reduce((x,y)->vcat(fetch(x), fetch(y)), dt.rrefs)
+#convert(::Type{DataFrame}, dt::DDataFrame) = reduce((x,y)->vcat(fetch(x), fetch(y)), dt.rrefs)

# internal methods
function _dims(dt::DDataFrame, rows::Bool=true, cols::Bool=true)
@@ -78,8 +78,8 @@ function as_dataframe(bio::BlockableIO; kwargs...)
push!(poargs, kwdict[argname])
end

-    po = DataFrames.ParseOptions(kwdict[:header],
-                                 kwdict[:separator],
+    po = DataFrames.ParseOptions(kwdict[:header],
+                                 kwdict[:separator],
kwdict[:quotemark],
kwdict[:decimal],
kwdict[:nastrings],
@@ -113,7 +113,7 @@ function _check_readtable_kwargs(kwargs...)
(kw[1] in [:skipstart, :skiprows]) && error("dreadtable does not support $(kw[1])")
end
for (idx,kw) in enumerate(kwargs)
-        if (kw[1]==:header)
+        if (kw[1]==:header)
(kw[2] != false) && error("dreadtable does not support reading of headers")
splice!(kwargs, idx)
break
@@ -125,7 +125,7 @@ end

function dreadtable(b::Block; kwargs...)
kwargs = _check_readtable_kwargs(kwargs...)
-    if (b.affinity == Blocks.no_affinity)
+    if (b.affinity == Blocks.no_affinity)
b.affinity = [[w] for w in workers()]
end
rrefs = pmap(x->as_dataframe(x;kwargs...), b; fetch_results=false)
@@ -147,7 +147,7 @@ function dreadtable(io::Union(Base.AsyncStream,IOStream), chunk_sz::Int, merge_c
rrefs = vcat_refs
procs = uniqprocs
end

DDataFrame(rrefs, procs)
end

@@ -221,7 +221,7 @@ function _colranges(t::DataFrame, cnames)
cc = t[colnames[cidx]]
for idx in 1:nrows
ccval = cc[idx]
-            if !isna(ccval)
+            if !isna(ccval)
(isna(_min) || (_min > ccval)) && (_min = ccval)
(isna(_max) || (_max < ccval)) && (_max = ccval)
_sum += ccval
@@ -407,7 +407,7 @@ function deleterows!(dt::DDataFrame, keep_inds::Vector{Int})
beg_row = end_row+1
end
dt_keep_inds = DDataFrame(split_inds, dt.procs)

pmap((x,y)->begin DataFrames.deleterows!(fetch(x),y[1].data); nothing; end, Block(dt), Block(dt_keep_inds))
_dims(dt, true, false)
end
@@ -423,7 +423,7 @@ for f in (:(DataArrays.isna), :complete_cases)
vcat(pmap(x->($f)(fetch(x)), Block(dt))...)
end
end
-end
+end
function complete_cases!(dt::DDataFrame)
pmap(x->begin complete_cases!(fetch(x)); nothing; end, Block(dt))
_dims(dt, true, true)
@@ -471,7 +471,7 @@ for f in (:colmins, :colmaxs, :colprods, :colsums, :colmeans)
($f)(vcat(pmap(x->($f)(fetch(x)), Block(dt))...))
end
end
-end
+end

for f in DataFrames.array_arithmetic_operators
@eval begin
@@ -500,7 +500,7 @@ ncol(dt::DDataFrame) = dt.ncols
DataArrays.head(dt::DDataFrame) = remotecall_fetch(dt.procs[1], x->head(fetch(x)), dt.rrefs[1])
DataArrays.tail(dt::DDataFrame) = remotecall_fetch(dt.procs[end], x->tail(fetch(x)), dt.rrefs[end])
colnames(dt::DDataFrame) = dt.colindex.names
-function colnames!(dt::DDataFrame, vals)
+function colnames!(dt::DDataFrame, vals)
pmap(x->colnames!(fetch(x), vals), Block(dt))
names!(dt.colindex, vals)
end
@@ -527,21 +527,21 @@ for f in [:vcat, :hcat, :rbind, :cbind]
function ($f)(dt::DDataFrame...)
rrefs = pmap((x...)->($f)([fetch(y) for y in x]...), [Block(a) for a in dt]...; fetch_results=false)
procs = dt[1].procs
-        DDataFrame(rrefs, procs)
+        DDataFrame(rrefs, procs)
end
end
end

function Base.merge(dt::DDataFrame, t::DataFrame, bycol, jointype)
(jointype != "inner") && error("only inner joins are supported")

rrefs = pmap((x)->merge(fetch(x),t), Block(dt); fetch_results=false)
DDataFrame(rrefs, dt.procs)
end

function Base.merge(t::DataFrame, dt::DDataFrame, bycol, jointype)
(jointype != "inner") && error("only inner joins are supported")

rrefs = pmap((x)->merge(t,fetch(x)), Block(dt); fetch_results=false)
DDataFrame(rrefs, dt.procs)
end
@@ -555,7 +555,7 @@ function colwise(f::Function, r::Function, dt::DDataFrame)
combined = hcat(resarr...)
map(x->r([combined[x, :]...]), 1:size(combined,1))
end
-function colwise(fns::Vector{Function}, rfns::Vector{Function}, dt::DDataFrame)
+function colwise(fns::Vector{Function}, rfns::Vector{Function}, dt::DDataFrame)
nfns = length(fns)
(nfns != length(rfns)) && error("number of operations must match number of reduce operations")
resarr = pmap((x)->colwise(fns,fetch(x)), Block(dt))
@@ -568,7 +568,7 @@ function colwise(dt::DDataFrame, s::Vector{Symbol}, reduces::Vector{Function}, c
resarr = pmap((x)->colwise(fetch(x), s, cn), Block(dt))
combined = vcat(resarr...)
resdf = DataFrame()

for (idx,(colname,col)) in enumerate(combined)
resdf[colname] = (reduces[idx%nfns+1])(col)
end
2 changes: 1 addition & 1 deletion prototypes/datastream.jl
@@ -426,7 +426,7 @@ end
# # Now compute covariances during second pass
# ns = zeros(Int, p, p)
# covariances = datazeros(p, p)

# for minibatch in ds
# for row_index in 1:nrow(minibatch)
# for column_index in 1:p