# DataFrame Basics

In [27]:
using DataFrames, CSV, SQLite, Query

┌ Info: Precompiling Query [1a8c2f83-1ff3-5112-b086-8aa67b057ba1]
└ @ Base loading.jl:1278


In [10]:
# Keyword-argument constructor: each keyword becomes a column (A holds Int64s, B holds Strings).
df₁ = DataFrame(A = 1:4, B = ["M", "F","F","M"])

Unnamed: 0_level_0,A,B
Unnamed: 0_level_1,Int64,String
1,1,M
2,2,F
3,3,F
4,4,M


In [6]:
# Property syntax returns column A as a vector.
df₁.A

4-element Array{Int64,1}:
 1
 2
 3
 4

In [8]:
# With ! as the row selector, indexing returns the same object as df₁.A (no copy is made).
df₁[!,:A]

4-element Array{Int64,1}:
 1
 2
 3
 4

In [9]:
# With : as the row selector, indexing returns a copy of the column.
df₁[:,:A]

4-element Array{Int64,1}:
 1
 2
 3
 4

In [12]:
# Columns can also be selected by integer position; position 1 is column A.
df₁[:,1]

4-element Array{Int64,1}:
 1
 2
 3
 4

In [13]:
df₁.A === df₁[!,:A] # true: both refer to the actual column stored in the DataFrame

true

In [15]:
df₁.A === df₁[:,:A] # false: indexing with : produces a copy of the column

false

In [16]:
# A column name is a Symbol, so it can be kept in a variable.
firstcolumn = :A

:A

In [17]:
# Indexing with the variable behaves the same as writing :A literally.
df₁[!, firstcolumn] === df₁.A

true

In [18]:
# List the column names (shown here as a vector of Symbols).
names(df₁)

2-element Array{Symbol,1}:
 :A
 :B

## Build Column by Column

In [7]:
# Start with an empty DataFrame, to be filled column by column.
df₂ = DataFrame()

In [8]:
# Assigning to a property that does not exist yet creates column A.
df₂.A = 1:8

1:8

In [9]:
# Add a second column, B; it has 8 entries, matching the length of A.
df₂.B = ["M", "F", "F", "M", "F", "M", "M", "F"]

8-element Array{String,1}:
 "M"
 "F"
 "F"
 "M"
 "F"
 "M"
 "M"
 "F"

In [11]:
size(df₂,1) # size along the first dimension - rows

8

In [13]:
size(df₂,2) # size along the second dimension - columns

2

In [14]:
size(df₂) # (rows, columns) as a tuple

(8, 2)

## Build Row by Row

In [19]:
df₃ = DataFrame(A = Int[], B = String[]) # empty DataFrame with typed columns: A accepts integers, B accepts strings

Unnamed: 0_level_0,A,B
Unnamed: 0_level_1,Int64,String


In [20]:
# Append a row given as a tuple; values fill the columns in order (A, then B).
push!(df₃, (1, "M"))

Unnamed: 0_level_0,A,B
Unnamed: 0_level_1,Int64,String
1,1,M


In [21]:
# Append a row given as a Dict; each value goes to the column named by its key.
push!(df₃, Dict(:B => "F", :A => 3))

Unnamed: 0_level_0,A,B
Unnamed: 0_level_1,Int64,String
1,1,M
2,3,F


## Constructing from Other Table Types

In [22]:
# Small example table: a is an integer column, b is a column of Symbols.
df₄= DataFrame(a=[1,2,3], b=[:a,:b,:c])

Unnamed: 0_level_0,a,b
Unnamed: 0_level_1,Int64,Symbol
1,1,a
2,2,b
3,3,c


In [25]:
# Save df₄ as a CSV file; the call returns the file name.
CSV.write("dataframe.csv",df₄)

"dataframe.csv"

In [28]:
# Query's @map builds new rows (a incremented by 1, b unchanged);
# piping into DataFrame collects them back into a table.
df₄ = df₄ |> @map({a=_.a+1, _.b}) |> DataFrame

Unnamed: 0_level_0,a,b
Unnamed: 0_level_1,Int64,Symbol
1,2,a
2,3,b
3,4,c


In [30]:
# 500-row example table:
#   A = the odd numbers 1, 3, ..., 999
#   B = each of 1..10 repeated 50 times in a row (inner=50)
#   C = 1..500
df₅ = DataFrame(A = 1:2:1000, B = repeat(1:10, inner=50), C = 1:500)

Unnamed: 0_level_0,A,B,C
Unnamed: 0_level_1,Int64,Int64,Int64
1,1,1,1
2,3,1,2
3,5,1,3
4,7,1,4
5,9,1,5
6,11,1,6
7,13,1,7
8,15,1,8
9,17,1,9
10,19,1,10


In [31]:
first(df₅,6) # first 6 rows

Unnamed: 0_level_0,A,B,C
Unnamed: 0_level_1,Int64,Int64,Int64
1,1,1,1
2,3,1,2
3,5,1,3
4,7,1,4
5,9,1,5
6,11,1,6


In [33]:
last(df₅,6) # last 6 rows

Unnamed: 0_level_0,A,B,C
Unnamed: 0_level_1,Int64,Int64,Int64
1,989,10,495
2,991,10,496
3,993,10,497
4,995,10,498
5,997,10,499
6,999,10,500


In [34]:
df₅[1:5,:] # rows 1 through 5, all columns

Unnamed: 0_level_0,A,B,C
Unnamed: 0_level_1,Int64,Int64,Int64
1,1,1,1
2,3,1,2
3,5,1,3
4,7,1,4
5,9,1,5


In [35]:
df₅[[1,5,10],:] # Rows 1,5,10 and all columns

Unnamed: 0_level_0,A,B,C
Unnamed: 0_level_1,Int64,Int64,Int64
1,1,1,1
2,9,1,5
3,19,1,10


In [36]:
df₅[:,[:A, :B]] # all rows, columns A and B

Unnamed: 0_level_0,A,B
Unnamed: 0_level_1,Int64,Int64
1,1,1
2,3,1
3,5,1
4,7,1
5,9,1
6,11,1
7,13,1
8,15,1
9,17,1
10,19,1


In [39]:
df₅[[3,1],[:C]] # rows 3 and 1 (in that order), column C returned as a one-column DataFrame

Unnamed: 0_level_0,C
Unnamed: 0_level_1,Int64
1,3
2,1


In [41]:
df₅[:,[:A]] # a vector of column names, even a single one like [:A], returns a DataFrame

Unnamed: 0_level_0,A
Unnamed: 0_level_1,Int64
1,1
2,3
3,5
4,7
5,9
6,11
7,13
8,15
9,17
10,19


In [42]:
df₅[:,:A] # a single column name returns a Vector with the elements of A

500-element Array{Int64,1}:
   1
   3
   5
   7
   9
  11
  13
  15
  17
  19
  21
  23
  25
   ⋮
 977
 979
 981
 983
 985
 987
 989
 991
 993
 995
 997
 999