Tips and Tricks

Matt Dowle edited this page Aug 22, 2014 · 1 revision

1. How to update/remove columns of a data.table

# Attach data.table so the examples below run in a fresh R session.
library(data.table)

# Fix the RNG seed so the printed values shown below are reproducible.
set.seed(1)
n <- 10000
# Example table with 10 * n (100,000) rows of standard-normal draws.
DT <- data.table(x = rnorm(10 * n), y = rnorm(10 * n))

# Basic update: the `:=` operator adds column z to DT in place.
DT[, z := x * y]
head(DT, n = 2L)
#         x      y        z
#1: -0.6265 0.7914 -0.49580
#2:  0.1836 0.3922  0.07202

# Functional form of `:=`: add several columns in a single call.
DT[, `:=`(
  u = z + 1,
  v = z - 1
)]
head(DT, n = 2L)
#         x      y        z      u      v
#1: -0.6265 0.7914 -0.49580 0.5042 -1.496
#2:  0.1836 0.3922  0.07202 1.0720 -0.928

# Assigning NULL to a character vector of names drops all of those columns
# in one call.
DT[, c("z", "u", "v") := NULL]
head(DT, n = 2L)
#         x      y
#1: -0.6265 0.7914
#2:  0.1836 0.3922

# Update by reference and reuse a value right after it is defined:
# `tmp` is computed once, stored as u, and then used to derive v.
DT[, c("u", "v") := {
  tmp <- x * y
  list(tmp, sqrt(abs(tmp)))
}]
head(DT, n = 2L)
#         x      y        u      v
#1: -0.6265 0.7914 -0.49580 0.7041
#2:  0.1836 0.3922  0.07202 0.2684