## select() Function from dplyr

In [None]:
# select() is a function from the dplyr R package that selects data frame variables (columns) by name or by index position.

    # Syntax of select()
    select(x, variables_to_select)

In [1]:
# Build the example data frame used throughout: four rows labelled r1..r4
# with an id, a name, a gender code, a date of birth and a state code.
# The last `state` value is NA.
df <- data.frame(
  id        = c(10, 11, 12, 13),
  name      = c("sai", "ram", "deepika", "sahithi"),
  gender    = c("M", "M", "F", "F"),
  dob       = as.Date(c("1990-10-02", "1981-3-24", "1987-6-14", "1985-8-16")),
  state     = c("CA", "NY", "DE", NA),
  row.names = c("r1", "r2", "r3", "r4")
)

# Display the data frame.
df

Unnamed: 0_level_0,id,name,gender,dob,state
Unnamed: 0_level_1,<dbl>,<chr>,<chr>,<date>,<chr>
r1,10,sai,M,1990-10-02,CA
r2,11,ram,M,1981-03-24,NY
r3,12,deepika,F,1987-06-14,DE
r4,13,sahithi,F,1985-08-16,


In [6]:
library(dplyr)

# Select variables by index position: keep the 2nd and 3rd columns of df.
select(df, 2, 3)

Unnamed: 0_level_0,name,gender
Unnamed: 0_level_1,<chr>,<chr>
r1,sai,M
r2,ram,M
r3,deepika,F
r4,sahithi,F


In [7]:
# Select columns by a vector of index positions (here columns 2 and 3).
select(df, c(2, 3))

Unnamed: 0_level_0,name,gender
Unnamed: 0_level_1,<chr>,<chr>
r1,sai,M
r2,ram,M
r3,deepika,F
r4,sahithi,F


In [8]:
# Select a contiguous range of columns by position (2 through 3).
select(df, 2:3)

Unnamed: 0_level_0,name,gender
Unnamed: 0_level_1,<chr>,<chr>
r1,sai,M
r2,ram,M
r3,deepika,F
r4,sahithi,F


In [9]:
# Select variables by name, one name per argument.
select(df, "name", "gender")

Unnamed: 0_level_0,name,gender
Unnamed: 0_level_1,<chr>,<chr>
r1,sai,M
r2,ram,M
r3,deepika,F
r4,sahithi,F


In [10]:
# Select variables by name, passing the names as a single character vector.
select(df, c("name", "gender"))

Unnamed: 0_level_0,name,gender
Unnamed: 0_level_1,<chr>,<chr>
r1,sai,M
r2,ram,M
r3,deepika,F
r4,sahithi,F


In [11]:
# Select every variable from `name` through `state`, both ends included.
select(df, "name":"state")

Unnamed: 0_level_0,name,gender,dob,state
Unnamed: 0_level_1,<chr>,<chr>,<date>,<chr>
r1,sai,M,1990-10-02,CA
r2,ram,M,1981-03-24,NY
r3,deepika,F,1987-06-14,DE
r4,sahithi,F,1985-08-16,


In [12]:
# Select all variables whose name starts with "gen".
select(df, starts_with("gen"))

Unnamed: 0_level_0,gender
Unnamed: 0_level_1,<chr>
r1,M
r2,M
r3,F
r4,F


In [13]:
# Select all variables whose name ends with "e".
select(df, ends_with("e"))

Unnamed: 0_level_0,name,state
Unnamed: 0_level_1,<chr>,<chr>
r1,sai,CA
r2,ram,NY
r3,deepika,DE
r4,sahithi,


In [14]:
# Select all variables whose name contains the character "a".
select(df, contains("a"))

Unnamed: 0_level_0,name,state
Unnamed: 0_level_1,<chr>,<chr>
r1,sai,CA
r2,ram,NY
r3,deepika,DE
r4,sahithi,


In [15]:
# Select all numeric variables.
# where() applies the predicate to each column and keeps the columns for
# which it is TRUE; it replaces the superseded scoped verb select_if().
df %>% select(where(is.numeric))

Unnamed: 0_level_0,id
Unnamed: 0_level_1,<dbl>
r1,10
r2,11
r3,12
r4,13


In [17]:
# Negated selection: keep every variable except `name` and `gender`.
select(df, -c("name", "gender"))

Unnamed: 0_level_0,id,dob,state
Unnamed: 0_level_1,<dbl>,<date>,<chr>
r1,10,1990-10-02,CA
r2,11,1981-03-24,NY
r3,12,1987-06-14,DE
r4,13,1985-08-16,


In [18]:
# Keep only the columns whose name does NOT start with "gen".
select(df, -starts_with("gen"))

Unnamed: 0_level_0,id,name,dob,state
Unnamed: 0_level_1,<dbl>,<chr>,<date>,<chr>
r1,10,sai,1990-10-02,CA
r2,11,ram,1981-03-24,NY
r3,12,deepika,1987-06-14,DE
r4,13,sahithi,1985-08-16,


In [20]:
# Read the vehicle CSV from the sampledata directory into `data`.
# file.path() joins the path components with a separator that works on every
# platform, instead of a hard-coded Windows backslash.
data <- read.csv(file.path("sampledata", "vehicle.csv"))

In [22]:
# Drop every column whose name starts with "user.", "milestone." or "pull_",
# or whose name ends with "url"; the remaining columns are kept.
# NOTE(review): none of the columns shown in the preview below match these
# patterns -- confirm the selectors are intended for this data set.
data2 <- select(
  data,
  -starts_with("user."),
  -starts_with("milestone."),
  -starts_with("pull_"),
  -ends_with("url")
)

# Preview the first rows of the result.
head(data2)

Unnamed: 0_level_0,Vehicle,fm,Mileage,lh,lc,mc,State
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
1,1,0,863,1.1,66.3,697.23,MS
2,2,10,4644,2.4,233.03,119.66,CA
3,3,15,16330,4.2,325.08,175.46,WI
4,4,0,13,1.0,66.64,0.0,OR
5,5,13,22537,4.5,328.66,175.46,AZ
6,6,21,40931,3.1,205.28,175.46,FL
