### Selecting and Indexing Data Frames

In [1]:
# Build a small practice data frame from three parallel vectors,
# one element per day of the week.
Days <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
Temps <- c(32, 34, 33.5, 31, 33, 33.8, 35)
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned,
# which would silently change these values.
Rain <- c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE)
# Each vector becomes a column named after the variable that held it.
df1 <- data.frame(Days, Temps, Rain)
df1

Days,Temps,Rain
<fct>,<dbl>,<lgl>
Mon,32.0,True
Tue,34.0,True
Wed,33.5,False
Thu,31.0,False
Fri,33.0,True
Sat,33.8,False
Sun,35.0,True


In [2]:
# Select one column by name with single-bracket notation.
# Single brackets keep the container, so the result is still a data frame.
df1["Days"]

Days
<fct>
Mon
Tue
Wed
Thu
Fri
Sat
Sun


In [3]:
# `$` pulls the column itself out of the data frame instead of returning
# a one-column data frame.
df1$Days # with $ notation, notice, this returns a vector object

In [4]:
# Matrix-style indexing has the form df[rows, cols]; each slot accepts
# positions or ranges such as row_start:row_end.
# An empty row slot selects every row, and a single selected column is
# returned as the column itself rather than as a data frame.
df1[, 1]

#### Subset for filtering the data

In [5]:
# Syntax: subset(df, subset = <logical condition>) keeps the rows for which
# the condition is TRUE.
# Rain is already a logical column, so it can be used as the condition directly.
subset(df1, subset = Rain)


Unnamed: 0_level_0,Days,Temps,Rain
Unnamed: 0_level_1,<fct>,<dbl>,<lgl>
1,Mon,32,True
2,Tue,34,True
5,Fri,33,True
7,Sun,35,True


#### Order function


In [6]:
# order() returns the row positions that would sort its argument ascending.
# Pass the Temps column as a vector (via $): handing order() a data frame such
# as df1['Temps'] triggers a "cannot xtfrm data frames" warning in recent R.
sorted.temp <- order(df1$Temps)

In [7]:
# Inspect the integer row positions returned by order().
sorted.temp

In [8]:
# Use the ordering as the row index to list the rows from the lowest
# to the highest temperature.
df1[sorted.temp, ]

Unnamed: 0_level_0,Days,Temps,Rain
Unnamed: 0_level_1,<fct>,<dbl>,<lgl>
4,Thu,31.0,False
1,Mon,32.0,True
5,Fri,33.0,True
3,Wed,33.5,False
6,Sat,33.8,False
2,Tue,34.0,True
7,Sun,35.0,True


#### 4 ways of getting a column's data as a vector

In [9]:
# Use the built-in mtcars dataset for these examples;
# head() shows its first rows.
head(mtcars)

Unnamed: 0_level_0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Mazda RX4,21.0,6,160,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225,105,2.76,3.46,20.22,1,0,3,1


In [10]:
# Way 1: `$` notation with the literal column name returns the column as a vector.
mtcars$mpg

In [11]:
# Way 2: matrix-style index by position. All rows, first column;
# a single column comes back as a vector.
mtcars[, 1]

In [13]:
# Way 3: matrix-style index by column name. All rows, the "mpg" column,
# returned as a vector.
mtcars[, "mpg"]

In [14]:
# Way 4: double brackets extract a single column and drop the data frame
# container, giving a vector.
mtcars[["mpg"]]

### 2 ways of accessing the column data as a dataframe


In [15]:
# Single brackets with a column name keep the data frame structure:
# the result is a one-column data frame.
mtcars["mpg"]

Unnamed: 0_level_0,mpg
Unnamed: 0_level_1,<dbl>
Mazda RX4,21.0
Mazda RX4 Wag,21.0
Datsun 710,22.8
Hornet 4 Drive,21.4
Hornet Sportabout,18.7
Valiant,18.1
Duster 360,14.3
Merc 240D,24.4
Merc 230,22.8
Merc 280,19.2


In [16]:
# Single brackets with a column position also return a one-column data frame.
mtcars[1]

Unnamed: 0_level_0,mpg
Unnamed: 0_level_1,<dbl>
Mazda RX4,21.0
Mazda RX4 Wag,21.0
Datsun 710,22.8
Hornet 4 Drive,21.4
Hornet Sportabout,18.7
Valiant,18.1
Duster 360,14.3
Merc 240D,24.4
Merc 230,22.8
Merc 280,19.2


In [20]:
# A character vector of names inside single brackets selects several columns
# at once and keeps the result a data frame; head() shows the first rows.
head(mtcars[c("mpg", "cyl")])

Unnamed: 0_level_0,mpg,cyl
Unnamed: 0_level_1,<dbl>,<dbl>
Mazda RX4,21.0,6
Mazda RX4 Wag,21.0,6
Datsun 710,22.8,4
Hornet 4 Drive,21.4,6
Hornet Sportabout,18.7,8
Valiant,18.1,6


### Adding new rows to a dataframe

In [31]:
# Start from a fresh data frame assembled from three equal-length vectors,
# one element per student.
NAME <- c("Raj", "Rani", "Laila", "John", "Julia")
AGE <- c(21, 23, 28, 27, 30)
GENDER <- c("Male", "Female", "Female", "Male", "Female")
# Columns appear in the order the vectors are passed: NAME, AGE, GENDER.
studentdf <- data.frame(NAME, AGE, GENDER)
studentdf

NAME,AGE,GENDER
<fct>,<dbl>,<fct>
Raj,21,Male
Rani,23,Female
Laila,28,Female
John,27,Male
Julia,30,Female


In [32]:
# A one-row data frame whose column names (NAME, AGE, GENDER) are given
# explicitly as argument names.
df_new <- data.frame(
  NAME = "Latha",
  AGE = 31,
  GENDER = "Female"
)

In [33]:
# Display the single-row data frame that is about to be appended.
df_new

NAME,AGE,GENDER
<fct>,<dbl>,<fct>
Latha,31,Female


In [34]:
# rbind() (row bind) stacks the rows of df_new underneath those of studentdf;
# the combined result replaces studentdf.
studentdf <- rbind(studentdf, df_new)

In [35]:
# Display studentdf after the rbind() to confirm the new row is present.
studentdf

NAME,AGE,GENDER
<fct>,<dbl>,<fct>
Raj,21,Male
Rani,23,Female
Laila,28,Female
John,27,Male
Julia,30,Female
Latha,31,Female


#### Aggregate operations

In [1]:
# Preview the first rows of the built-in mtcars dataset.
head(mtcars)

Unnamed: 0_level_0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Mazda RX4,21.0,6,160,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225,105,2.76,3.46,20.22,1,0,3,1


In [4]:
# Mean mpg for each distinct number of cylinders.
# aggregate() arguments:
#   x   - the column on which the aggregation is performed
#   by  - a list of the variable(s) to group by; the list name ("cylinders")
#         becomes the grouping column's name in the result
#   FUN - the function applied to x within each group
aggregate(
  x = mtcars$mpg,
  by = list(cylinders = mtcars$cyl),
  FUN = mean
)

cylinders,x
<dbl>,<dbl>
4,26.66364
6,19.74286
8,15.1
