In [1]:
# A vector is a one-dimensional sequence of values of a single type: numeric, character, or logical.
# In other words, a vector is a simple tool to store your grouped data.
# In R, you create a vector with the combine function c().
# You place the vector elements, separated by commas, inside the parentheses.
# Vectors will be very useful in the future as they allow you to apply operations on a series of data easily.
# Note that all items in a vector must be of the same class: all numeric, all character, or all logical.

In [2]:
# Numeric vector: c() combines the four release years into a single vector
release_year <- c(1985, 1999, 2015, 1964)
release_year

In [3]:
# Character vector
# Create the titles vector and assign values to it
titles <- c("Toy Story", "Akira", "The Breakfast Club")
titles

In [4]:
# Logical vectors: a comparison on a vector is evaluated element by element
titles == "Akira" # TRUE for each item in `titles` that is equal to "Akira", FALSE otherwise

In [5]:
### Vector operations
# Add to a vector: c() joins the existing values with the new ones
# (2016:2018 expands to 2016, 2017, 2018) and the result is assigned back
release_year <- c(release_year, 2016:2018)
release_year

In [6]:
# Find the length of a vector, i.e. the number of elements it holds
release_year
length(release_year)

In [7]:
# Head and tail of a vector; the optional second argument sets how many items to return
head(release_year) # first six items
head(release_year, 2) # first two items
tail(release_year) # last six items
tail(release_year, 2) # last two items

In [8]:
# Sort a vector (ascending order by default); release_year itself is left unchanged
sort(release_year)

In [9]:
# Sort in descending order by passing decreasing = TRUE
sort(release_year, decreasing = TRUE)

In [10]:
# Smallest and largest values in the vector
min(release_year)
max(release_year)

In [11]:
# Average computed by hand
cost_2014 <- c(8.6, 8.5, 8.1)

# sum() adds up all elements and length() counts them, so the divisor always
# matches the number of values instead of relying on a hard-coded 3
avg_cost_2014 <- sum(cost_2014) / length(cost_2014)
avg_cost_2014

In [12]:
# mean() computes the average of the vector directly
mean_cost_2014 <- mean(cost_2014)
mean_cost_2014

In [13]:
# Give names to the values in a vector.
# Each year is paired with its movie title at creation time, producing a
# named numeric vector.
release_year <- c(
  "The Breakfast Club" = 1985,
  "American Beauty" = 1999,
  "Black Swan" = 2010,
  "Chicago" = 2002
)

release_year

In [14]:
# Subset by name: returns the elements whose names match the given titles
release_year[c("American Beauty", "Chicago")]

In [15]:
release_year[1] + 100 # returns the first year plus 100; release_year itself is not modified because nothing is assigned

In [17]:
# Retrieve names: names() returns the name of every element; [1:3] keeps the first three
names(release_year)[1:3]

In [18]:
# Summarize a numeric vector: minimum, quartiles, median, mean and maximum
summary(cost_2014)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   8.10    8.30    8.50    8.40    8.55    8.60 

In [19]:
# Logical comparison applied to every element of a vector.
# Note: movies_years (with an "s") is a separate vector from movie_years, which is defined further down.
movies_years <- c(1998, 2010, 2016)
movies_years > 2014

In [20]:
# Equal to (==) and not equal to (!=), evaluated element by element
movies_years == 2015 # is each element equal to 2015?
movies_years != 2015 # is each element different from 2015?

In [21]:
# Subsetting vectors: square brackets select elements by position (positions start at 1)
movie_years <- c(1985, 1999, 2002, 2010, 2012)
movie_years[2] # second item

In [22]:
# Retrieve multiple items by passing a vector of positions
movie_years[c(1,3)] # first and third items

In [23]:
# Negative indexing: a negative position excludes that element instead of selecting it
titles <- c("Black Swan", "Jumanji", "City of God", "Toy Story", "Casino")
titles[-1] # everything except the first item

In [24]:
# Missing values: NA ("not available") marks an element whose value is unknown
age_restric <- c(14, 12, 10, NA, 18, NA)
age_restric

In [25]:
# Logical subsetting: keep only the elements of release_year for which the
# condition is TRUE. The mask is built from release_year itself so that it has
# the same length as the vector being subset; a mask taken from a longer vector
# (such as the 5-element movie_years) would index past the end and add an NA.
release_year[release_year > 2000]

In [26]:
release_year
# The logical index is spelled with TRUE/FALSE rather than T/F: T and F are
# ordinary variables that can be reassigned, while TRUE and FALSE are reserved.
release_year[c(TRUE, FALSE, FALSE, FALSE)] # returns the values at the TRUE positions (here, the first one)

In [27]:
##Factors

In [28]:
# Factors are variables in R which take on a limited number of different values;
# such variables are often referred to as categorical variables. The difference between a categorical
# variable and a continuous variable is that a categorical variable can only belong to one of a limited
# number of categories, whereas a continuous variable can take any value within a range.
genre_vector <- c("Comedy", "Animation", "Crime", "Comedy", "Animation")
genre_vector

In [29]:
# Convert the character vector to a factor, then list its levels (the distinct categories)
genre_factor <- as.factor(genre_vector)
levels(genre_factor)

In [30]:
summary(genre_factor) # summarise a factor: number of occurrences of each level

In [31]:
sort(summary(genre_factor)) # sorts the per-level counts in ascending order

In [32]:
# Ordered factors
# Start from a plain character vector of length categories, one per movie
movie_length <- c(
  "Very Short",
  "Short",
  "Medium",
  "Short",
  "Long",
  "Very Short",
  "Very Long"
)
movie_length

In [33]:
# Turn movie_length into an ordered factor; the levels are listed from
# shortest to longest, which defines the ordering between categories
movie_length_ordered <- ordered(
  movie_length,
  levels = c("Very Short", "Short", "Medium", "Long", "Very Long")
)
movie_length_ordered

In [34]:
# Summary of the ordered factor: number of occurrences of each level
summary(movie_length_ordered)