# Arithmetic & Logical Operators, R Data Types

## Variables

First way to declare a variable:  use the `<-`  
`name_of_variable <- value`

Second way to declare a variable:  use the `=`  
`name_of_variable = value`

In [None]:
# Declare variables of different types
# Numeric
x <- 28
class(x)

In [None]:
# String
y <- "R is Fantastic"
class(y)

In [None]:
# Boolean
z <- TRUE
class(z)

In [None]:
# Print variable x
x <- 42
x

In [None]:
#Example:
y  <- 10
y

In [None]:
#Example:
# We call x and y and apply a subtraction
x-y

## Vectors



In [None]:
# Numerical
vec_num <- c(1, 10, 49)
vec_num

In [None]:
# Character 
vec_chr <- c("a", "b", "c")
vec_chr

In [None]:
# Boolean 
vec_bool <-  c(TRUE, FALSE, TRUE)
vec_bool

In [None]:
# Example:

# Create the vectors
vect_1 <- c(1, 3, 5)
vect_2 <- c(2, 4, 6)
# Take the element-wise sum of vect_1 and vect_2
sum_vect <- vect_1 + vect_2
# Print out sum_vect
sum_vect

In [None]:
# Slice elements 2 through 5 of the vector (indices are inclusive, so this
# selects 9, 8, 7, 6)
slice_vector <- c(10,9,8,7,6,5,4,3,2,1)
pesho  <- slice_vector[2:5]
pesho

In [None]:
# Faster way to create adjacent values
c(1:10)

## Arithmetic Operators



In [None]:
# Addition
3 + 4

In [None]:
# Multiplication
3*5

In [None]:
# Division
(5+5)/2

In [None]:
# Exponentiation
2^5
2**5

In [None]:
# Modulo
28%%6

## Logical Operators


In [None]:
# Create a vector from 1 to 10
logical_vector <- c(1:10)
logical_vector
logical_vector>5

In [None]:
# Print value strictly above 5
logical_vector[(logical_vector>5)]

In [None]:
# Print 5 and 6
logical_vector <- c(1:10)
logical_vector[(logical_vector>4) & (logical_vector<7)]

# Data manipulation

## Create a Matrix in R

`matrix(data, nrow, ncol, byrow = FALSE)`

`data`: The collection of elements that R will arrange into the rows and columns of the matrix  
`nrow`: Number of rows  
`ncol`: Number of columns  
`byrow`: If `byrow = TRUE`, the matrix is filled row by row, from left to right. With `byrow = FALSE` (the default), the matrix is filled column by column, i.e. the values are filled from top to bottom.  

In [None]:
# Construct a matrix with 5 rows that contain the numbers 1 up to 10 and byrow =  TRUE 
matrix_a <-matrix(1:10, byrow = TRUE, nrow = 5)
matrix_a

In [None]:
# Print dimension of the matrix with dim()
dim(matrix_a)

In [None]:
# Construct a matrix with 5 rows that contain the numbers 1 up to 10 and byrow =  FALSE
matrix_b <-matrix(1:10, byrow = FALSE, nrow = 5)
matrix_b

In [None]:
# Print dimension of the matrix with dim()
dim(matrix_b)

In [None]:
# You can also create a 4x3 matrix using ncol. R will create 3 columns and fill each column from top to bottom.
matrix_c <-matrix(1:12, byrow = FALSE, ncol = 3)
matrix_c

In [None]:
dim(matrix_c)

In [None]:
# Add a Column to a Matrix with the cbind()

# Bind c(1:5) as a new first column in front of matrix_a
matrix_a1 <- cbind(c(1:5),matrix_a)
# Print the resulting matrix
matrix_a1

In [None]:
dim(matrix_a1)

In [None]:
# Concatenate matrix
matrix_a2 <-matrix(13:24, byrow = FALSE, ncol = 3)

matrix_c <-matrix(1:12, byrow = FALSE, ncol = 3)
matrix_d <- cbind(matrix_a2, matrix_c)
matrix_d

In [None]:
dim(matrix_d)

In [None]:
# Append
matrix_c <-matrix(1:12, byrow = FALSE, ncol = 3)
# Create a vector of 3 columns
add_row <- c(1:3)
# Append to the matrix
matrix_c <- rbind(matrix_c, add_row)
matrix_c
# Check the dimension
dim(matrix_c)

Select one or many elements from a matrix in R by using the square brackets `[ ]`.

`matrix_c[1,2]` selects the element at the first row and second column.  
`matrix_c[1:3,2:3]` results in a slice of the matrix with the data on rows 1, 2, 3 and columns 2, 3.  
`matrix_c[,1]` selects all elements of the first column.  
`matrix_c[1,]` selects all elements of the first row.  

In [None]:
matrix_c

In [None]:
slice1 <- matrix_c[1,2]
slice1

In [None]:
slice2 <- matrix_c[1:3,2:3]
slice2

In [None]:
slice3 <- matrix_c[,1]
slice3

In [None]:
slice4 <- matrix_c[1,]
slice4

## Factor in R

Factor in R is a categorical variable that stores both string and integer data values as levels.

Syntax

`factor(x = character(), levels, labels = levels, ordered = is.ordered(x))`  
Arguments:  
`x`: A vector of categorical data in R. Needs to be a string or integer, not decimal.  
`levels`: A vector of possible values taken by `x`. This argument is optional. The default value is the sorted unique values of the vector `x`.  
`labels`: Add a label to the `x` categorical data in R. For example, 1 can take the label `male` while 0 takes the label `female`.  
`ordered`: Determines whether the levels should be treated as ordered in categorical data in R.  

In [None]:
# Create gender vector
gender_vector <- c("Male", "Female", "Female", "Male", "Male")
class(gender_vector)

In [None]:
# Convert gender_vector to a factor
factor_gender_vector <-factor(gender_vector)
factor_gender_vector

In [None]:
class(factor_gender_vector)

In [None]:
# Nominal Categorical Variable
# A categorical variable has several values but the order does not matter. 

# Create a color vector
color_vector <- c('blue', 'red', 'green', 'white', 'black', 'yellow')
# Convert the vector to factor
factor_color <- factor(color_vector)
factor_color

In [None]:
# Create Ordinal categorical vector
day_vector <- c('evening', 'morning', 'afternoon', 'midday', 'midnight', 'evening')
# Convert `day_vector` to an ordered factor. The argument is spelled out as
# `ordered` (not `order`) so the call does not rely on partial argument matching.
factor_day <- factor(
  day_vector,
  levels = c('morning', 'midday', 'afternoon', 'evening', 'midnight'),
  ordered = TRUE
)
# Levels: morning < midday < afternoon < evening < midnight
# Print the new variable
factor_day

In [None]:
# Count the number of occurrences of each level
summary(factor_day)

In [None]:
# Continuous Variables
dataset <- mtcars
class(dataset$mpg)
dataset$mpg

## Create a Data Frame

`data.frame(df, stringsAsFactors = TRUE)`

Arguments:

`df`: It can be a matrix to convert as a data frame or a collection of variables to join  
`stringsAsFactors`: Whether to convert strings to factors. The default was `TRUE` before R 4.0.0 and is `FALSE` since R 4.0.0  
We can create a data frame in R for our first data set by combining four variables of the same length.  

In [None]:
# Create the column vectors. The logical column is stored in `in_store`
# rather than in a variable called `c`, so the base function c() is not
# masked by a data object in the global environment.
a <- c(10, 20, 30, 40)
b <- c('book', 'pen', 'textbook', 'pencil_case')
in_store <- c(TRUE, FALSE, TRUE, FALSE)
d <- c(2.5, 8, 10, 7)
# Join the variables to create a data frame; the third column is still
# named `c`, so the resulting column names are a, b, c, d as before.
df <- data.frame(a, b, c = in_store, d)
df

In [None]:
# Name the data frame
names(df) <- c('ID', 'items', 'store', 'price')
df

In [None]:
# Print the structure
str(df)

In [None]:
# Slice Data Frame
## Select row 1 in column 2
df[1,2]

In [None]:
## Select Rows 1 to 2
df[1:2,]

In [None]:
## Select Columns 1
df[,1]

In [None]:
## Select Rows 1 to 3 and columns 3 to 4
df[1:3, 3:4]

In [None]:
# Slice with columns name
df[, c('ID', 'store')]

In [None]:
#Append a Column to Data Frame

# Create a new vector
quantity <- c(10, 35, 40, 5)
# Add `quantity` to the `df` data frame
df$quantity <- quantity
df

In [None]:
# Select the column ID
df$ID

# Functions in R

## Syntax

`function (arglist)  {`  
`  #Function body`  
`}`  

In [None]:
# Return the square of `n`. Works for any numeric value (and element-wise
# for numeric vectors), not only for integers.
square_function <- function(n) {
  n^2
}
# Call the function with the value 4.32
square_function(4.32)

## Some useful functions

In [None]:
# Fix the random seed so the simulated data is reproducible
set.seed(0)
# Draw 1000 values from the standard normal distribution
x <- rnorm(1000)
x

In [None]:
ts <- cumsum(x)
ts

In [None]:
## Difference the series (consecutive differences) to make it stationary
diff_ts <- diff(ts)
# Request a plotting layout of 1 row and 2 columns
par(mfrow=c(1,2))


In [None]:
# Plot the series and its first difference side by side.
# The layout is set in the same cell as the plots: some notebook front ends
# start a fresh graphics device per cell, in which case a par() call made in
# an earlier cell would not apply here (assumption about the front end; the
# call is harmless if the setting does persist).
old_par <- par(mfrow = c(1, 2))
plot(ts, type = 'l')
# Reuse the differenced series computed earlier instead of calling diff() again
plot(diff_ts, type = 'l')
# Restore the previous graphics settings
par(old_par)
# Keep a copy of the built-in `cars` dataset for the following cells
dt <- cars

In [None]:
## Number of columns. ncol() states the intent directly; length() on a data
## frame returns the same value only because a data frame is a list of columns.
ncol(dt)

In [None]:
## Number of rows. nrow() avoids extracting a column just to measure its length.
nrow(dt)

## Math functions

`abs(x)` Takes the absolute value of `x`  
`log(x,base=y)`	Takes the logarithm of `x` with base `y` if base is not specified, returns the natural logarithm  
`exp(x)` Returns the exponential of `x`  
`sqrt(x)` Returns the square root of `x`  
`factorial(x)` Returns the factorial of `x (x!)`  

In [None]:
# sequence of numbers from 45 to 55, both included, incremented by 1
x_vector <- seq(45,55, by = 1)
x_vector

In [None]:
#logarithm
log(x_vector)

In [None]:
#exponential
exp(x_vector)

In [None]:
#square root
sqrt(x_vector)

In [None]:
#factorial
factorial(x_vector)

## Statistical functions

In [None]:
speed <- dt$speed
speed

In [None]:
# Mean speed of cars dataset
mean(speed)

In [None]:
# Median speed of cars dataset
median(speed)

In [None]:
# Variance speed of cars dataset
var(speed)

In [None]:
# Standard deviation speed of cars dataset
sd(speed)

In [None]:
# Standardize vector speed of cars dataset
head(scale(speed), 5)

In [None]:
# Quantile speed of cars dataset
quantile(speed)

In [None]:
# Summary speed of cars dataset
summary(speed)

## The else if statement

We can further customize the control flow with the `else if` statement. With `else if`, we can add as many conditions as we want. The syntax is:

`if (condition1) {`  
`    expr1`  
`    } else if (condition2) {`  
`    expr2`  
`    } else if  (condition3) {`  
`    expr3`  
`    } else {`  
`    expr4`  
`}`  

In [None]:
# Case:
# Imagine we have three different kinds of products with different VAT rates applied.
# We can write an if / else if chain to apply the correct VAT rate to the product a customer bought.

# Create the Categories, Products and VAT variables
Categories <- c('A','B','C')
Products <- c('Book, magazine, newspaper, etc..', 'Vegetable, meat, beverage, etc..', 'Tee-shirt, jean, pant, etc..')
VAT <- c('8%','10%','20%')
# Join the variables to create a data frame
df <- data.frame(Categories,Products,VAT)
df

In [None]:
# Apply the VAT rate that matches the product category and print the total price.
category <- 'A'
price <- 10
if (category == 'A') {
  cat('A VAT rate of 8% is applied.', 'The total price is', price * 1.08)
} else if (category == 'B') {
  cat('A VAT rate of 10% is applied.', 'The total price is', price * 1.10)
} else if (category == 'C') {
  cat('A VAT rate of 20% is applied.', 'The total price is', price * 1.20)
} else {
  # Fail loudly instead of silently charging the 20% rate for a category
  # that is not one of 'A', 'B' or 'C'.
  stop("Unknown product category: ", category, call. = FALSE)
}

## For Loop Syntax

`for (i in vector) {`  
`    Expression of i`  
`}`  

R will loop over all the elements `in` `vector` and do the computation written in the expression.  

In [None]:
# Example: iterate over all the elements of a vector and print the current value.

# Create fruit vector
fruit <- c('Apple', 'Orange', 'Passion fruit', 'Banana')
# Create the for statement
for ( i in fruit){ 
 print(i)
}

# Inputs & Outputs

## Read CSV

Comes with the internal library `utils`  
 
`read.csv(file, header = TRUE, sep = ",")`  

Argument:

`file`: PATH where the file is stored  
`header`: confirm if the file has a header or not, by default, the header is set to `TRUE`  
`sep`: the symbol used to split the variable. By default, `,`.  
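For Windows users (use forward slashes or doubled backslashes, because a single `\` starts an escape sequence in R strings):  
`"C:/Users/USERNAME/Downloads/FILENAME.csv"`  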

In [None]:
PATH <- 'https://raw.githubusercontent.com/guru99-edu/R-Programming/master/mtcars.csv'                
df <- read.csv(PATH, header =  TRUE, sep = ',')
df

In [None]:
length(df)

In [None]:
class(df$X)

## Read Excel files

Needs `readxl` library  

The function `read_excel()` is of great use when it comes to opening files with the xls and xlsx extensions.  

The syntax is:  
`read_excel(PATH, sheet = NULL, range= NULL, col_names = TRUE)`  
arguments:  
`PATH`: Path where the excel is located  
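`sheet`: Select the sheet to import, by name or position. By default, the first sheet  
`range`: Select the cell range to import. By default, all non-empty cells  
`col_names`: `TRUE` to use the first row as column names, `FALSE` to get default names, or a character vector of names. By default, `TRUE`  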

In [None]:
# Load the `readxl` package (it must already be installed).
# library() stops with an error if the package is missing, so the additional
# require() call was redundant and has been dropped.
library(readxl)
# Called without arguments, readxl_example() lists the example spreadsheets
# that ship with the readxl package.
readxl_example()

In [None]:
# Store the path of `datasets.xlsx`
example <- readxl_example("datasets.xlsx")
# Import the spreadsheet
df <- read_excel(example)
df

In [None]:
# Count the number of columns
length(df)

## Export to file

`write.csv(df, path)`

Arguments:  
`df`: Dataset to save. Needs to be the name of a data frame in the environment.  
`path`: A string. Set the destination path. Path + filename + extension, or the filename + extension if the folder is the same as the working directory.  

For Windows (use forward slashes or doubled backslashes):  
`'C:/Users/USERNAME/Documents/'`

In [None]:
# Example
# NOTE(review): `df` is whatever was assigned last; earlier in this file it is
# reassigned from read.csv() and read_excel(), so the exported data may not be
# the VAT table that the file name suggests. Confirm before relying on it.
write.csv(df, "table_VAT.csv")

## Simple plots

In [None]:
# read some data
acs <- read.csv(url("http://stat511.cwick.co.nz/homeworks/acs_or.csv"))
acs

In [None]:
# Scatter plot
plot(x = acs$age_husband , y = acs$age_wife, type = 'p')

In [None]:
# Histogram
hist(acs$number_children)

In [None]:
# Bar Plots
counts <- table(acs$bedrooms)
barplot(counts, main="Bedrooms Distribution",  xlab="Number of Bedrooms")

## Export plot

In [None]:
# Open a 600 x 600 PNG device. The argument is spelled out as `filename`
# instead of relying on partial matching of `file`.
png(filename = "saving_plot2.png", width = 600, height = 600)
counts <- table(acs$bedrooms)
barplot(counts, main = "Bedrooms Distribution", xlab = "Number of Bedrooms")
# Close the device so the file is written to disk
dev.off()

# Moving average example

## Environment preparation

In [None]:
# Packages needed for the moving average example
required_pkgs <- c("readxl", "tseries", "forecast", "data.table", "TTR")
# Install only what is missing, into the default library, instead of
# reinstalling every package into a user-specific, hard-coded Windows path.
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(readxl)
library(forecast)
library(tseries)
library(data.table)
library(TTR)

In [None]:
# Load the first sheet of the workbook, using its first row as column names
data <- read_xlsx("01. Belgium.xlsx", sheet = 1, col_names = TRUE)
# Data preparation: give the 13 columns fixed names
# (the label previously misspelled as "TOTOAL_ROWS" is now "TOTAL_ROWS")
colnames(data) <- c(
  "GEO/TIME", "TimeT", "Y", "X1", "X2", "NOCOL", "CODE", "COUNTRY",
  "START", "END", "TOTAL_ROWS", "TRAIN_ROWS", "TEST_ROWS"
)
data

In [None]:
# The row counts are read from the first row of columns 11, 12 and 13
total_rows <- as.numeric(data[1, 11])
train_rows <- as.numeric(data[1, 12])
test_rows <- as.numeric(data[1, 13])
# Index of the first row after the training rows
workingNumber <- train_rows + 1
# Keep columns 2 to 5 of the 13-column table only; this is the same result as
# dropping column 1 and then dropping positions 5 to 12 of what remains.
data <- data[, 2:5]
data

## Modelling & Forecasting

In [None]:
# Build the working series: the observed values from column 2 of `data` for
# the training rows, then the shifted 3-period simple moving average for the
# remaining rows.
working_Y <- rep(0, total_rows)
working_Y <- as.data.frame(working_Y)
# Moving average shifted by one position; presumably row t then holds the
# average of rows t-3..t-1, matching F(t)=(Y(t-1)+Y(t-2)+Y(t-3))/3 (TODO confirm).
working_SMA <- as.data.frame(shift(SMA(data[,2],n=3),1))
# seq_len() gives an empty loop when total_rows is 0 (1:total_rows would
# iterate over c(1, 0)). The loop index replaces the separate row counter,
# which always had the same value as `i`.
for (i in seq_len(total_rows)) {
  if (i <= train_rows) {
    working_Y[i,1] <- data[i,2]
  } else {
    working_Y[i,1] <- working_SMA[i,1]
  }
  # Refresh the shifted moving average so the next row can use the value just written
  working_SMA <- as.data.frame(shift(SMA(working_Y[,1],n=3),1))
}

forecast11 <- as.data.frame(shift(SMA(working_Y, 3),1))
forecast11


## Analysis

In [None]:
# Calculate the relative error: the sum of absolute forecast errors divided by
# the sum of absolute actual values, for rows 4..train_rows (R11) and for
# rows (train_rows+1)..total_rows (S11).
# NOTE(review): the ranges start at row 4, presumably because the 3-period
# average has no value for the first three rows. Confirm.
abs11yf <- abs(forecast11-data[,2])
abs11y <- abs(data[,2])
# Plain `<-` is used: this is top-level code, so the super-assignment `<<-`
# is not needed to create the variables in the global environment.
R11 <- sum(abs11yf[4:train_rows,1])/sum(abs11y[4:train_rows,1])
S11 <- sum(abs11yf[(train_rows+1):total_rows,1])/sum(abs11y[(train_rows+1):total_rows,1])

In [None]:
# Plot the observed series in blue and overlay the forecast in red
plot.ts(data[,2], type="l", col = "blue", main = "Simple moving average (3 periods) : F(t)=(Y(t-1)+Y(t-2)+Y(t-3))/3" )
lines(forecast11, type="l", col = "red")

# Report both error ratios as percentages. S now carries the "%" unit like R
# does, and the output ends with a newline so later output starts on its own line.
cat("\n","\n","Simple moving average (3 periods) : F(t)=(Y(t-1)+Y(t-2)+Y(t-3))/3:  ","\n", "R =", round(R11*100, digits = 2), "%;", "  S =", round(S11*100, digits = 2), "%", "\n")