# Regular Expressions

### Regex in R

In [None]:
# Positions of the elements that contain an "@" followed by any text.
grep(
  pattern = "@.*",
  x = c("testing@testing.com", "not an email", "test2@testing.com")
)

In [None]:
# Same search as before, but return the matching strings themselves
# instead of their positions.
grep(
  pattern = "@.*",
  x = c("testing@testing.com", "not an email", "test2@testing.com"),
  value = TRUE
)

In [None]:
# Replace everything from the "@" onward with a new domain.
# grep() only searches; it has no replacement argument, so substitution
# must go through sub()/gsub(). Elements without a match are returned
# unchanged.
sub(
  pattern = "@.*",
  replacement = "@newdomain.com",
  x = c("testing@testing.com", "not an email", "test2@testing.com")
)

In [None]:
# Locate the first "@..." match in each string, then extract the matched text.
# regmatches() must be given the very same vector that regexpr() searched:
# the match data holds start positions and lengths relative to that input,
# so a different string would be sliced at the wrong offsets.
emails <- c("testing@testing.com", "not an email", "test2@testing.com")
matches <- regexpr("@.*", emails)
regmatches(emails, matches)

### Problem Solving

In [None]:
# Sample contact list: one row per (name, e-mail address) pair.
Name <- c(
  "John Doe",
  "John Doe",
  "Mark Mann",
  "Barry Goode"
)
Email <- c(
  "doej@example.com",
  "jadoe@example.ca",
  "mmann@example.ca",
  "bgoode@example.com"
)

# Column names are spelled out; they match the vector names.
email_df <- data.frame(Name = Name, Email = Email)
email_df

Name,Email
<chr>,<chr>
John Doe,doej@example.com
John Doe,jadoe@example.ca
Mark Mann,mmann@example.ca
Barry Goode,bgoode@example.com


In [None]:
# Extract the "@..." fragment of every address into a new "Domain" column.
# The pattern is greedy up to the last ".", so the text after that dot
# (e.g. "com" or "ca") is not part of the extracted value.
matches <- regexpr(pattern = "@.*\\.", text = email_df[["Email"]])
email_df[["Domain"]] <- regmatches(x = email_df[["Email"]], m = matches)
email_df

Name,Email,Domain
<chr>,<chr>,<chr>
John Doe,doej@example.com,@example.
John Doe,jadoe@example.ca,@example.
Mark Mann,mmann@example.ca,@example.
Barry Goode,bgoode@example.com,@example.


# Date Format in R

In [None]:
# Actors and their birth dates; the dates are stored as plain numbers
# (converted to date-times in the following cell).
Actor.Name <- c(
  "Leonardo Dicaprio",
  "Eddie REdmayne",
  "Matthew McConaughey",
  "Daniel Day-Lewis"
)
Date.of.Birth <- c(
  153360000,
  379123200,
  -5011200,
  -400032000
)

bestActors <- data.frame(
  Actor.Name = Actor.Name,
  Date.of.Birth = Date.of.Birth
)
bestActors

Actor.Name,Date.of.Birth
<chr>,<dbl>
Leonardo Dicaprio,153360000
Eddie REdmayne,379123200
Matthew McConaughey,-5011200
Daniel Day-Lewis,-400032000


In [None]:
# Interpret the stored numbers as seconds counted from the given origin
# (1970-01-01) and convert them to date-time values.
# tz = "UTC" pins the time zone used for display, so the printed birthdays
# do not depend on the time zone of the machine running the notebook.
actors.birthday <- as.POSIXct(
  bestActors$Date.of.Birth,
  origin = "1970-01-01",
  tz = "UTC"
)
actors.birthday

[1] "1974-11-11 UTC" "1982-01-06 UTC" "1969-11-04 UTC" "1957-04-29 UTC"

In [None]:
# Convert the date-time values to Date objects (calendar dates only),
# overwriting the variable, then display the result.
actors.birthday <- as.Date(actors.birthday)
actors.birthday

In [None]:
# Actresses and their birth dates, stored as "YYYY/MM/DD" strings
# (parsed into Date objects in the following cell).
Actress.Name <- c("Brie Larson", "Julianne Moore", "Cate Blanchett", "Jennifer Lawrence")
# Third entry is 1969 (Cate Blanchett's birth year); it was previously
# mistyped as 1869.
Date.of.Birth <- c("1989/10/01", "1960/12/03", "1969/05/14", "1990/08/15")
# Pair the dates with the actresses' names; the actors' name vector from the
# earlier cell must not be reused here.
bestActresses <- data.frame(Actress.Name, Date.of.Birth)
bestActresses

Actor.Name,Date.of.Birth
<chr>,<chr>
Leonardo Dicaprio,1989/10/01
Eddie REdmayne,1960/12/03
Matthew McConaughey,1869/05/14
Daniel Day-Lewis,1990/08/15


In [None]:
# Parse the "YYYY/MM/DD" strings into Date objects using an explicit format.
actresses.birthday <- as.Date(
  bestActresses[["Date.of.Birth"]],
  format = "%Y/%m/%d"
)
actresses.birthday

In [None]:
# Day/month/two-digit-year input: %d = day, %m = month, %y = year without century.
as.Date(x = "27/06/94", format = "%d/%m/%y")

In [None]:
# Subtracting one Date from another gives the time elapsed between them.
as.Date("1994/06/27") - as.Date("1959/01/01")

Time difference of 12961 days

In [None]:
# Dates can be compared with relational operators: a later date is "greater".
as.Date("1994/06/27") > as.Date("1959/01/01")

In [None]:
# Subtracting a number from a Date moves it back by that many days.
as.Date("1994/06/27") - 14

In [None]:
# Name of the weekday for the current date.
weekdays(Sys.Date())

In [None]:
# Name of the month for the current date.
months(Sys.Date())

In [None]:
# Quarter of the year that the current date falls in.
quarters(Sys.Date())

In [None]:
# Number of days between the origin date (1970-01-01 unless another origin
# is supplied) and the current date.
julian(Sys.Date())

In [None]:
# Four dates starting today, each one calendar month after the previous.
seq(Sys.Date(), by="month", length.out=4)

# Debugging

### Producing an error

In [None]:
# Intentional failure: "+" cannot be applied to a character operand,
# so this expression raises an error.
"a" + 10

ERROR: Error in "a" + 10: non-numeric argument to binary operator


### Error halts execution

In [None]:
# Intentional failure inside a loop: the first iteration raises an error,
# which aborts the whole loop, so the remaining iterations never run.
for (i in 1:3) {
  print(i + "a")
}

ERROR: Error in i + "a": non-numeric argument to binary operator


### Error catching with tryCatch



In [None]:
# When the expression raises no condition, tryCatch() simply returns its value.
tryCatch(10+10)

In [None]:
# No handler is supplied here, so the error is not caught and still propagates.
tryCatch("a"+10)

ERROR: Error in "a" + 10: non-numeric argument to binary operator


In [None]:
# Recover from the error: the handler runs instead and prints a fixed message.
tryCatch(
  "a" + 10,
  error = function(err) {
    print("Error")
  }
)

[1] "Error"


In [None]:
# Same failing expression, but the handler prints an alternative result.
tryCatch(
  "a" + 10,
  error = function(err) {
    print("10a")
  }
)

[1] "10a"


In [None]:
# Wrap the whole loop in tryCatch(): the first failing iteration transfers
# control to the handler, which prints the condition object. The loop is
# not resumed afterwards.
tryCatch(
  expr = for (i in seq_len(3L)) {
    print(i + "a")
  },
  error = function(cond) {
    print(cond)
  }
)

<simpleError in i + "a": non-numeric argument to binary operator>


In [None]:
# "A" cannot be converted to an integer: the result is NA and a warning
# (not an error) is signalled.
as.integer("A")

“NAs introduced by coercion”


In [None]:
# Only an `error` handler is registered here. as.integer("A") signals a
# warning rather than an error, so the handler does not run and the warning
# is still reported.
# NOTE(review): if the intent is to intercept the coercion warning, a
# `warning = ` handler is needed instead; confirm which was meant.
tryCatch(as.integer("A"), error=function(e) print("warning"))

“NAs introduced by coercion”
