Skip to content

Commit

Permalink
adding more beaches for modeling
Browse files Browse the repository at this point in the history
  • Loading branch information
nicklucius committed Sep 12, 2017
1 parent c0c180a commit 5a4c44f
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 17 deletions.
Binary file modified Data/df.Rds
Binary file not shown.
42 changes: 25 additions & 17 deletions Master.R
Expand Up @@ -37,12 +37,20 @@ rm(list=ls()[!ls() %in% keep])
# Comment out the predictors that you do not want to use
#-------------------------------------------------------------------------------

# remove older years
df <- df[df$Year %in% c("2012", "2013", "2014", "2015", "2016"),]

# set predictors
df_model <- df[, c("Escherichia.coli", #dependent variable
"Client.ID",
# "precipProbability",
# "Water.Level",
"Rogers_Escherichia.coli",
"n12th_Escherichia.coli",
"Foster_Escherichia.coli",
"North_Avenue_Escherichia.coli",
"n39th_Escherichia.coli",
"Albion_Escherichia.coli",
# "Rogers_Escherichia.coli",
# "Howard_Escherichia.coli",
# "n57th_Escherichia.coli",
# "n63rd_Escherichia.coli",
Expand All @@ -52,16 +60,16 @@ df_model <- df[, c("Escherichia.coli", #dependent variable
# "Rainbow_Escherichia.coli",
# "Ohio_DNA.Geo.Mean",
# "North_Avenue_DNA.Geo.Mean",
"n63rd_DNA.Geo.Mean",
"South_Shore_DNA.Geo.Mean",
"Montrose_DNA.Geo.Mean",
"Calumet_DNA.Geo.Mean",
"Rainbow_DNA.Geo.Mean",
"Date", #Must use for splitting data, not included in model
"Predicted.Level" #Must use for USGS model comparison, not included in model
# "n63rd_DNA.Geo.Mean",
# "South_Shore_DNA.Geo.Mean",
# "Montrose_DNA.Geo.Mean",
# "Calumet_DNA.Geo.Mean",
# "Rainbow_DNA.Geo.Mean",
"Date" #Must use for splitting data, not included in model
# "Predicted.Level" #Must use for USGS model comparison, not included in model
)]
# To run without the USGS model comparison, comment out "Predicted.Level" above and uncomment the next line
# df_model$Predicted.Level <- 1 #meaningless value
df_model$Predicted.Level <- 1 #meaningless value

#-------------------------------------------------------------------------------
# CHOOSE TEST/TRAIN SETS
Expand All @@ -70,8 +78,8 @@ df_model <- df[, c("Escherichia.coli", #dependent variable
# If you set kFolds to FALSE, the model will use trainStart, trainEnd, etc. (see below)
#-------------------------------------------------------------------------------

kFolds <- TRUE #If TRUE, the next 4 lines are not used, but they must not be commented out
trainStart <- "2006-01-01"
kFolds <- FALSE #If TRUE, the next 4 lines are not used, but they must not be commented out
trainStart <- "2012-01-01"
trainEnd <- "2015-12-31"
testStart <- "2016-01-01"
testEnd <- "2016-12-31"
Expand Down Expand Up @@ -107,25 +115,25 @@ downsample <- FALSE #If FALSE comment out the next 3 lines
#-------------------------------------------------------------------------------

excludeBeaches <- c(
# "12th",
"12th",
# "31st",
# "39th",
"39th",
# "57th",
"63rd",
# "Albion",
"Albion",
"Calumet",
# "Foster",
"Foster",
# "Howard",
# "Jarvis",
# "Juneway",
# "Leone",
"Montrose",
# "North Avenue",
"North Avenue",
# "Oak Street",
# "Ohio",
# "Osterman",
"Rainbow",
"Rogers",
# "Rogers",
"South Shore"
)

Expand Down
2 changes: 2 additions & 0 deletions R/20_Clean.R
Expand Up @@ -129,6 +129,8 @@ names(df)[names(df) == "North Avenue_DNA.Geo.Mean"] <- "North_Avenue_DNA.Geo.Mea
names(df)[names(df) == "Oak Street_DNA.Geo.Mean"] <- "Oak_Street_DNA.Geo.Mean"
names(df)[names(df) == "South Shore_DNA.Geo.Mean"] <- "South_Shore_DNA.Geo.Mean"
names(df)[names(df) == "South Shore_Escherichia.coli"] <- "South_Shore_Escherichia.coli"
names(df)[names(df) == "North Avenue_Escherichia.coli"] <- "North_Avenue_Escherichia.coli"



#remove times from Date variable
Expand Down

0 comments on commit 5a4c44f

Please sign in to comment.