In [6]:
library(caret)
library(randomForest)

# Fit one model on a training split and predict on the train and test splits.
#
# Args:
#   model_type:   'poisson' (standardize -> PCA -> Poisson GLM on the principal
#                 components) or 'rf' (randomForest fitted with `fmla`).
#   trn_data:     training data frame holding predictors and response; used
#                 as `data` for the 'rf' fit.
#   trn_in_data:  training predictors only (data frame or matrix, numeric).
#   trn_out_data: training response vector, one value per row of trn_in_data.
#   tst_in_data:  test predictors, same columns as trn_in_data.
#   tst_out_data: test response; accepted for call symmetry, not used here.
#   fmla:         two-sided model formula. 'rf' fits it directly; 'poisson'
#                 only takes the response name from it and rebuilds the
#                 right-hand side from the principal components.
#
# Returns:
#   'poisson': list(pred_trn, pred_tst)
#   'rf':      list(pred_trn, pred_tst, impo), where impo is the first column
#              of the fitted forest's importance table.
run_model = function(model_type, trn_data, trn_in_data, trn_out_data, tst_in_data, tst_out_data, fmla){
  if (model_type == 'poisson'){
    print('normalizing')
    trn_mat = as.matrix(trn_in_data)
    tst_mat = as.matrix(tst_in_data)

    train_col_stds = apply(trn_mat, 2, sd)
    # A constant column would divide by zero below and feed NaN to prcomp;
    # it carries no information, so drop it (which() also skips NA sds).
    varying = which(train_col_stds > 0)
    if (length(varying) == 0){
      stop("no predictor column with non-zero variance in trn_in_data")
    }
    trn_mat = trn_mat[, varying, drop = FALSE]
    tst_mat = tst_mat[, varying, drop = FALSE]
    train_col_stds = train_col_stds[varying]
    train_col_means = colMeans(trn_mat)

    # Test data is standardized with the TRAINING means/sds on purpose.
    train_normalized = scale(trn_mat, center = train_col_means, scale = train_col_stds)
    test_normalized = scale(tst_mat, center = train_col_means, scale = train_col_stds)

    pca = prcomp(train_normalized)

    trn_preprocessed = predict(pca, train_normalized)
    tst_preprocessed = predict(pca, test_normalized)

    # Take the response from the arguments rather than from global state.
    resp_name = all.vars(fmla)[1]
    if (length(trn_out_data) != nrow(trn_preprocessed)){
      stop("trn_out_data must have one value per row of trn_in_data")
    }
    fmla = as.formula(paste(resp_name, "~", paste(colnames(trn_preprocessed), collapse="+")))

    pca_train_data = as.data.frame(trn_preprocessed)
    pca_train_data[[resp_name]] = trn_out_data

    # Predictions below must be made in principal-component space.
    trn_in_data = trn_preprocessed
    tst_in_data = tst_preprocessed
    output = glm(fmla, data=pca_train_data, family = poisson)
  }
  else if (model_type == 'rf'){
    output = randomForest(fmla, data=trn_data, importance = TRUE, ntree=100, mtry=16)
    impo = as.data.frame(output$importance)
    impo = impo[,1]
  }
  else {
    stop(paste0("unknown model_type: '", model_type, "' (expected 'poisson' or 'rf')"))
  }

  pred_trn = predict(output, newdata = as.data.frame(trn_in_data), type='response')
  pred_tst = predict(output, newdata = as.data.frame(tst_in_data), type='response')

  if (model_type == 'rf'){
    return(list(pred_trn, pred_tst, impo))
  }
  else {
    return(list(pred_trn, pred_tst))
  }

}

# Return `l` without any element that also occurs in `cols`.
# Surviving elements keep their original order and multiplicity.
remove_cols = function(l, cols) {
    # match() yields 0 for elements of `l` that are absent from `cols`
    hit_position = match(l, cols, nomatch = 0L)
    survivors = l[hit_position == 0L]
    return(survivors)
}



# ---- Load the input table and pick model columns ----
df = read.csv('for_model_avgs.csv')

colnames(df)

set.seed(5)

# Keep only rows whose 'rd' value exceeds 0.01.
df = df[df[,'rd']>0.01,]

cols_to_remove = c('event_name', 'event_date', 'num_flooded')
in_col_names = remove_cols(colnames(df), cols_to_remove)
out_col_name = 'num_flooded'

model_data = df[, append(in_col_names, out_col_name)]
model_data = na.omit(model_data)

# NOTE(review): this placeholder contributes one all-NA first column to the
# importance table written at the end; it is kept so the output file layout
# does not change. Confirm whether downstream readers rely on it.
import_df = data.frame(matrix(nrow=length(in_col_names)))
# These accumulators are not used anywhere below in this cell.
all_pred_tst = c()
all_pred_trn = c()
all_tst = c()
all_trn = c()
fomla = as.formula(paste(out_col_name, "~", paste(in_col_names, collapse="+")))
model_types = c('rf', 'poisson')
suffix = 'out'

# ---- Repeated 70/30 train/test splits ----
n_runs = 101
# One slot per run for the random-forest importances; bound together once
# after the loop instead of growing a data frame on every iteration.
rf_impo_runs = vector('list', n_runs)

for (i in seq_len(n_runs)){
  prt = createDataPartition(model_data[, out_col_name], p=0.7)
  train_data = model_data[prt$Resample1,]
  train_in_data = data.frame(train_data[, in_col_names])
  colnames(train_in_data) = in_col_names
  train_out_data = train_data[, out_col_name]
  test_in_data = data.frame(model_data[-prt$Resample1, in_col_names])
  colnames(test_in_data) = in_col_names
  test_out_data = model_data[-prt$Resample1, out_col_name]

  for (model in model_types){
    print(paste("run: ", i, sep = ''))

    model_results = run_model(model, train_data, train_in_data, train_out_data, test_in_data, test_out_data, fomla)
    pred_train = model_results[[1]]
    pred_test = model_results[[2]]

    all_trn_df = data.frame(train_out_data, pred_train)
    colnames(all_trn_df) = c('all_trn', 'all_pred_trn')
    all_tst_df = data.frame(test_out_data, pred_test)
    colnames(all_tst_df) = c('all_tst', 'all_pred_tst')
    # Rows are appended without a header, so these files accumulate over all
    # runs -- and over repeated executions of this cell if they already exist.
    write.table(all_trn_df, paste0(model, '_', suffix, '_train.csv'), append=TRUE,  sep=",", col.names = FALSE)
    write.table(all_tst_df, paste0(model, '_', suffix, '_test.csv'), append=TRUE,  sep=",", col.names = FALSE)

    if (model == 'rf'){
      rf_impo_runs[[i]] = model_results[[3]]
    }
  }
}

# ---- Write the per-run random-forest importances ----
rf_impo_runs = Filter(Negate(is.null), rf_impo_runs)
import_df = do.call(cbind, c(list(import_df), rf_impo_runs))
colnames(import_df) = 1:ncol(import_df)
row.names(import_df) = in_col_names
# write.csv() always overwrites; passing `append` is ignored with a warning.
write.csv(import_df, paste0('rf_impo_', suffix))


[1] "run: 1"
[1] "run: 1"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 2"
[1] "run: 2"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 3"
[1] "run: 3"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 4"
[1] "run: 4"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 5"
[1] "run: 5"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 6"
[1] "run: 6"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 7"
[1] "run: 7"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 8"
[1] "run: 8"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 9"
[1] "run: 9"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 10"
[1] "run: 10"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 11"
[1] "run: 11"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 12"
[1] "run: 12"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 13"
[1] "run: 13"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 14"
[1] "run: 14"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 15"
[1] "run: 15"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 16"
[1] "run: 16"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 17"
[1] "run: 17"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 18"
[1] "run: 18"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 19"
[1] "run: 19"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 20"
[1] "run: 20"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 21"
[1] "run: 21"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 22"
[1] "run: 22"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 23"
[1] "run: 23"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 24"
[1] "run: 24"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 25"
[1] "run: 25"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 26"
[1] "run: 26"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 27"
[1] "run: 27"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 28"
[1] "run: 28"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 29"
[1] "run: 29"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 30"
[1] "run: 30"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 31"
[1] "run: 31"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 32"
[1] "run: 32"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 33"
[1] "run: 33"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 34"
[1] "run: 34"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 35"
[1] "run: 35"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 36"
[1] "run: 36"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 37"
[1] "run: 37"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 38"
[1] "run: 38"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 39"
[1] "run: 39"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 40"
[1] "run: 40"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 41"
[1] "run: 41"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 42"
[1] "run: 42"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 43"
[1] "run: 43"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 44"
[1] "run: 44"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 45"
[1] "run: 45"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 46"
[1] "run: 46"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 47"
[1] "run: 47"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 48"
[1] "run: 48"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 49"
[1] "run: 49"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 50"
[1] "run: 50"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 51"
[1] "run: 51"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 52"
[1] "run: 52"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 53"
[1] "run: 53"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 54"
[1] "run: 54"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 55"
[1] "run: 55"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 56"
[1] "run: 56"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 57"
[1] "run: 57"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 58"
[1] "run: 58"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 59"
[1] "run: 59"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 60"
[1] "run: 60"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 61"
[1] "run: 61"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 62"
[1] "run: 62"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 63"
[1] "run: 63"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 64"
[1] "run: 64"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 65"
[1] "run: 65"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 66"
[1] "run: 66"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 67"
[1] "run: 67"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 68"
[1] "run: 68"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 69"
[1] "run: 69"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 70"
[1] "run: 70"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 71"
[1] "run: 71"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 72"
[1] "run: 72"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 73"
[1] "run: 73"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 74"
[1] "run: 74"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 75"
[1] "run: 75"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 76"
[1] "run: 76"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 77"
[1] "run: 77"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 78"
[1] "run: 78"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 79"
[1] "run: 79"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 80"
[1] "run: 80"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 81"
[1] "run: 81"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 82"
[1] "run: 82"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 83"
[1] "run: 83"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 84"
[1] "run: 84"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 85"
[1] "run: 85"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 86"
[1] "run: 86"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 87"
[1] "run: 87"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 88"
[1] "run: 88"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 89"
[1] "run: 89"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 90"
[1] "run: 90"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 91"
[1] "run: 91"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 92"
[1] "run: 92"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 93"
[1] "run: 93"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 94"
[1] "run: 94"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 95"
[1] "run: 95"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 96"
[1] "run: 96"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 97"
[1] "run: 97"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 98"
[1] "run: 98"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 99"
[1] "run: 99"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 100"
[1] "run: 100"
[1] "normalizing"


“glm.fit: fitted rates numerically 0 occurred”

[1] "run: 101"
[1] "run: 101"
[1] "normalizing"


“attempt to set 'append' ignored”