In [None]:
library(caret)
library(ranger)
library(rpart)

In [None]:
### 1. Imports the data sets and cleans up some of the data

In [None]:
# Load the adult census data. The file has no header row, so column names are
# assigned here; the last column is the outcome and is named "target" directly.
#
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, and the `levels<-` recodings below only
# merge categories when the columns are factors.
adult <- read.csv("adult.csv", header = FALSE, stringsAsFactors = TRUE)
names(adult) <- c(
  "age", "workclass", "fnlwgt", "education", "education_num",
  "marital_status", "occupation", "relationship", "race", "sex",
  "capital_gain", "capital_loss", "hours_per_week", "native_country",
  "target"
)

# Every recoding below is positional: the i-th replacement renames the i-th
# existing (sorted) factor level, and repeated names merge levels together.
# NOTE(review): the vectors assume the exact level sets of the original file
# (9 workclass, 42 native_country, 16 education, 15 occupation levels) - verify
# against the data if the CSV ever changes.

# NOTE(review): the first workclass level is presumably the " ?" missing
# marker, which is folded into " Private" here - confirm.
levels(adult$workclass) <- c(
  " Private", " Federal-gov", " Local-gov", " Never-worked", " Private",
  " Self-emp-inc", " Self-emp-not-inc", " State-gov", " Without-pay"
)

# Collapse countries into continental regions.
levels(adult$native_country) <- c(
  "North-America", "Asia", "North-America", "Asia", "South-America",
  "Central-America", "Central-America", "Central-America", "South-America",
  "Europe", "Europe", "Europe", "Europe", "Central-America",
  "Central-America", "Europe", "Central-America", "Asia", "Europe", "Asia",
  "Asia", "Europe", "Europe", "Central-America", "Asia", "Asia",
  "North-America", "Central-America", "North-America", "South-America",
  "Asia", "Europe", "Europe", "North-America", "Europe", "Asia", "Asia",
  "Asia", "Central-America", "North-America", "Asia", "Europe"
)

# Collapse education into four coarse bands.
levels(adult$education) <- c(
  "high-school", "high-school", "high-school", "early-school", "early-school",
  "early-school", "early-school", "college", "college", "college", "grad",
  "high-school", "grad", "early-school", "grad", "college"
)

# Collapse occupations into five groups.
levels(adult$occupation) <- c(
  "blue-collar", "admin", "military", "blue-collar", "admin", "blue-collar",
  "blue-collar", "blue-collar", "other", "service", "other", "military",
  "admin", "admin", "blue-collar"
)

In [None]:
# Load the Titanic data and prepare it for modelling.
#
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, and the Embarked recoding below only
# merges categories when the column is a factor.
TITANIC <- read.csv("titanic.csv", stringsAsFactors = TRUE)

# Outcome and passenger class are categorical, not numeric.
TITANIC$Survived <- factor(TITANIC$Survived)
TITANIC$Pclass <- factor(TITANIC$Pclass)

# Drop columns that are not used as predictors.
TITANIC$Name <- NULL
TITANIC$Ticket <- NULL
TITANIC$PassengerId <- NULL
TITANIC$Cabin <- NULL

# Impute missing ages with the mean of the observed ages.
TITANIC$Age[is.na(TITANIC$Age)] <- mean(TITANIC$Age, na.rm = TRUE)

# Positional relabel of the four existing Embarked levels; the first level is
# merged into "S".
# NOTE(review): presumably the sorted levels are "", "C", "Q", "S", so blank
# ports become "S" - verify against the file.
levels(TITANIC$Embarked) <- c("S", "C", "Q", "S")

# Rename the outcome by name rather than by position so the right column is
# renamed regardless of the column order in the CSV.
names(TITANIC)[names(TITANIC) == "Survived"] <- "target"

In [None]:
### 2. Trains random forest models: 7-fold CV on the Titanic data and 3-fold CV on the adult data.

In [None]:
# Random forest (ranger) on the Titanic data, assessed with 7-fold
# cross-validation. The seed is fixed first so the run is repeatable.
set.seed(10)
titanic_rf_control <- trainControl(
  method = "cv",
  number = 7,
  verboseIter = TRUE
)
model1 <- train(
  target ~ .,
  data = TITANIC,
  method = "ranger",
  trControl = titanic_rf_control
)
print(model1)

In [None]:
# Random forest (ranger) on the adult data, assessed with 3-fold
# cross-validation. The seed is fixed, as for the Titanic random forest, so
# the fold assignment and the forests are the same on every run of this cell.
set.seed(10)
model2 <- train(
  target ~ .,
  data = adult,
  method = "ranger",
  trControl = trainControl(method = "cv", number = 3, verboseIter = TRUE)
)
print(model2)

In [None]:
### 3. Trains a decision tree with 10 folds

In [None]:
# Decision tree (rpart) on the Titanic data, assessed with 10-fold
# cross-validation. The seed is fixed so the fold assignment, and therefore
# the reported resampling results, are the same on every run of this cell.
set.seed(10)
model3 <- train(
  target ~ .,
  data = TITANIC,
  method = "rpart",
  trControl = trainControl(method = "cv", number = 10, verboseIter = TRUE)
)
print(model3)

In [None]:
### 4. prints the plot of the random forest.

In [None]:
# Plot the two random forest fits (model1 and model2 are the caret train
# objects fitted with method = "ranger").
# NOTE(review): plot() on a caret train object is expected to return a plot
# object that is rendered by top-level auto-printing - confirm, and wrap the
# calls in print() if they are ever moved inside a loop or a function.
plot(model1)
plot(model2)

In [None]:
### 5. Tunes the 3 hyperparameters in the random forest

In [None]:
# Full tuning grid over the three ranger hyperparameters exposed by caret:
# mtry, splitrule and min.node.size.
# NOTE(review): with the formula interface caret is expected to expand factor
# predictors into dummy columns, so the number of model columns can exceed
# ncol(TITANIC) - 1 and this grid may stop short of the largest usable mtry -
# confirm whether that is intended.
myGrid <- expand.grid(
  mtry = seq_len(ncol(TITANIC) - 1),
  splitrule = c("gini", "extratrees"),
  min.node.size = 1:20
)

# Fix the seed so the fold assignment and the forests are the same on every
# run of this cell.
set.seed(10)
model4 <- train(
  target ~ .,
  data = TITANIC,
  method = "ranger",
  trControl = trainControl(method = "cv", number = 10, verboseIter = TRUE),
  tuneGrid = myGrid
)
print(model4)

In [None]:
### 6. Prints out the plot for the random forest model

In [None]:
# Plot the grid-tuned random forest (model4 is the caret train object fitted
# with method = "ranger" and tuneGrid = myGrid).
# NOTE(review): plot() on a caret train object is expected to return a plot
# object that is rendered by top-level auto-printing - confirm, and wrap the
# call in print() if it is ever moved inside a loop or a function.
plot(model4)

In [None]:
### 8. Creates a tuned glmnet model for the data sets, and prints out the plot of the model.

In [None]:
# Give the outcome classes names that are valid R identifiers; caret needs
# this to build the class-probability columns when classProbs = TRUE.
# NOTE(review): positional relabel - assumes the first existing level means
# "did not survive" (presumably "0") - verify.
levels(TITANIC$target) <- c("Dead", "Survived")

# 10-fold CV scored with twoClassSummary, which needs class probabilities.
myControl <- trainControl(
  method = "cv",
  number = 10,
  summaryFunction = twoClassSummary,
  classProbs = TRUE,
  verboseIter = TRUE
)

# Explicit tuning grid: ridge (alpha = 0) and lasso (alpha = 1) over ten
# evenly spaced lambda values. length.out is spelled out instead of relying
# on partial matching of `length`.
myGrid2 <- expand.grid(
  alpha = 0:1,
  lambda = seq(0.0001, 0.1, length.out = 10)
)

# twoClassSummary reports ROC, Sens and Spec but not Accuracy, so the
# selection metric is named explicitly; otherwise train() warns that its
# default metric is missing from the results and falls back to ROC.
# The same seed is set before each fit so both models are resampled on
# identical folds and their results are directly comparable.

# glmnet with caret's default tuning grid.
set.seed(10)
model5 <- train(
  target ~ .,
  data = TITANIC,
  method = "glmnet",
  metric = "ROC",
  trControl = myControl
)

# glmnet tuned over the explicit grid.
set.seed(10)
model6 <- train(
  target ~ .,
  data = TITANIC,
  method = "glmnet",
  metric = "ROC",
  trControl = myControl,
  tuneGrid = myGrid2
)

plot(model5)
print(model5)
plot(model6)
print(model6)

In [None]:
# Give the outcome classes names that are valid R identifiers; caret needs
# this to build the class-probability columns when classProbs = TRUE.
# NOTE(review): positional relabel - assumes the first existing level is the
# lower income class - verify.
levels(adult$target) <- c("lessthanfiftyK", "greaterthanfiftyK")

# myControl (10-fold CV with twoClassSummary) and myGrid2 (alpha/lambda grid)
# are reused from the Titanic glmnet cell.
# twoClassSummary reports ROC, Sens and Spec but not Accuracy, so the
# selection metric is named explicitly; otherwise train() warns that its
# default metric is missing from the results and falls back to ROC.
# The same seed is set before each fit so both models are resampled on
# identical folds and their results are directly comparable.

# glmnet with caret's default tuning grid.
set.seed(10)
model7 <- train(
  target ~ .,
  data = adult,
  method = "glmnet",
  metric = "ROC",
  trControl = myControl
)

# glmnet tuned over the explicit grid.
set.seed(10)
model8 <- train(
  target ~ .,
  data = adult,
  method = "glmnet",
  metric = "ROC",
  trControl = myControl,
  tuneGrid = myGrid2
)

plot(model7)
print(model7)
plot(model8)
print(model8)