**Iris Dataset ML Exercises**

In [189]:
import graphviz
import numpy as np # Mean of the per-fold accuracy scores
from sklearn import ensemble # Random forest
from sklearn import metrics # Accuracy scores
from sklearn import tree # Decision Tree
from sklearn.datasets import load_iris # Iris dataset
from sklearn.model_selection import KFold # K-fold Cross Validation

# Load in our dataset
# load_iris comes from sklearn.datasets; the cells below read the returned
# object's .data, .target, .feature_names and .target_names attributes.
irisData = load_iris()

<h1>Decision Tree Model</h1>

Initial experimentation on Decision Trees

In [183]:
# Decision tree initialization
# random_state is fixed so the fitted tree is identical on every run
# (scikit-learn randomly permutes candidate features while splitting).
classTree = tree.DecisionTreeClassifier(random_state=1)

# Decision tree training on the full dataset (no held-out data in this cell).
# Note: with scikit-learn's default settings the tree is grown without
# pruning (ccp_alpha defaults to 0.0).
classTree = classTree.fit(irisData.data, irisData.target)

# Decision tree visualization: export the fitted tree as Graphviz DOT text
dotData = tree.export_graphviz(
    classTree,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=irisData.feature_names,
    class_names=irisData.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)
# Rendering is intentionally left disabled; uncomment to write the tree to disk.
# graph = graphviz.Source(dotData)
# graph.render("Iris")

<h1>Decision Tree Model with 5-fold Cross Validation</h1>

Wrapping the decision tree workflow in K-fold cross-validation

In [184]:
# Cross validation parameters
k = 5
# shuffle / random_state are passed by keyword: current scikit-learn releases
# make them keyword-only, so the positional form KFold(k, True, 1) raises
# a TypeError there.
kFold = KFold(n_splits=k, shuffle=True, random_state=1)

# Collected scores
allScores = []

# Needed datasets: one entry per fold, in fold order
trainSets = []
testSets = []
trainTargets = []
testTargets = []

for train, test in kFold.split(irisData.data):
    # `train` and `test` are index arrays, so NumPy fancy indexing selects
    # the matching rows/labels directly (no per-index Python loop needed).

    # Training values
    trainSets.append(irisData.data[train])
    trainTargets.append(irisData.target[train])

    # Testing values
    testSets.append(irisData.data[test])
    testTargets.append(irisData.target[test])

In [185]:
# Training and Testing for each validation fold

# Reset the score list here so that re-running this cell does not append
# a second copy of the fold accuracies.
allScores = []

for foldTrain, foldTrainTargets, foldTest, foldTestTargets in zip(
    trainSets, trainTargets, testSets, testTargets
):
    # Tree init (fixed seed so fold accuracies are reproducible across runs)
    irisTree = tree.DecisionTreeClassifier(random_state=1)

    # Tree training on this fold's training split
    irisTree = irisTree.fit(foldTrain, foldTrainTargets)

    # Tree testing on this fold's held-out split
    testPredict = irisTree.predict(foldTest)

    # Tree accuracy: fraction of held-out samples classified correctly
    accuracy = metrics.accuracy_score(foldTestTargets, testPredict)

    # Recording accuracy score
    allScores.append(accuracy)

In [186]:
# Results: one line per fold, in fold order
for score in allScores:
    print("Accuracy: {0:0.4f}".format(score))
# Summary of scores: arithmetic mean over all folds.
# Requires NumPy as `np` from the notebook's import cell; without that
# import this cell fails with a NameError on a fresh kernel.
final = np.mean(allScores)
print("\nMean Accuracy: {0:0.4f}".format(final))

Accuracy: 0.9667
Accuracy: 0.9667
Accuracy: 0.9667
Accuracy: 0.9333
Accuracy: 0.8333

Mean Accuracy: 0.9333


<h1>Random Forest Model with 5-fold Cross Validation</h1>

Initial experimentation on Random Forests, evaluated on the same cross-validation folds as the decision tree above

In [187]:
# Accuracy of the random forest on each fold (reset on every run of this cell)
forestScores = []

# Training and Testing for each validation fold
for foldTrain, foldTrainTargets, foldTest, foldTestTargets in zip(
    trainSets, trainTargets, testSets, testTargets
):
    # Forest init: a random forest is stochastic (bootstrap samples and
    # random feature subsets), so the seed is fixed to keep the reported
    # accuracies reproducible between runs.
    forest = ensemble.RandomForestClassifier(random_state=1)

    # Forest training on this fold's training split
    forest = forest.fit(foldTrain, foldTrainTargets)

    # Forest testing on this fold's held-out split
    testPredict = forest.predict(foldTest)

    # Forest accuracy: fraction of held-out samples classified correctly
    accuracy = metrics.accuracy_score(foldTestTargets, testPredict)

    # Recording accuracy score
    forestScores.append(accuracy)



In [188]:
# Results: one line per fold, in fold order
for score in forestScores:
    print("Accuracy: {0:0.4f}".format(score))
# Summary of scores: arithmetic mean over all folds.
# Requires NumPy as `np` from the notebook's import cell; without that
# import this cell fails with a NameError on a fresh kernel.
final = np.mean(forestScores)
print("\nMean Accuracy: {0:0.4f}".format(final))

Accuracy: 0.9667
Accuracy: 0.9667
Accuracy: 0.9667
Accuracy: 0.9333
Accuracy: 0.9000

Mean Accuracy: 0.9467
