<a href="https://colab.research.google.com/github/MaximoDouglas/DL_BookProjects/blob/master/code/colab/mcc_iris_flowers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Get the data files

In [0]:
!wget "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"


# Code - Direct from the book (adapted)

Accuracy: 88.67% 

Standard deviation: 21.09%

In [0]:
# Multiclass Classification with the Iris Flowers Dataset
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# ------- Begin data preprocessing

# load dataset
dataframe = pandas.read_csv("iris.data", header=None)
dataset = dataframe.values
X = dataset[:,0:4].astype(float)
Y = dataset[:,4]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

dummy_y = np_utils.to_categorical(encoded_Y)
# ------- End data preprocessing

# define baseline model
def baseline_model():
  # create model
  model = Sequential()
  model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
  model.add(Dense(3, kernel_initializer="normal", activation="sigmoid"))
  
  # Compile model
  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
  return model

estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=5)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)

# Summarize
print("Accuracy: %.2f%%"%(results.mean()*100))
print("Standard deviation: %.2f%%"%(results.std()*100))

# Code - modified by Maximo, D.H.

Accuracy: 88.67%

Standard deviation: **19.10%**

Changes: 
*   Different data preprocessing
*   StratifiedKFold


In [0]:
# Multiclass Classification with the Iris Flowers Dataset
import numpy
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn import preprocessing

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# ------- Begin data preprocessing

# load dataset
df = pd.read_csv("iris.data", header=None)
X = df.drop(df.columns[-1],1)
y = df[df.columns[-1]]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
encoded_y = encoder.transform(y)

# ------- End data preprocessing

# define baseline model
def create_model():
  # create model
  model = Sequential()
  model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
  model.add(Dense(3, kernel_initializer="normal", activation="sigmoid"))
  
  # Compile model
  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
  return model

model = KerasClassifier(build_fn=create_model, epochs=200, batch_size=5)
skfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, encoded_y, cv=skfold)

# Summarize
print("Accuracy: %.2f%%"%(results.mean()*100))
print("Standard deviation: %.2f%%"%(results.std()*100))

# Code - modified by Maximo, D.H. 2

Accuracy: **97.33%**

Standard deviation: **5.33%**

Changes: 
*   preprocessing.scale()
*   StratifiedKFold


In [0]:
# Multiclass Classification with the Iris Flowers Dataset
import numpy
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn import preprocessing

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# ------- Begin data preprocessing

# load dataset
df = pd.read_csv("iris.data", header=None)
X = preprocessing.scale(numpy.array(df.drop(df.columns[-1],1)))
y = df[df.columns[-1]]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
encoded_y = encoder.transform(y)

# ------- End data preprocessing

# define baseline model
def create_model():
  # create model
  model = Sequential()
  model.add(Dense(4, input_dim=4, activation="relu", kernel_initializer="normal"))
  model.add(Dense(3, kernel_initializer="normal", activation="sigmoid"))
  
  # Compile model
  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
  return model

model = KerasClassifier(build_fn=create_model, epochs=200, batch_size=5)
skfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(model, X, encoded_y, cv=skfold)

# Summarize
print("Accuracy: %.2f%%"%(results.mean()*100))
print("Standard deviation: %.2f%%"%(results.std()*100))