# IRIS Flower Data set



###### The Iris flower data set is a multivariate data set introduced by the British Statistician and Biologist Ronald Fisher in his 19936 paper.

The dataset contains a set of 150 records under 5 attributes:

* Petal Length  (numeric or continuous)
* Petal Width  (numeric or continuous)
* Sepal Length  (numeric or continuous)
* Sepal Width  (numeric or continuous)
* Class (The label) ( catagorical)

For Further Reading refer https://en.m.wikipedia.org/wiki/Iris_flower_data_set

#### Importing Necessary Modules

In [1]:
import tensorflow as tf
import pandas as pd

#### Reading data

In [2]:
iris = pd.read_csv("iris.csv")

In [3]:
# Displaying the shape of the data 
iris.shape

(150, 5)

In [4]:
# Displaying the first 5 Rows of the dataset
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
# Displaying the discreption of the dataset
iris.describe()#.transpose()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [6]:
# List of columns
iris.columns

Index([u'sepal_length', u'sepal_width', u'petal_length', u'petal_width',
       u'species'],
      dtype='object')

#### Dividing the dataset into feature and labels

In [7]:
# Assigning all columns except for the "species"
# column as a feature columns
features = iris.drop("species", axis=1)

In [8]:
# Displaying the First 5 Rows of the feature datas
features.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [9]:
# Assigning the "species" column as a label column
labels = iris["species"]

In [10]:
# Displaying the first 5 rows of the labes
labels.head()

0    setosa
1    setosa
2    setosa
3    setosa
4    setosa
Name: species, dtype: object

##### Tensorflow won't be able to understand string data type as labels so we need to convert the labels into integers using pandas' apply(custom_function) method

In [11]:
# listing the available set of labels
set(labels)

{'setosa', 'versicolor', 'virginica'}

In [12]:
# our custom function to convert string into intiger
def strToInt(label):
    """
    This Function revieves a string and returns the index of it
    from a list (labels) as an integer
    """
    labels = ['setosa', 'versicolor', 'virginica']
    return labels.index(label)
    

In [13]:
# Applying the custom function to the labels so that the labels are integers
labels = labels.apply(strToInt)

In [14]:
labels.head()

0    0
1    0
2    0
3    0
4    0
Name: species, dtype: int64

# Performing train-test split

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# Randomly spliting the features and labels into training (70%) and testing (30%) datasets with a constant seed
x_train, x_test, y_train, y_test = train_test_split(features,labels,test_size=0.3, random_state=101)

In [17]:
x_train.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
13,4.3,3.0,1.1,0.1
102,7.1,3.0,5.9,2.1
67,5.8,2.7,4.1,1.0
34,4.9,3.1,1.5,0.2
98,5.1,2.5,3.0,1.1


In [18]:
x_test.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
33,5.5,4.2,1.4,0.2
16,5.4,3.9,1.3,0.4
43,5.0,3.5,1.6,0.6
129,7.2,3.0,5.8,1.6
50,7.0,3.2,4.7,1.4


In [19]:
y_train.head()

13     0
102    2
67     1
34     0
98     1
Name: species, dtype: int64

In [20]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((105, 4), (105,), (45, 4), (45,))

#### Creating Feature columns for the classifier

In [21]:
features.columns

Index([u'sepal_length', u'sepal_width', u'petal_length', u'petal_width'], dtype='object')

In [22]:
# Preparing the feature columns for the Estimator
# We use .numeric_column since all the features are numbers (Floating)
sepLen = tf.feature_column.numeric_column("sepal_length")
sepWid = tf.feature_column.numeric_column("sepal_width")
petLen = tf.feature_column.numeric_column("petal_length")
petWid = tf.feature_column.numeric_column("petal_width")

In [23]:
# Merging all the feature columns into a single list
featureColumns = [sepLen, sepWid, petLen, petWid]

In [24]:
# Creating Input Function for the tf.estimator model
inputFunction = tf.estimator.inputs.pandas_input_fn(x=features, y=labels, batch_size=10, num_epochs=None, shuffle=True)

###  Creating a Linear Classifier Model

In [25]:
classifier = tf.estimator.LinearClassifier(feature_columns=featureColumns, n_classes=3)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_tf_random_seed': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_save_checkpoints_steps': None, '_model_dir': '/tmp/tmpSC8KyH', '_save_summary_steps': 100}


In [26]:
# Training the model for 1000 steps on the dataset
classifier.train(input_fn=inputFunction, steps=1000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpSC8KyH/model.ckpt.
INFO:tensorflow:loss = 10.9861, step = 1
INFO:tensorflow:global_step/sec: 323.82
INFO:tensorflow:loss = 4.01058, step = 101 (0.317 sec)
INFO:tensorflow:global_step/sec: 330.79
INFO:tensorflow:loss = 3.85604, step = 201 (0.305 sec)
INFO:tensorflow:global_step/sec: 365.605
INFO:tensorflow:loss = 2.07055, step = 301 (0.271 sec)
INFO:tensorflow:global_step/sec: 375.029
INFO:tensorflow:loss = 2.17138, step = 401 (0.264 sec)
INFO:tensorflow:global_step/sec: 343.116
INFO:tensorflow:loss = 1.36344, step = 501 (0.292 sec)
INFO:tensorflow:global_step/sec: 251.017
INFO:tensorflow:loss = 1.42001, step = 601 (0.396 sec)
INFO:tensorflow:global_step/sec: 260.795
INFO:tensorflow:loss = 2.74418, step = 701 (0.386 sec)
INFO:tensorflow:global_step/sec: 302.252
INFO:tensorflow:loss = 2.15902, step = 801 (0.339 sec)
INFO:tensorflow:global_step/sec: 301.226
INFO:tensorflow:loss = 2.21824, ste

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x7fcead62f550>

### Evaluating the classifier (Testing)

In [27]:
predictionInputFunction = tf.estimator.inputs.pandas_input_fn(x=x_test, batch_size=len(x_test), shuffle=False)

In [28]:
predicted = classifier.predict(input_fn=predictionInputFunction)

In [29]:
predictions = list(predicted)

INFO:tensorflow:Restoring parameters from /tmp/tmpSC8KyH/model.ckpt-1000


In [30]:
predictions

[{'class_ids': array([0]),
  'classes': array(['0'], dtype=object),
  'logits': array([ 9.51269531,  3.22308803, -6.25453234], dtype=float32),
  'probabilities': array([  9.98147845e-01,   1.85205217e-03,   1.41766989e-07], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array(['0'], dtype=object),
  'logits': array([ 8.49812126,  2.89982986, -5.59972191], dtype=float32),
  'probabilities': array([  9.96308744e-01,   3.69051402e-03,   7.51239554e-07], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array(['0'], dtype=object),
  'logits': array([ 6.11604548,  2.42542219, -4.29748678], dtype=float32),
  'probabilities': array([  9.75622594e-01,   2.43480690e-02,   2.92915429e-05], dtype=float32)},
 {'class_ids': array([2]),
  'classes': array(['2'], dtype=object),
  'logits': array([-6.59617662,  1.39884043,  1.44357121], dtype=float32),
  'probabilities': array([  1.64772486e-04,   4.88738626e-01,   5.11096597e-01], dtype=float32)},
 {'class_ids': array([1]),
  'classe

In [31]:
testPrediction = [pred["class_ids"][0] for pred in predictions]

#####  Getting a full report on the model's performance on the test data

In [32]:
from sklearn.metrics import classification_report

In [33]:
print(classification_report(y_test, testPrediction))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        13
          1       1.00      0.95      0.97        20
          2       0.92      1.00      0.96        12

avg / total       0.98      0.98      0.98        45



# Done!