In [1]:
# Multiclass Classification with TensorFlow: A Step-by-Step Guide Using the Iris Dataset

The Dataset: Iris Flower Classification
The Iris dataset consists of the following columns:

Sepal Length (in cm)
Sepal Width (in cm)
Petal Length (in cm)
Petal Width (in cm)
Flower: The species of the flower, which is the target variable.
You can download the dataset here.

In [2]:
# Step 1: Load and Explore the Data
import pandas as pd

# Load the dataset
df = pd.read_csv('https://raw.githubusercontent.com/fenago/datasets/main/iris.csv')

# Display the first few rows of the dataset
print(df.head())

# Display the column names
print(df.columns)

   Sepal Length  Sepal Width  Petal Length  Petal Width       Flower
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa
Index(['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Flower'], dtype='object')


In [3]:
# Step 2: Preprocess the Data
from sklearn.preprocessing import LabelEncoder

# Handle missing values if necessary
df = df.dropna()

# Separate input features and target variable
X = df.drop(columns=['Flower'])  # 'Flower' is the target column
y = df['Flower'].values

# Encode the target variable y (Flower)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)  # Converts the three flower types into 0, 1, or 2

# Check the number of features
n_features = X.shape[1]

In [4]:
# Step 3: Split the Data into Train and Test Sets
from sklearn.model_selection import train_test_split

# Split into train and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [5]:
# Step 4: Define the Model for Multiclass Classification
# For a multiclass classification problem, the output layer needs to have softmax activation to predict probabilities for each class. Additionally, the loss function will be categorical cross-entropy, which is standard for multiclass problems.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Define the model
model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(n_features,)))
model.add(Dense(8, activation='relu'))
model.add(Dense(3, activation='softmax'))  # 3 neurons for 3 classes (Iris-setosa, Iris-versicolor, Iris-virginica)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Explanation:
Input Layer: The input shape corresponds to the number of features in the dataset.
Hidden Layers: We have two hidden layers with 10 and 8 neurons respectively, both using ReLU activation.
Output Layer: The output layer has 3 neurons (one for each class) and uses softmax activation to output probabilities for each class.
Loss Function: Since this is a multiclass classification problem, we use sparse categorical cross-entropy as the loss function. This is appropriate when the target variable is encoded as integers (e.g., 0, 1, 2).

In [6]:
# Step 5: Train the Model
# Train the model
model.fit(X_train, y_train, epochs=150, batch_size=32, verbose=0)

<keras.src.callbacks.history.History at 0x7d7323ea2c20>

In [7]:
# Step 6: Evaluate the Model
# Evaluate the model
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print('Test Accuracy: %.3f' % acc)

Test Accuracy: 0.980


In [8]:
# Step 7: Make Predictions
# Example prediction
import numpy as np

sample = np.array([[5.0, 3.6, 1.4, 0.2]])  # Example input for a new flower
prediction = model.predict(sample)

# Convert the prediction probabilities into a class label
predicted_class = np.argmax(prediction, axis=1)
print('Predicted class:', predicted_class)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Predicted class: [0]
