# Classification using ANN
### Author: Prof. Sandro Camargo <github.com/sandrocamargo>
### Data Mining Course <https://moodle.unipampa.edu.br/moodle/course/view.php?id=5213>
#### This notebook introduces the basic concepts of artificial neural networks (ANN).
##### It uses the Iris dataset: <https://archive.ics.uci.edu/dataset/53/iris>

To open this notebook in your Google Colab environment, [click here](http://colab.research.google.com/github/Sandrocamargo/data-mining/blob/main/Python/md05_Classification_ANN.ipynb).

In [None]:
# Download the Iris archive from the UCI repository and unzip it into the working directory.
# `wget -c` continues a partially downloaded file instead of starting over, and
# `unzip -u` only extracts files that are missing or older than the archived copy.
!wget -c https://archive.ics.uci.edu/static/public/53/iris.zip
!unzip -u iris.zip

In [None]:
# Load the Iris measurements into a DataFrame and preview them
import pandas as pd

# The file is read with header=None, so the column names are supplied explicitly
IRIS_COLUMNS = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width', 'Species']
data = pd.read_csv('iris.data', header=None, names=IRIS_COLUMNS)

data.head()  # Show first 5 samples

In [None]:
# Getting to know your data: pairwise plots of all features, colored by species
import matplotlib.pyplot as plt
import seaborn as sns

hue_markers = ["o", "s", "D"]  # one marker shape per hue level
sns.pairplot(data, markers=hue_markers, hue='Species')

# Persist the current (pairplot) figure next to the notebook
plt.savefig("iris-pairplot.pdf")

In [None]:
# Split the dataset into stratified train and test sets
from sklearn import model_selection
from sklearn.preprocessing import LabelBinarizer

# Store the inputs in the matrix X and the outputs in the array y
X = data.iloc[:,0:4]
print(X.describe())

y = data.iloc[:,4]
print("\n",y.value_counts(),"\n")

# One-hot encode the species: column i of y_bin corresponds to lb.classes_[i]
lb = LabelBinarizer()
y_bin = lb.fit_transform(y)

# Take the display names from the fitted binarizer so that position i lines up
# with column i of y_bin (and therefore with the argmax indices used in the
# reports). list(set(y)) has no guaranteed order, so it could attach the wrong
# species name to a row of the classification report.
target_names = lb.classes_.tolist()

# 80/20 split; stratify on the original labels to keep class proportions
train_x, test_x, train_y, test_y = model_selection.train_test_split(X, y_bin, train_size=0.8, stratify=y, random_state=10)

# Verifying dataset dimensions
print('The training dataset (inputs) dimensions are: ', train_x.shape)
print('The training dataset (outputs) dimensions are: ', train_y.shape)
print('The testing dataset (inputs) dimensions are: ', test_x.shape)
print('The testing dataset (outputs) dimensions are: ', test_y.shape)

In [None]:
# Train the MLP classifier, then inspect its predictions on the training set
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(hidden_layer_sizes=(10,), max_iter=5000, random_state=1, verbose=True)
clf.fit(train_x, train_y)

# Predict back on the train set to check the fit.
# train_y is a one-hot matrix, so the network is fitted as a multilabel model:
# predict() thresholds every output independently and can return rows with no
# 1 at all (or several), which argmax would silently map to the first column.
# Taking argmax over the per-class scores always picks the single best class.
predicted_train_y = clf.predict_proba(train_x)
predicted_labels = np.argmax(predicted_train_y, axis=1)  # class index per sample

# Recover class labels from indices
class_labels = lb.classes_[predicted_labels]

# Work on a copy so train_x keeps only the input columns
tmpdata = train_x.copy()
tmpdata['Species'] = class_labels

ax = sns.scatterplot(data=tmpdata, x='Petal Width', y='Petal Length', hue='Species', style='Species')

plt.title("Iris - Training Data")
plt.savefig("iris-train-ann.pdf")

# Plot training loss (one value per training iteration) in a separate figure
plt.figure(figsize=(8, 5))
plt.plot(clf.loss_curve_, label='Training Loss')
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("MLPClassifier Training Loss Curve")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig("iris-loss-curve.pdf")
plt.show()

In [None]:
# Classification report on training set
from sklearn.metrics import classification_report, confusion_matrix

# True class index per sample (train_y is one-hot encoded)
true_train_idx = np.argmax(train_y, axis=1)

# Use the per-class scores rather than predict(): the model was fitted on a
# one-hot matrix, so predict() may return all-zero or multi-hot rows that
# argmax would silently resolve to the first column.
predicted = clf.predict_proba(train_x)
predicted_train_idx = np.argmax(predicted, axis=1)

print("Confusion Matrix:\n",confusion_matrix(true_train_idx, predicted_train_idx))

# lb.classes_ is ordered like the one-hot columns, so name i describes class index i
print(classification_report(true_train_idx, predicted_train_idx, target_names=lb.classes_.tolist()))

In [None]:
# Predict on the held-out test set.
# The model was fitted on a one-hot matrix, so predict() may return all-zero
# or multi-hot rows that argmax would silently resolve to the first column;
# argmax over the per-class scores always picks the single best class.
predicted_test_y = clf.predict_proba(test_x)
predicted_labels = np.argmax(predicted_test_y, axis=1)  # class index per sample

# Recover class labels from indices
class_labels = lb.classes_[predicted_labels]

# Work on a copy so test_x keeps only the input columns
tmpdata = test_x.copy()
tmpdata['Species'] = class_labels

# Plotting the test set, colored by predicted species
ax = sns.scatterplot(data=tmpdata, x='Petal Width', y='Petal Length', hue='Species', style='Species')

plt.title("Iris - Test Data")
# File name follows the training figure ("iris-train-ann.pdf"); the previous
# "knn" suffix was a leftover from another notebook.
plt.savefig("iris-test-ann.pdf")

In [None]:
# Showing the performance on testing set
# Import both metrics here so the cell does not depend on an earlier cell's import
from sklearn.metrics import classification_report, confusion_matrix

# True class index per sample (test_y is one-hot encoded)
true_test_idx = np.argmax(test_y, axis=1)

# Use the per-class scores rather than predict(): the model was fitted on a
# one-hot matrix, so predict() may return all-zero or multi-hot rows that
# argmax would silently resolve to the first column.
predicted = clf.predict_proba(test_x)
predicted_test_idx = np.argmax(predicted, axis=1)

print("Confusion Matrix:\n",confusion_matrix(true_test_idx, predicted_test_idx))

# lb.classes_ is ordered like the one-hot columns, so name i describes class index i
print(classification_report(true_test_idx, predicted_test_idx, target_names=lb.classes_.tolist()))