# Iris Classification Refactored
This notebook is a rewritten, clean version of the original script.

In [None]:

"""
iris_classification_refactor.py

Purpose:
  - Load Iris features & labels from NumPy files
  - Explore basic dataset statistics
  - Standardize features
  - Build, train and evaluate a small neural classifier using TensorFlow/Keras
  - Plot training curves and simple 2D scatter visualizations

Notes:
  - Make sure 'Iris_data.npy' and 'Iris_labels.npy' are in the current working
    directory (the default paths are relative)
  - Tested with: Python 3.8+, tensorflow 2.x, scikit-learn, matplotlib, numpy
"""

import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from typing import Tuple

# One seed value shared by the NumPy and TensorFlow global generators seeded
# right below; it is also passed as ``random_state`` to the train/test split.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)  # seed NumPy's global random generator
tf.random.set_seed(RANDOM_SEED)  # seed TensorFlow's global random generator


def load_data(features_path: str = "Iris_data.npy",
              labels_path: str = "Iris_labels.npy") -> Tuple[np.ndarray, np.ndarray]:
    """Load the feature array and the label array from two ``.npy`` files.

    Args:
        features_path: Path of the NumPy file holding the features.
        labels_path: Path of the NumPy file holding the labels.

    Returns:
        A ``(X, y)`` tuple: the array read from ``features_path`` followed by
        the array read from ``labels_path``.

    Raises:
        FileNotFoundError: If either path does not point to an existing file.
            The message lists the path(s) that were actually looked up, so it
            stays accurate when non-default paths are passed.
    """
    # Check both paths up front so a single error can report every missing file.
    missing = [path for path in (features_path, labels_path)
               if not os.path.isfile(path)]
    if missing:
        listed = ", ".join(repr(path) for path in missing)
        raise FileNotFoundError(
            f"Data file(s) not found: {listed}. "
            "Relative paths are resolved against the current working directory."
        )
    X = np.load(features_path)
    y = np.load(labels_path)
    return X, y


def quick_stats(X: np.ndarray, y: np.ndarray) -> None:
    """Print a short textual summary of the dataset.

    Prints the sample/feature/class counts, the per-feature mean and standard
    deviation (rounded to 3 decimals), and how many samples carry each label.

    Args:
        X: Two-dimensional array; its shape is unpacked as
            ``(n_samples, n_features)``.
        y: Array of labels; its distinct values are treated as the classes.

    Returns:
        None. The summary is written to standard output.
    """
    n_samples, n_features = X.shape
    # A single np.unique call yields both the class list and the class counts.
    classes, counts = np.unique(y, return_counts=True)
    # Convert to native Python values so the dict prints as ``{0: 50, ...}``
    # regardless of how the installed NumPy version formats its scalar types.
    distribution = dict(zip(classes.tolist(), counts.tolist()))

    print(f"Samples: {n_samples}, Features: {n_features}, Classes: {len(classes)}")
    print(f"Feature means: {np.round(X.mean(axis=0), 3)}")
    print(f"Feature std devs: {np.round(X.std(axis=0), 3)}")
    print(f"Labels distribution: {distribution}")


def plot_raw_data(X: np.ndarray, y: np.ndarray) -> None:
    """Show a scatter plot of the first two feature columns, coloured by label.

    Args:
        X: Feature array; only columns 0 and 1 are plotted.
        y: Per-sample values used to colour the markers.
    """
    marker_style = {"c": y, "cmap": "viridis", "edgecolor": "k", "s": 50}

    plt.figure(figsize=(6, 5))
    first_dim, second_dim = X[:, 0], X[:, 1]
    plt.scatter(first_dim, second_dim, **marker_style)

    # Axis decoration; these calls are independent of one another.
    plt.title("Raw data (first two feature dims)")
    plt.ylabel("Feature 1")
    plt.xlabel("Feature 0")
    plt.grid(alpha=0.3)

    plt.show()


def prepare_data(X: np.ndarray, y: np.ndarray, test_size: float = 0.2):
    """Split the data into train/test parts and standardize the features.

    The split is stratified on ``y`` and uses ``RANDOM_SEED`` as its random
    state. The scaler is fitted on the training part only and then applied to
    both parts.

    Args:
        X: Feature array.
        y: Label array, also used for stratification.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        A 5-tuple ``(X_train_scaled, X_test_scaled, y_train, y_test, scaler)``
        where ``scaler`` is the fitted ``StandardScaler``.
    """
    split = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=RANDOM_SEED,
        stratify=y,
    )
    train_features, test_features, train_labels, test_labels = split

    # Fit on the training features only, then apply the same transform to both.
    scaler = StandardScaler()
    scaler.fit(train_features)
    scaled_train = scaler.transform(train_features)
    scaled_test = scaler.transform(test_features)

    return scaled_train, scaled_test, train_labels, test_labels, scaler


def build_model(input_dim: int, num_classes: int, *,
                hidden_units: Tuple[int, ...] = (64, 32),
                dropout_rate: float = 0.2,
                learning_rate: float = 1e-3) -> tf.keras.Model:
    """Build and compile a small fully-connected softmax classifier.

    The network is: input -> one ReLU ``Dense`` layer per entry of
    ``hidden_units`` -> ``Dropout`` -> ``Dense(num_classes, softmax)``.
    It is compiled with Adam, sparse categorical cross-entropy (so labels are
    passed as integer class indices, not one-hot vectors) and accuracy.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of units in the softmax output layer.
        hidden_units: Width of each hidden ``Dense`` layer, in order.
            Defaults to ``(64, 32)``.
        dropout_rate: Rate of the ``Dropout`` layer placed after the hidden
            layers. Defaults to ``0.2``.
        learning_rate: Learning rate given to the Adam optimizer.
            Defaults to ``1e-3``.

    Returns:
        The compiled ``tf.keras.Sequential`` model named ``"Iris_Classifier"``.
    """
    # ``tf.keras.Input(shape=...)`` is used instead of
    # ``InputLayer(input_shape=...)``: the ``input_shape`` keyword is
    # deprecated in Keras 3, while ``Input(shape=...)`` is accepted by both
    # older tf.keras releases and Keras 3.
    layers = [tf.keras.Input(shape=(input_dim,))]
    layers.extend(
        tf.keras.layers.Dense(units, activation="relu") for units in hidden_units
    )
    layers.append(tf.keras.layers.Dropout(dropout_rate))
    layers.append(tf.keras.layers.Dense(num_classes, activation="softmax"))

    model = tf.keras.Sequential(layers, name="Iris_Classifier")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


def plot_training(history: tf.keras.callbacks.History) -> None:
    """Show side-by-side loss and accuracy curves for a training run.

    Reads ``history.history`` and plots the ``"loss"`` and ``"accuracy"``
    series against the epoch number (starting at 1). The ``"val_loss"`` and
    ``"val_accuracy"`` series are added to the matching panel when present.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences.
    """
    curves = history.history
    epoch_axis = range(1, len(curves["loss"]) + 1)

    # (panel title, train key, train legend label, validation key, validation legend label)
    panels = (
        ("Loss", "loss", "train_loss", "val_loss", "val_loss"),
        ("Accuracy", "accuracy", "train_acc", "val_accuracy", "val_acc"),
    )

    plt.figure(figsize=(12, 4))
    for position, panel in enumerate(panels, start=1):
        title, train_key, train_label, val_key, val_label = panel
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, curves[train_key], label=train_label)
        # Validation series exist only if validation data was used in training.
        if val_key in curves:
            plt.plot(epoch_axis, curves[val_key], label=val_label)
        plt.title(title)
        plt.xlabel("Epoch")
        plt.legend()
        plt.grid(alpha=0.3)

    plt.show()


def visualize_predictions(model: tf.keras.Model, X_test: np.ndarray, y_test: np.ndarray, scaler: StandardScaler = None) -> None:
    """Overlay predicted and true classes on a 2D scatter of the test data.

    Each test sample is drawn twice at the same position (feature columns 0
    and 1): once as a large circle coloured by the predicted class, and once
    as a small ``x`` coloured by the true class.

    Args:
        model: Model whose ``predict`` output is arg-maxed along axis 1 to get
            the predicted class per sample.
        X_test: Test features, passed to ``model.predict`` and plotted as-is.
        y_test: True labels used to colour the small ``x`` markers.
        scaler: Accepted for interface compatibility; not used by this
            function.
    """
    class_scores = model.predict(X_test)
    predicted = np.argmax(class_scores, axis=1)

    plt.figure(figsize=(6, 5))
    horizontal, vertical = X_test[:, 0], X_test[:, 1]

    predicted_style = dict(cmap="plasma", s=60, marker="o", edgecolor="k", alpha=0.9)
    truth_style = dict(cmap="cool", s=12, marker="x")
    # Draw predictions first so the smaller truth markers sit on top of them.
    plt.scatter(horizontal, vertical, c=predicted, **predicted_style)
    plt.scatter(horizontal, vertical, c=y_test, **truth_style)

    plt.title("Test data: large markers = predicted class, small x = true class")
    plt.xlabel("Feature 0 (scaled)")
    plt.ylabel("Feature 1 (scaled)")
    plt.grid(alpha=0.25)
    plt.show()


def main():
    """Run the full pipeline: load, inspect, split/scale, train, evaluate, plot."""
    # Load and inspect the raw dataset.
    features, labels = load_data("Iris_data.npy", "Iris_labels.npy")
    quick_stats(features, labels)
    plot_raw_data(features, labels)

    # Train/test split followed by standardization.
    train_x, test_x, train_y, test_y, scaler = prepare_data(features, labels, test_size=0.2)
    print(f"Train / Test sizes: {train_x.shape[0]} / {test_x.shape[0]}")

    # The model is sized from the data: one input per feature column, one
    # output unit per distinct label value.
    feature_count = train_x.shape[1]
    class_count = len(np.unique(labels))
    model = build_model(input_dim=feature_count, num_classes=class_count)
    model.summary()

    # Training, with part of the training data held out for validation.
    fit_options = dict(validation_split=0.15, epochs=40, batch_size=16, verbose=1)
    history = model.fit(train_x, train_y, **fit_options)
    plot_training(history)

    # Final evaluation on the test split.
    test_loss, test_acc = model.evaluate(test_x, test_y, verbose=0)
    print(f"Test loss: {test_loss:.4f}  |  Test accuracy: {test_acc*100:.2f}%")
    visualize_predictions(model, test_x, test_y, scaler=scaler)


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
