<a href="https://colab.research.google.com/github/NHleza/Week--3/blob/main/Week3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# AI Tools Assignment: Mastering the AI Toolkit 🛠️🧠
# Members: [Hleza Nqobile]

# ===========================
# PART 1: Image Classification with TensorFlow (Fashion MNIST)
# ===========================

import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Fetch the Fashion MNIST train and test splits from TensorFlow Datasets as
# (image, label) pairs, together with the dataset metadata object.
splits, ds_info = tfds.load(
    'fashion_mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
# The splits come back in the order they were requested above.
ds_train, ds_test = splits

# Normalize images: uint8 [0,255] -> float32 [0.0,1.0]
def normalize_img(image, label):
    """Cast an image to float32 and divide it by 255; pass the label through.

    Args:
        image: Image tensor to rescale. Presumably uint8 pixels in [0, 255];
            the value range is not checked here.
        label: Label paired with the image; returned untouched.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    as_float = tf.cast(image, tf.float32)
    return as_float / 255., label

# Prepare training and test datasets, one pipeline stage per line.
# Training: normalize -> cache -> shuffle -> batch -> prefetch.
train_size = ds_info.splits['train'].num_examples
ds_train = ds_train.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(train_size)  # buffer as large as the training split
ds_train = ds_train.batch(128)
ds_train = ds_train.prefetch(tf.data.AUTOTUNE)

# Test: normalize -> batch -> cache -> prefetch (no shuffling).
ds_test = ds_test.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = ds_test.batch(128)
ds_test = ds_test.cache()
ds_test = ds_test.prefetch(tf.data.AUTOTUNE)

# Build a simple CNN model: one convolution/pooling stage followed by a dense
# classifier head with a 10-way softmax output.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` layer argument, which Keras 3 warns against for Sequential
# models. The resulting architecture is the same.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Dropout(0.25),  # Regularization
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        128,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.001),  # L2 weight penalty
    ),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Adam optimizer with the sparse categorical cross-entropy loss; accuracy is
# tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model for 10 epochs.
# NOTE: the test split is passed as validation data, so the validation curves
# recorded in `history` are measured on the test split itself.
history = model.fit(
    x=ds_train,
    epochs=10,
    validation_data=ds_test,
)

# Plot training and validation curves side by side: accuracy on the left,
# loss on the right. Both panels share the same layout, so they are drawn
# in a single loop over (history key, display name) pairs.
plt.figure(figsize=(14, 5))

curve_panels = (
    ('accuracy', 'Accuracy'),
    ('loss', 'Loss'),
)
for panel_index, (metric_key, metric_name) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[metric_key], label=f'Train {metric_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.title(f'Training and Validation {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.legend()

plt.tight_layout()
plt.savefig("fashion_mnist_training_plot.png")  # Save for report
plt.show()

# Evaluate model on test dataset.
# Pull every batch out of the test pipeline as NumPy arrays, separate the
# images from the labels, then join the batches along the first axis.
batch_images, batch_labels = zip(*tfds.as_numpy(ds_test))
test_images = np.concatenate(batch_images, axis=0)
test_labels = np.concatenate(batch_labels, axis=0)

# One row of class scores per sample -> index of the highest-scoring class.
predictions = model.predict(test_images)
pred_labels = np.argmax(predictions, axis=1)

class_names = ds_info.features['label'].names
print("Classification Report:\n")
print(classification_report(test_labels, pred_labels, target_names=class_names))

# Confusion matrix heatmap, annotated with integer counts and labelled with
# the dataset's class names on both axes.
class_names = ds_info.features['label'].names
cm = confusion_matrix(test_labels, pred_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig("fashion_mnist_confusion_matrix.png")  # Save for report
plt.show()

# ===========================
# PART 2: Named Entity Recognition (NER) with spaCy
# ===========================

import spacy
from spacy import displacy

# Load pre-trained English model
nlp = spacy.load("en_core_web_sm")

# Real-world example text for NER
text = (
    "Apple is looking at buying U.K. startup for $1 billion. "
    "Elon Musk founded SpaceX in 2002 and Tesla in 2003."
)

# Run the pipeline; the recognized entities are exposed on doc.ents.
doc = nlp(text)

print("Named Entities in the text:")
for ent in doc.ents:
    print(f"{ent.text} - {ent.label_}")

# Render and save the NER visualization.
# displaCy's "ent" style returns HTML markup (only the "dep" style produces
# SVG), so the output is written with an .html extension. Saving this markup
# under an .svg name produces a file that is not valid SVG.
# jupyter=False makes render() return the markup as a string instead of
# displaying it inline.
html = displacy.render(doc, style="ent", jupyter=False)
with open("ner_visualization.html", "w", encoding="utf-8") as f:
    f.write(html)

# ===========================
# PART 3: Ethical Reflection (to include in your report)
# ===========================

"""
Ethical Reflection:

- Bias & Fairness: Fashion MNIST is a balanced dataset, but real-world datasets may have biases that must be identified and mitigated.
- Privacy: No personal or sensitive data is used here. For real applications, ensure data privacy and consent.
- Transparency: Model architecture, hyperparameters, and training details are documented for reproducibility.
- Optimization: Regularization (dropout, L2) was applied to prevent overfitting. Further hyperparameter tuning can improve performance.
- Responsible Use: AI outputs, especially in sensitive domains, should be validated by domain experts before deployment.
"""

# ===========================
# END OF NOTEBOOK
# ===========================


