# ATK Classifier Training
Upload the dataset as a ZIP file, train the model, then download the trained model for use in the Streamlit app.

## 1. Upload Dataset ZIP
Compress the `dataset_alat_tulis` folder (one sub-folder per class, each containing that class's images) into a ZIP file, then upload it.

In [None]:
from google.colab import files
import zipfile, os

# Ask the user for the dataset archive, unpack it into the working directory
# and print a per-class entry count as a quick sanity check.
print('Upload dataset_alat_tulis.zip...')
uploaded = files.upload()

for f in uploaded:
    # Skip anything that is not a ZIP instead of crashing with BadZipFile.
    if not zipfile.is_zipfile(f):
        print(f'Skipped {f}: not a ZIP archive')
        continue
    with zipfile.ZipFile(f, 'r') as z:
        z.extractall('.')
    print(f'Extracted {f}')

DATASET_DIR = 'dataset_alat_tulis'
if os.path.isdir(DATASET_DIR):
    for c in sorted(os.listdir(DATASET_DIR)):
        class_dir = os.path.join(DATASET_DIR, c)
        # Stray files next to the class folders (e.g. .DS_Store) are not
        # classes; listing them would raise NotADirectoryError.
        if not os.path.isdir(class_dir):
            continue
        print(f'{c}: {len(os.listdir(class_dir))} images')
else:
    # Make a wrong archive layout visible here rather than in a later cell.
    print(f'{DATASET_DIR}/ not found after extraction - check the ZIP layout')

## 2. Setup

In [None]:
import os, json, cv2, imghdr
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from datetime import datetime

# Folder that holds one sub-directory per class.
DATASET_DIR = 'dataset_alat_tulis'

# Training configuration shared by the cells below.
IMG_SIZE = 300    # images are resized to IMG_SIZE x IMG_SIZE when loaded
BATCH_SIZE = 15   # images per batch
EPOCHS = 15       # upper bound on training epochs
LR = 1e-3         # learning rate handed to the Adam optimizer

# Report the runtime environment (TensorFlow version and visible GPUs).
print(f'TF: {tf.__version__}')
print(f'GPU: {tf.config.list_physical_devices("GPU")}')

## 3. Clean Images

In [None]:
# Delete every file in the class folders that OpenCV cannot decode or whose
# detected format is not JPEG/PNG, so that dataset loading does not fail later.
# NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13; this cell
# needs a replacement format check before moving to a newer runtime.
exts = ['jpeg', 'jpg', 'png']
removed = valid = 0
for cls in os.listdir(DATASET_DIR):
    p = os.path.join(DATASET_DIR, cls)
    if not os.path.isdir(p):
        continue
    for img in os.listdir(p):
        ip = os.path.join(p, img)
        # Nested folders cannot be deleted with os.remove(); leave them alone.
        if not os.path.isfile(ip):
            continue
        try:
            ok = cv2.imread(ip) is not None and imghdr.what(ip) in exts
        except Exception:
            # Only genuine read/decode errors mark a file as bad. A bare
            # `except:` would also catch KeyboardInterrupt and delete the
            # file being processed when the cell is stopped.
            ok = False
        if ok:
            valid += 1
        else:
            os.remove(ip)
            removed += 1
print(f'Valid: {valid}, Removed: {removed}')

## 4. Load Data

In [None]:
# Build the training and validation datasets from the class folders.
# Both calls share the same seed and split fraction; only `subset` differs.
split_args = dict(
    validation_split=0.1,
    seed=123,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
)
train_ds = tf.keras.utils.image_dataset_from_directory(
    DATASET_DIR, subset='training', **split_args)
val_ds = tf.keras.utils.image_dataset_from_directory(
    DATASET_DIR, subset='validation', **split_args)

# Read the class names before the datasets are wrapped by cache/prefetch below.
class_names = train_ds.class_names
print(f'Classes: {class_names}')

# Input pipeline: cache both splits, shuffle only the training split.
AUTO = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(AUTO)
val_ds = val_ds.cache().prefetch(AUTO)

## 5. Build Model

In [None]:
# Small CNN: pixel rescaling, three conv/pool stages with 32/64/128 filters,
# then a dense head with dropout and one softmax output per class.
stack = [layers.Rescaling(1./255, input_shape=(IMG_SIZE, IMG_SIZE, 3))]
for filters in (32, 64, 128):
    stack.append(layers.Conv2D(filters, 3, padding='same', activation='relu'))
    stack.append(layers.MaxPooling2D())
stack += [
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(len(class_names), activation='softmax'),
]
model = models.Sequential(stack)

# Sparse categorical cross-entropy is used with the softmax output above.
model.compile(
    optimizer=optimizers.Adam(LR),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
model.summary()

## 6. Train

In [None]:
# Train with early stopping on validation loss while writing the checkpoint
# with the best validation accuracy to MODEL_PATH.
MODEL_PATH = 'best_model.keras'
stop_early = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
save_best = ModelCheckpoint(MODEL_PATH, monitor='val_accuracy', save_best_only=True)
callbacks = [stop_early, save_best]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=callbacks,
)
print('Done!')

## 7. Results

In [None]:
# Per-epoch curves recorded by model.fit().
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

# One panel per metric, training and validation curves overlaid.
panels = [('Accuracy', acc, val_acc), ('Loss', loss, val_loss)]
plt.figure(figsize=(12, 4))
for position, (title, train_curve, val_curve) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(train_curve, label='Train')
    plt.plot(val_curve, label='Val')
    plt.title(title)
    plt.legend()
plt.show()

# Final-epoch figures.
print(f'Accuracy: {acc[-1]:.2%}')
print(f'Val Accuracy: {val_acc[-1]:.2%}')

## 8. Confusion Matrix

In [None]:
# Collect true and predicted labels batch by batch so that each prediction
# stays paired with the label from the same batch.
y_true, y_pred = [], []
for imgs, lbls in val_ds:
    preds = model.predict(imgs, verbose=0)
    y_true.extend(lbls.numpy())
    y_pred.extend(np.argmax(preds, axis=1))

# Pin the label set to every known class. Without it, a validation split that
# lacks a class yields a smaller matrix whose tick labels no longer line up,
# and classification_report raises because target_names has the wrong length.
label_ids = list(range(len(class_names)))

cm = confusion_matrix(y_true, y_pred, labels=label_ids)
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Pred'); plt.ylabel('True')
plt.show()
# zero_division=0 reports 0 for classes without samples or predictions
# instead of emitting a warning for each undefined metric.
print(classification_report(y_true, y_pred, labels=label_ids,
                            target_names=class_names, zero_division=0))

## 9. Save & Download

In [None]:
# Write the metadata file that accompanies the exported model.
META_PATH = 'best_model.json'

# The file at MODEL_PATH is the checkpoint with the highest val_accuracy
# (ModelCheckpoint, save_best_only=True), which is not necessarily the last
# epoch. Report the metrics of that epoch so the metadata describes the model
# that is actually downloaded. max() returns the first maximum, matching a
# checkpoint that is only overwritten on a strict improvement.
best_epoch = max(range(len(val_acc)), key=lambda i: val_acc[i])

meta = {
    'class_names': class_names,
    'input_size': [IMG_SIZE, IMG_SIZE],
    'metrics': {
        'accuracy': float(acc[best_epoch]),
        'val_accuracy': float(val_acc[best_epoch]),
    },
    'timestamp': datetime.now().isoformat()
}
with open(META_PATH, 'w', encoding='utf-8') as f:
    json.dump(meta, f, indent=2)
print(f'Saved: {MODEL_PATH}, {META_PATH}')

In [None]:
from google.colab import files
# Send the trained model and its metadata to the browser as downloads.
for artifact in (MODEL_PATH, META_PATH):
    files.download(artifact)

# Next steps for the user (message intentionally left in Indonesian).
print('\nPindahkan ke folder models/ lalu jalankan:')
print('streamlit run app/main.py')