# Geo Fossils-I classification (TensorFlow, basic)
Simple image-only classifier:
- Load Geo Fossils-I class folders
- Resize/pad to 224, ImageNet normalize
- Train EfficientNetB0 head (no texture features)
- Save confusion matrix

In [2]:
import json
import os
import random
from pathlib import Path
from typing import Dict, List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.model_selection import train_test_split

tf.get_logger().setLevel("ERROR")

class Config:
    def __init__(self, root: Path, img_size: int = 224, batch_size: int = 32, epochs: int = 12):
        self.root = Path(root)
        self.img_size = img_size
        self.batch_size = batch_size
        self.epochs = epochs
        self.figdir = Path("figures_tf_basic"); self.figdir.mkdir(parents=True, exist_ok=True)
        self.device = "cuda" if tf.config.list_physical_devices("GPU") else "cpu"
        print("Using device:", self.device)

cfg = Config(root=Path("geo fossil I"), batch_size=32, epochs=10)

Using device: cpu


In [3]:
# Utils and dataset splits
def set_seed(seed=13):
    random.seed(seed); np.random.seed(seed); tf.random.set_seed(seed)

def list_images_by_class(root: Path):
    manifest = []
    classes = sorted([d for d in root.iterdir() if d.is_dir()])
    class_to_idx = {c.name: i for i, c in enumerate(classes)}
    for c in classes:
        for p in sorted(c.glob("*.jpg")):
            manifest.append({"path": str(p), "label": class_to_idx[c.name], "classname": c.name})
    return manifest, classes

def stratified_split(manifest: List[Dict], train_ratio=0.7, val_ratio=0.15):
    df = pd.DataFrame(manifest)
    train_df, temp_df = train_test_split(df, test_size=1-train_ratio, stratify=df["label"], random_state=42)
    rel_val = val_ratio/(1-train_ratio)
    val_df, test_df = train_test_split(temp_df, test_size=1-rel_val, stratify=temp_df["label"], random_state=99)
    return train_df.to_dict("records"), val_df.to_dict("records"), test_df.to_dict("records")

set_seed()
manifest, classes = list_images_by_class(cfg.root)
train_rec, val_rec, test_rec = stratified_split(manifest)
with open("splits_tf_basic.json", "w") as f:
    json.dump({k:[r["path"] for r in v] for k,v in zip(["train","val","test"],[train_rec,val_rec,test_rec])}, f, indent=2)
print("Classes:", [c.name for c in classes])

Classes: ['Ammonites', 'Belemnites', 'Corals', 'Crinoids', 'Leaf fossils', 'Trilobites']


In [4]:
# tf.data pipeline (image-only)
def decode_and_resize(path, label):
    img_bytes = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img_bytes, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    # resize with pad to square
    img = tf.image.resize_with_pad(img, cfg.img_size, cfg.img_size)
    img = (img - tf.constant([0.485,0.456,0.406])) / tf.constant([0.229,0.224,0.225])
    return img, label

def make_ds(records, shuffle=True):
    paths = [r["path"] for r in records]; labels = [r["label"] for r in records]
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(paths), seed=42, reshuffle_each_iteration=True)
    ds = ds.map(decode_and_resize, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(cfg.batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

train_ds = make_ds(train_rec, shuffle=True)
val_ds = make_ds(val_rec, shuffle=False)
test_ds = make_ds(test_rec, shuffle=False)

In [5]:
# Model: EfficientNetB0
base = tf.keras.applications.EfficientNetB0(include_top=False, weights="imagenet", pooling="avg")
inp = tf.keras.Input(shape=(cfg.img_size, cfg.img_size, 3))
x = base(inp)
x = tf.keras.layers.Dense(256, activation="relu")(x)
x = tf.keras.layers.Dropout(0.3)(x)
out = tf.keras.layers.Dense(len(classes), activation="softmax")(x)
model = tf.keras.Model(inputs=inp, outputs=out)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [6]:
# Train
es = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True, monitor="val_accuracy")
history = model.fit(train_ds, validation_data=val_ds, epochs=cfg.epochs, callbacks=[es])

Epoch 1/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 2s/step - accuracy: 0.6079 - loss: 1.2399 - val_accuracy: 0.1944 - val_loss: 1.8088
Epoch 2/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 2s/step - accuracy: 0.9535 - loss: 0.3970 - val_accuracy: 0.2278 - val_loss: 1.7827
Epoch 3/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 2s/step - accuracy: 0.9809 - loss: 0.1336 - val_accuracy: 0.2222 - val_loss: 1.7520
Epoch 4/10
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 2s/step - accuracy: 0.9964 - loss: 0.0588 - val_accuracy: 0.2889 - val_loss: 1.7037
Epoch 5/10
[1m18/27[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m19s[0m 2s/step - accuracy: 0.9916 - loss: 0.0510

KeyboardInterrupt: 

In [None]:
# Evaluate and confusion matrix
y_true, y_pred = [], []
for imgs, labels in test_ds:
    preds = model.predict(imgs, verbose=0)
    y_true.extend(labels.numpy().tolist())
    y_pred.extend(np.argmax(preds, axis=1).tolist())
cm = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(6,5))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[c.name for c in classes])
disp.plot(ax=ax, cmap="Blues", colorbar=False)
ax.set_title("Confusion (TF basic)")
fig.tight_layout(); fig.savefig(cfg.figdir/"cm_tf_basic.png", dpi=200); plt.close(fig)
print("Test acc:", (np.array(y_true)==np.array(y_pred)).mean())
print("Saved confusion to", cfg.figdir/"cm_tf_basic.png")

Figures: `figures_tf_basic/cm_tf_basic.png`