In [7]:
import pandas as pd
import os
import shutil
import numpy as np
import cv2  # For image loading
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import RandomOverSampler

# Load both CSVs
species_df = pd.read_csv("KGA_S1_report_lila.csv") 
image_df = pd.read_csv("KGA_S1_report_lila_image_inventory.csv") 

# Extract only needed columns
species_df = species_df[['capture_id', 'question__species']]
image_df = image_df[['capture_id', 'image_path_rel']]

# Merge both dataframes on 'capture_id'
merged_df = pd.merge(image_df, species_df, on="capture_id", how="inner")
# Add a column to indicate if the image is empty
merged_df['is_empty'] = merged_df['question__species'].apply(lambda x: 1 if x == 'blank' else 0)

# Print dataset overview
print(merged_df[:50])

# Save the prepared dataset
merged_df.to_csv("dataset.csv", index=False)


         capture_id                                image_path_rel  \
0    KGA_S1#A01#1#1  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0001.JPG   
1    KGA_S1#A01#1#1  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0002.JPG   
2    KGA_S1#A01#1#1  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0003.JPG   
3    KGA_S1#A01#1#2  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0004.JPG   
4    KGA_S1#A01#1#2  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0005.JPG   
5    KGA_S1#A01#1#2  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0006.JPG   
6    KGA_S1#A01#1#3  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0007.JPG   
7    KGA_S1#A01#1#3  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0008.JPG   
8    KGA_S1#A01#1#3  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0009.JPG   
9    KGA_S1#A01#1#4  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0010.JPG   
10   KGA_S1#A01#1#4  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0011.JPG   
11   KGA_S1#A01#1#4  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0012.JPG   
12   KGA_S1#A01#1#5  KGA_S1/A01/A01_R1/KGA_S1_A01_R1_IMAG0013.JPG   
13   KGA_S1#A01#1#5  KGA_S1/A01/A0

In [8]:
# Image dimensions
IMG_SIZE = (128, 128)  # Resize all images to 128x128
DATA_DIR = r"KGA_S1/A09/A09_R1"  # Base directory

# Initialize lists
X, y = [], []

# Process each image
for index, row in merged_df.iterrows():
    img_path = row["image_path_rel"]

    # Filter only images in the specific subdirectory
    if DATA_DIR in img_path and os.path.exists(img_path):
        print(f"Processing {img_path}")
        img = cv2.imread(img_path)  # Load image
        img = cv2.resize(img, IMG_SIZE)  # Resize
        img = img / 255.0  # Normalize pixels (0-1)
        
        X.append(img)
        y.append(row['is_empty'])

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

print(f"Loaded {len(X)} images with shape {X.shape}")


Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0001.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0002.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0003.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0004.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0005.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0006.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0007.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0008.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0009.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0011.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0012.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0013.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0014.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0015.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0016.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0017.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0018.JPG
Processing KGA_S1/A09/A09_R1/KGA_S1_A09_R1_IMAG0

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

model = keras.models.Sequential([
    layers.InputLayer(input_shape=(128, 128, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
    callbacks=[early_stop]
)

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Confusion matrix
from sklearn.metrics import classification_report, confusion_matrix
y_pred = (model.predict(X_test) > 0.5).astype("int32")

print(classification_report(y_test, y_pred))

model.save("empty_image_detector.h5")




Epoch 1/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 404ms/step - accuracy: 0.5533 - loss: 0.6820 - val_accuracy: 0.5952 - val_loss: 0.6752
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 190ms/step - accuracy: 0.6317 - loss: 0.6576 - val_accuracy: 0.5952 - val_loss: 0.6720
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 195ms/step - accuracy: 0.5698 - loss: 0.6915 - val_accuracy: 0.5952 - val_loss: 0.6733
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 179ms/step - accuracy: 0.6203 - loss: 0.6632 - val_accuracy: 0.5952 - val_loss: 0.6704
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 176ms/step - accuracy: 0.6149 - loss: 0.6592 - val_accuracy: 0.5952 - val_loss: 0.6696
Epoch 6/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 177ms/step - accuracy: 0.6149 - loss: 0.6579 - val_accuracy: 0.5952 - val_loss: 0.6701
Epoch 7/20
[1m6/6[0m [32m━━━━━━━━━━━━

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import zilong
print(zilong.version())

ModuleNotFoundError: No module named 'zilongi'