In [1]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import pickle

In [8]:
# --- Inference configuration --------------------------------------------------
# Paths are relative to the notebook's working directory.
TEST_CSV = "csv_files/test.csv"
TEST_IMG_DIR = "dataset/imgs/test"
MODEL_PATH = "model/cnn_batchwise_best_model.keras"

# Label pickles in priority order. The loading cell uses the FIRST usable entry,
# so the file that belongs to MODEL_PATH (the batch-wise CNN) must come first;
# label files from the other training runs are kept only as fallbacks.
PICKLE_CANDIDATES = [
    "pickle_files/labels_list_cnn_batchwise.pkl",
    "pickle_files/labels_list_cnn_non_batch.pkl",
    "pickle_files/labels_list_vgg16_fine_tuned_non_batch.pkl",
    "pickle_files/labels_list_vgg16_deep_non_batch.pkl",
]

# Destination of the per-image prediction table.
OUTPUT_CSV = "csv_files/final_test_predictions.csv"


In [9]:
# Restore the trained network from MODEL_PATH, then print its architecture so
# the layer stack can be checked before running inference.
model = load_model(filepath=MODEL_PATH)
model.summary()

In [10]:
# Resolve `class_names`, the list that maps a predicted index to its label.
# Candidates are tried in PICKLE_CANDIDATES order; the first file that exists,
# decodes to a non-empty label list and (when the model exposes a single output
# tensor) has as many labels as the model has outputs is used.
output_shape = getattr(model, "output_shape", None)
n_outputs = output_shape[-1] if isinstance(output_shape, tuple) else None
if not isinstance(n_outputs, int):
    # Multi-output or unknown shape: the width check cannot be applied.
    n_outputs = None

class_names = None
for p in PICKLE_CANDIDATES:
    if not os.path.exists(p):
        continue

    print("Trying to load:", p)
    # NOTE(security): pickle.load can execute arbitrary code — only point
    # PICKLE_CANDIDATES at files produced by your own training runs.
    with open(p, "rb") as f:
        labels = pickle.load(f)

    candidate = None
    if isinstance(labels, dict):  # name -> idx mapping
        inv = {v: k for k, v in labels.items()}
        candidate = [inv[i] for i in sorted(inv)]
    elif isinstance(labels, (list, tuple, np.ndarray)):
        candidate = list(labels)

    if not candidate:
        # Unsupported payload or empty label set: try the next file.
        continue

    if n_outputs is not None and len(candidate) != n_outputs:
        # A label list of the wrong length would mislabel every prediction.
        print(f"Skipping {p}: {len(candidate)} labels but model has {n_outputs} outputs.")
        continue

    class_names = candidate
    print("Loaded class names:", class_names)
    break

if class_names is None:
    # Fail here rather than with an opaque TypeError inside the prediction loop.
    raise FileNotFoundError(
        "No usable label pickle found among: " + ", ".join(PICKLE_CANDIDATES)
    )

Trying to load: pickle_files/labels_list_vgg16_fine_tuned_non_batch.pkl
Loaded class names: ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']


In [15]:
# Maximum number of test rows to score; set to None to score the whole file.
MAX_TEST_IMAGES = 5000

df = pd.read_csv(TEST_CSV)
print("Loaded test.csv with rows:", len(df))

if MAX_TEST_IMAGES is not None and len(df) > MAX_TEST_IMAGES:
    # .copy() so later column assignments act on an independent frame, not a slice.
    df = df.head(MAX_TEST_IMAGES).copy()
    print(f"Testing only first {MAX_TEST_IMAGES} images.")
else:
    print(f"Testing all {len(df)} images.")

# Detect image column: exact name first, then a case-insensitive match
# (so e.g. "FileName" is recognised), and only then fall back to column 0.
possible_cols = ["image", "img", "Image", "image_path", "path", "filename"]
img_col = next((c for c in possible_cols if c in df.columns), None)

if img_col is None:
    by_lower = {}
    for col in df.columns:
        # setdefault keeps the left-most column when two names differ only by case.
        by_lower.setdefault(str(col).strip().lower(), col)
    img_col = next(
        (by_lower[c.lower()] for c in possible_cols if c.lower() in by_lower),
        None,
    )

if img_col is None:
    img_col = df.columns[0]   # fallback

print("Using image column:", img_col)

Loaded test.csv with rows: 79726
Testing only first 5000 images.
Using image column: FileName


In [16]:
def _to_test_path(name):
    """Return the path inside TEST_IMG_DIR for a CSV entry, keeping only its base name."""
    return os.path.join(TEST_IMG_DIR, os.path.basename(str(name)))


# Attach the resolved on-disk location of every listed image.
df["full_path"] = df[img_col].map(_to_test_path)

# Report rows whose image file is not present.
on_disk = df["full_path"].map(os.path.exists)
missing = df.loc[~on_disk]
if missing.shape[0] == 0:
    print("All test image paths found.")
else:
    print("WARNING: Missing image paths:", len(missing))
    print(missing.head())

All test image paths found.


In [17]:
# Read the image size the network expects from positions 1 and 2 of its input
# shape. Assumes a channels-last (batch, height, width, channels) layout —
# TODO confirm against the model summary.
input_shape = model.input_shape
H, W = input_shape[1:3]
print("Model expects input size:", H, "x", W)

Model expects input size: 224 x 224


In [18]:
# Score the test images in mini-batches. Calling model.predict once per image
# pays the full predict() setup cost for every file; batching gives the same
# per-image rows in `preds_list` ([file name, predicted label, confidence])
# with far fewer calls.
BATCH_SIZE = 32

preds_list = []
image_paths = df["full_path"].tolist()

for start in range(0, len(image_paths), BATCH_SIZE):
    batch_paths = image_paths[start:start + BATCH_SIZE]

    # Load + preprocess: resize to the model's input size and scale to [0, 1].
    # NOTE(review): the /255 scaling must match what was used in training —
    # confirm the saved model does not already contain its own rescaling layer.
    batch = np.stack([
        img_to_array(load_img(path, target_size=(H, W))) / 255.0
        for path in batch_paths
    ])

    # Predict: one row of class scores per image in the batch.
    probs = model.predict(batch, verbose=0)
    best_idx = np.argmax(probs, axis=1)
    best_prob = probs.max(axis=1)

    for path, class_idx, prob in zip(batch_paths, best_idx, best_prob):
        preds_list.append(
            [os.path.basename(path), class_names[class_idx], float(prob)]
        )

In [19]:
# Turn the collected [file name, label, confidence] rows into a table, write it
# to OUTPUT_CSV without the index column, and preview the first rows.
result_columns = ["image", "predicted_class", "confidence"]
pred_df = pd.DataFrame(data=preds_list, columns=result_columns)
pred_df.to_csv(path_or_buf=OUTPUT_CSV, index=False)

print("Saved predictions to:", OUTPUT_CSV)
print(pred_df.head())

Saved predictions to: csv_files/final_test_predictions.csv
            image predicted_class  confidence
0       img_1.jpg              c5    0.753562
1      img_10.jpg              c5    0.752525
2     img_100.jpg              c5    0.754902
3    img_1000.jpg              c5    0.749055
4  img_100000.jpg              c5    0.751700
