In [1]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### 重命名测试集（test）：把 1.jpg 改成 001.jpg
`tf.keras.utils.image_dataset_from_directory` 按文件名的字典序（字符串顺序）读取文件，所以文件名未补零时的读取顺序是：

['test\\1.jpg', 'test\\10.jpg', 'test\\100.jpg', 'test\\101.jpg', 'test\\102.jpg', 'test\\103.jpg', 'test\\104.jpg', 'test\\105.jpg', 'test\\106.jpg', 'test\\107.jpg']

这个顺序与 TestVal.xlsx（我对照图片人工标注的标签，按 1, 2, 3, …, 500 的顺序排列）不一致，没法逐行对照计算正确率。把文件名补零成三位数（001.jpg, 002.jpg, …）之后，字典序就和数字顺序一致了。

In [2]:
def zero_pad_image_names(folder, width=3):
    """Zero-pad the numeric names of the ``.jpg`` files in ``folder``.

    ``7.jpg`` becomes ``007.jpg`` (for ``width=3``), so that sorting the file
    names as strings gives the same order as sorting them as numbers.

    Each file keeps its own number: gaps in the numbering are preserved
    instead of being closed up, so a file's name still matches the id it was
    labelled with. Files that already carry the padded name are left alone,
    which makes the function safe to run more than once. Files whose stem is
    not a plain decimal number are ignored.

    Args:
        folder: Directory that directly contains the ``.jpg`` files.
        width: Minimum number of digits in the new file names.

    Returns:
        The number of files that were renamed.

    Raises:
        FileExistsError: If a new name is already taken (for example both
            ``1.jpg`` and ``001.jpg`` exist). Nothing is renamed in that case.
    """
    planned = []
    for filename in os.listdir(folder):
        stem = os.path.splitext(filename)[0]
        if not filename.endswith(".jpg") or not stem.isdecimal():
            continue
        new_name = f"{int(stem):0{width}d}.jpg"
        if new_name != filename:
            planned.append((filename, new_name))

    # Validate the whole plan before touching the disk: os.rename silently
    # overwrites an existing destination on POSIX, which would lose an image.
    targets = set()
    for filename, new_name in planned:
        if new_name in targets or os.path.exists(os.path.join(folder, new_name)):
            raise FileExistsError(
                f"Cannot rename {filename!r}: {new_name!r} already exists in {folder!r}"
            )
        targets.add(new_name)

    for filename, new_name in planned:
        os.rename(os.path.join(folder, filename),
                  os.path.join(folder, new_name))
    return len(planned)


# The unlabelled test images are the ones that need renaming: they are the
# files compared row by row against TestVal.xlsx. The "train" folder keeps
# its images inside class subfolders, so it has no .jpg files at top level.
folder = "test"

if os.path.isdir(folder):
    zero_pad_image_names(folder)
    print("重命名完成")
else:
    print(f"Folder not found, nothing renamed: {folder!r}")

重命名完成


In [3]:
# Loader settings shared by all three splits: every image is resized to
# 128x128 and served in batches of 32.
common_args = dict(image_size=(128, 128), batch_size=32)

# Training split: labelled, reshuffled.
train = tf.keras.utils.image_dataset_from_directory(
    "train", shuffle=True, **common_args
)

# Validation split: labelled, not shuffled.
val = tf.keras.utils.image_dataset_from_directory(
    "val", shuffle=False, **common_args
)

# Test split: label_mode=None makes the dataset yield images only, and
# shuffle=False keeps the predictions in file order so they can be compared
# with an external label sheet.
test = tf.keras.utils.image_dataset_from_directory(
    "test", shuffle=False, label_mode=None, **common_args
)

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.
Found 500 files.


In [4]:
# Rescales pixel values from the 0-255 range down to 0-1.
normalizer = layers.Rescaling(1./255)

# Random perturbations used on the training images only.
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
    layers.RandomTranslation(0.1, 0.1),
]
data_augment = models.Sequential(augmentation_layers)


def augment_and_scale(x, y):
    """Augment a labelled image batch, then rescale it; labels pass through."""
    return normalizer(data_augment(x)), y


def scale_labeled(x, y):
    """Rescale a labelled image batch; labels pass through."""
    return normalizer(x), y


def scale_unlabeled(x):
    """Rescale an image batch that carries no labels."""
    return normalizer(x)


# Build the input pipelines: map the preprocessing in parallel and prefetch.
autotune = tf.data.AUTOTUNE

train_ds = train.map(augment_and_scale, num_parallel_calls=autotune)
train_ds = train_ds.prefetch(autotune)

val_ds = val.map(scale_labeled, num_parallel_calls=autotune)
val_ds = val_ds.prefetch(autotune)

test_ds = test.map(scale_unlabeled, num_parallel_calls=autotune)
test_ds = test_ds.prefetch(autotune)

print("Classes:", train.class_names)

Classes: ['cat', 'dog']


In [5]:
# Restore the saved model from "cnn2.keras" and print its layer summary.
model = models.load_model("cnn2.keras")
model.summary()

In [6]:
# Score every test image; the result is expected to have shape (N, 1).
probs = model.predict(test_ds)

# Flatten to 1-D and threshold at 0.5 to get hard 0/1 class labels.
flat_probs = probs.ravel()
pred_int = np.greater_equal(flat_probs, 0.5).astype(int)

# One-column table of predicted labels, in the order the test set was read.
prediction = pd.DataFrame(pred_int, columns=["prediction"])

print(prediction.head())

16/16 ━━━━━━━━━━━━━━━━━━━━ 3s 99ms/step
   prediction
0           0
1           0
2           0
3           1
4           1


In [7]:
# Load the hand-labelled ground truth and force every column to integers.
TestVal = pd.read_excel("TestVal.xlsx").astype(int)

# pd.concat(axis=1) aligns rows on the index and pads missing rows with NaN.
# A row-count mismatch would therefore go unnoticed and distort the accuracy
# computed from this table, so fail loudly instead.
if len(TestVal) != len(prediction):
    raise ValueError(
        f"TestVal.xlsx has {len(TestVal)} rows but there are "
        f"{len(prediction)} predictions; they must match one-to-one."
    )

# Join label sheet and predictions side by side, strictly by row position.
combined = pd.concat(
    [TestVal.reset_index(drop=True), prediction.reset_index(drop=True)],
    axis=1,
)
# Assumes the sheet has exactly two columns (image id, true label) - the
# assignment below raises if that is not the case.
combined.columns = ["id", "true_label", "prediction"]
print(combined.head(20))

    id  true_label  prediction
0    1           0           0
1    2           0           0
2    3           0           0
3    4           1           1
4    5           1           1
5    6           0           0
6    7           1           1
7    8           1           1
8    9           1           1
9   10           1           1
10  11           1           1
11  12           0           0
12  13           0           0
13  14           1           1
14  15           1           1
15  16           0           1
16  17           1           1
17  18           0           1
18  19           1           1
19  20           0           1


In [8]:
# Row-wise comparison of the ground-truth label with the predicted label.
is_match = combined["true_label"].eq(combined["prediction"])

correct = is_match.sum()      # number of rows where both labels agree
total = len(combined)         # number of evaluated rows
accuracy = correct / total

print(f"Correct: {correct}/{total}")
print(f"Accuracy: {accuracy*100:.2f}%")

Correct: 455/500
Accuracy: 91.00%
