In [None]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet101, NASNetMobile, InceptionV3
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

tf.config.optimizer.set_jit(False)

gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError:
        pass

tf.keras.mixed_precision.set_global_policy('mixed_float16')

AUTOTUNE = tf.data.AUTOTUNE

DATA_DIR   = r'C:/Users/maxim/Documents/DLProjectDress/datasets'

BATCH_SIZE = 32
EPOCHS     = 5
LR         = 1e-4
INPUT_SIZE = (224, 224)

# ─── compute global mean & std of the 'year' label ───
all_dfs = [pd.read_csv(os.path.join(DATA_DIR, f'fold{i}.csv'))
           for i in range(10)]
df_all  = pd.concat(all_dfs, ignore_index=True)
YEAR_MEAN = df_all['year'].mean()
YEAR_STD  = df_all['year'].std()
print(f'Label mean year = {YEAR_MEAN:.2f}, std = {YEAR_STD:.2f}')


In [68]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True
)
val_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
def preprocess(path, label, training):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, INPUT_SIZE)
    if training:
        img = tf.image.random_flip_left_right(img)
    img = img / 255.0

    # normalize the year label to zero‐mean, unit‐std
    label = (label - YEAR_MEAN) / YEAR_STD
    return img, tf.cast(label, tf.float32)

def make_dataset(df, training):
    # strip off leading 'datasets/' if present
    df = df.copy()
    df['file'] = df['file'].str.replace(r'^datasets[\\/]', '', regex=True)

    files  = [os.path.join(DATA_DIR, p) for p in df['file']]
    labels = df['year'].values

    ds = tf.data.Dataset.from_tensor_slices((files, labels))
    if training:
        ds = ds.shuffle(buffer_size=len(files))
    ds = ds.map(lambda x,y: preprocess(x,y,training),
                num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)
    return ds


In [70]:
def build_model(name, input_shape=(*INPUT_SIZE, 3), lr=LR):
    if name == 'resnet101':
        base = ResNet101(weights='imagenet', include_top=False, input_shape=input_shape)
    elif name == 'nasnetmobile':
        base = NASNetMobile(weights='imagenet', include_top=False, input_shape=input_shape)
    elif name == 'inceptionv3':
        base = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    else:
        raise ValueError(f"Unknown model: {name}")

    x = GlobalAveragePooling2D()(base.output)
    out = Dense(1)(x)   # regression head

    model = Model(base.input, out)
    model.compile(optimizer=Adam(lr), loss='mse', metrics=['mae'])
    return model

In [71]:
models = ['resnet101','nasnetmobile','inceptionv3']
results = {m: [] for m in models}

for model_name in models:
    print(f'\n=== {model_name} ===')
    for fold in range(10):
        # ── 1) Load the CSVs from DATA_DIR ──
        val_df   = pd.read_csv(os.path.join(DATA_DIR, f'fold{fold}.csv'))
        train_df = pd.concat([
            pd.read_csv(os.path.join(DATA_DIR, f'fold{i}.csv'))
            for i in range(10) if i != fold
        ], ignore_index=True)

        # ── 2) Strip leading 'datasets/' so paths are RELATIVE to DATA_DIR ──
        for df in (train_df, val_df):
            df['file'] = df['file'].str.replace(
                r'^datasets[\\/]', '', regex=True
            )

        # ── 3) Build tf.data pipelines ──
        train_ds = make_dataset(train_df, training=True)
        val_ds   = make_dataset(val_df,   training=False)

        # ── 4) Train & checkpoint ──
        model = build_model(model_name)
        ckpt = tf.keras.callbacks.ModelCheckpoint(
            f'{model_name}_fold{fold}.h5',
            monitor='val_loss', save_best_only=True
        )
        hist = model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=EPOCHS,
            callbacks=[ckpt],
            verbose=2
        )

        best = min(hist.history['val_loss'])
        results[model_name].append(best)
        print(f' Fold {fold} → best val_mse: {best:.4f}')

print('\nCV MSE per model:')
for m, vals in results.items():
    print(f' {m}:', [f'{v:.4f}' for v in vals])

# (Optional) save the summary
pd.DataFrame(results).to_csv('cv_results.csv', index=False)



=== resnet101 ===
Epoch 1/5
354/354 - 66s - loss: inf - mae: 1765.9618 - val_loss: inf - val_mae: 1714.6575
Epoch 2/5
354/354 - 52s - loss: inf - mae: 1681.9390 - val_loss: inf - val_mae: 1452.7983
Epoch 3/5
354/354 - 52s - loss: inf - mae: 1594.8499 - val_loss: inf - val_mae: 1550.4274
Epoch 4/5
354/354 - 51s - loss: inf - mae: 1500.4697 - val_loss: inf - val_mae: 1427.8801
Epoch 5/5
354/354 - 52s - loss: inf - mae: 1399.6899 - val_loss: inf - val_mae: 1343.1837
 Fold 0 → best val_mse: inf
Epoch 1/5


KeyboardInterrupt: 