In this part of our course, we will apply the model you trained before to new data. We'll start by reusing some of the code you've seen before, slightly adapted to a new folder structure. We'll save the resulting vectors via pickle.

In [1]:
def get_image_embeddings(image_list,
                         model=None,
                         exts=('jpg', 'jpeg', 'png', 'bmp', 'tiff')):
    """
    Given a list of folder paths, returns a DataFrame of image embeddings with class labels.

    Parameters
    ----------
    image_list : list of str
        List of paths to images.
    model : keras.Model, optional
        Preloaded backbone. If None, a ResNet50(include_top=False, pooling='avg') is created.
    exts : tuple of str
        File extensions to include (without dot).

    Returns
    -------
    pd.DataFrame
        Columns:
          - 'filepath': full path to image
          - 'class': Empty
          - 'embedding': numpy.ndarray of shape (2048,)
    """
    # 1) Load model if not provided
    if model is None:
        model = ResNet50(weights='imagenet',
                         include_top=False,
                         pooling='avg')

    records = []
    # 2) For each image, create an embedding
    for i, image_path in enumerate(image_list):
        print(i)
        try:
            # load & preprocess
            img = image.load_img(image_path)  # original size
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            # get embedding
            emb = model.predict(x, verbose=0).reshape(-1)  # (2048,)
            # record
            records.append({
                'filepath': image_path,
                'class': None,
                'embedding': emb
            })
        except Exception as e:
            print(f"Warning: could not process {image_path}: {e}")

    # 3) Pack into DataFrame
    df = pd.DataFrame(records, columns=['filepath', 'class', 'embedding'])
    return df


super_folder = "../IFCBpics/"
all_images = [os.path.join(root, f) for root, dirs, files in os.walk(super_folder) for f in files if ".png" in f]
df = get_image_embeddings(all_images)
joblib.dump(df, "../embedded_photos.pkl")


super_folder = "../all_IFCBnets/"
all_images = [os.path.join(root, f) for root, dirs, files in os.walk(super_folder) for f in files if ".png" in f]
df = get_image_embeddings(all_images)
joblib.dump(df, "../embedded_photos_all.pkl")

NameError: name 'os' is not defined

## Task
Apply the Random Forest model you trained before to these new images (or rather: image vectors). I'll provide you with some code to read in the RF model and the vectors. Please save the predictions in the object DataFrame df, in the column "class".

In [3]:
clf = joblib.load("../clfmodel.pkl")
df = joblib.load("../embedded_photos.pkl")
###or
df = joblib.load("../embedded_photos_all.pkl")


[1;31merror[0m: [1mexternally-managed-environment[0m

[31m×[0m This environment is externally managed
[31m╰─>[0m To install Python packages system-wide, try apt install
[31m   [0m python3-xyz, where xyz is the package you are trying to
[31m   [0m install.
[31m   [0m 
[31m   [0m If you wish to install a non-Debian-packaged Python package,
[31m   [0m create a virtual environment using python3 -m venv path/to/venv.
[31m   [0m Then use path/to/venv/bin/python and path/to/venv/bin/pip. Make
[31m   [0m sure you have python3-full installed.
[31m   [0m 
[31m   [0m If you wish to install a non-Debian packaged Python application,
[31m   [0m it may be easiest to use pipx install xyz, which will manage a
[31m   [0m virtual environment for you. Make sure you have pipx installed.
[31m   [0m 
[31m   [0m See /usr/share/doc/python3.12/README.venv for more information.

[1;35mnote[0m: If you believe this is a mistake, please contact your Python insta

In [None]:
df["class"] = clf.predict(np.vstack(df["embedding"].values))
joblib.dump(df, "../embedded_photos_labelled.pkl")
###or
joblib.dump(df, "../embedded_photos_all_labelled.pkl")