In [None]:
# Import Packages

# for path handling
import pathlib

# for loading images into numpy
import imageio

import numpy as np
import pandas as pd

# for greyscale conversion
from skimage.color import rgb2gray

# for plotting
import matplotlib.pyplot as plt

# Reading Data and Data Preparation

https://www.kaggle.com/code/vanausloos/how-to-read-images-in-python/notebook

In [None]:
# get paths to images
paths_to_images = pathlib.Path('./images').glob('**/*.jpeg')
# sorted() consumes any iterable directly, so no intermediate list is needed.
# Sorting gives a stable order, which the fixed indices used further down rely on.
paths_sorted = sorted(paths_to_images)

# Index of the picture used as the running example in the following cells.
SAMPLE_IMAGE_INDEX = 24

# Fail early with a readable message (instead of a bare IndexError) when the
# image folder is missing, empty, or holds fewer files than expected.
assert len(paths_sorted) > SAMPLE_IMAGE_INDEX, (
    f"expected more than {SAMPLE_IMAGE_INDEX} '*.jpeg' files below './images', "
    f"found {len(paths_sorted)}"
)

sample_image = imageio.imread(paths_sorted[SAMPLE_IMAGE_INDEX])

In [None]:
# initialize data set
# The images are passed as a dict of lists so that pandas stores each image
# array as a single object cell of the "image" column. Handing the list of 3-D
# arrays over positionally makes pandas try to turn it into a 2-D table, which
# raises for same-shaped images ("Must pass 2-d input") and can also fail for
# differently shaped ones.
train_X = pd.DataFrame({"image": [imageio.imread(path) for path in paths_sorted]})

# check if images are loaded properly

def plot_image(image, suspension_type=""):
    """Draw ``image`` with matplotlib and show the resulting figure.

    Args:
        image: Image data that is handed unchanged to ``plt.imshow``.
        suspension_type: Text used as the plot title. With the default empty
            string no title is set.
    """
    # the grey colormap is requested for every image, colored or not
    plt.imshow(image, cmap="gray")

    has_title = suspension_type != ""
    if has_title:
        plt.title(suspension_type)

    plt.show()


# Visual smoke test: render the sample picture that was read from disk above.
plot_image(image=sample_image)
print("See image of colored bike? Nice! everything worked as expected.")


In [None]:
# Grey Scale images
print(f"Colored (original) images shape: {sample_image.shape}")
# shows height, width and depth (which is colors r,g,b)

sample_image_grey = rgb2gray(sample_image)
print(f"Grey scale images shape: {sample_image_grey.shape}")
# colors have been removed

plot_image(sample_image_grey)
print("See black and white image of the same bike? Lets go!")


def _to_grey(image):
    """Return ``image`` in grey scale; arrays without a channel axis pass through."""
    # Only 3-D arrays (height, width, channels) are converted. This keeps the
    # cell safe to re-run: after the first run the column already holds 2-D
    # arrays, which recent scikit-image versions of rgb2gray reject.
    # NOTE(review): a picture that was already 2-D when loaded keeps its
    # original dtype / value range - confirm whether such files exist.
    return rgb2gray(image) if image.ndim == 3 else image


# Apply grey scale conversion to all images
train_X["image"] = train_X["image"].apply(_to_grey)


# Adding target variable
We can determine whether a bike has full suspension or not by looking at the file path.

In [None]:
def extract_suspension_from_path(path: pathlib.PurePath):
    """Derive the suspension label of an image from the folders in its path.

    The path itself is tested first and then each of its ancestor folders, so
    an image is labelled both when it sits directly inside a label folder
    (``images/fullys/bike.jpeg``) and when it sits in a sub-folder of one
    (``images/fullys/brand/bike.jpeg``). The label folder closest to the file
    wins. As with the underlying ``PurePath.match`` patterns, the label folder
    must itself have a parent component in ``path``.

    Args:
        path: Path of an image file. Any ``pathlib`` path flavour is accepted,
            not only ``PosixPath``.

    Returns:
        ``"fully"`` for a ``fullys`` folder, ``"hardtail"`` for a ``hardtails``
        folder, or ``None`` when neither folder occurs in the path.
    """
    # `path` comes first, so every path that matched before this loop was
    # introduced still yields exactly the same label.
    for candidate in (path, *path.parents):
        if candidate.match('*/fullys/*'):
            return "fully"
        if candidate.match("*/hardtails/*"):
            return "hardtail"
    # no label folder anywhere in the path
    return None

# Label every image from its path. train_X was built from paths_sorted in the
# same order, so a plain list can be assigned row by row; wrapping the labels
# in a throwaway DataFrame first is unnecessary.
train_X["suspension"] = [extract_suspension_from_path(path) for path in paths_sorted]

# count types of suspension
# dropna=False also lists images for which no label could be derived (None);
# the default count would silently leave them out.
print(train_X["suspension"].value_counts(dropna=False))



In [None]:
# Visual check of the labelling: show a handful of images, each one titled
# with the suspension type derived for it.
for row in (0, 10, 100, 120):
    plot_image(train_X["image"][row], train_X["suspension"][row])

# One Hot Encoding

We need numeric values for ML to work.

In [None]:
# apply one hot encoding to suspension
# The dtype is pinned because the get_dummies default differs between pandas
# versions (uint8 before 2.0, bool since) and numeric columns are wanted here.
suspension_dummies = pd.get_dummies(train_X["suspension"], prefix="suspension_is", dtype="uint8")

# Indicator columns left over from an earlier run are dropped before the new
# ones are attached, so re-running this cell does not append duplicate columns.
train_X = pd.concat(
    [train_X.drop(columns=suspension_dummies.columns, errors="ignore"), suspension_dummies],
    axis=1,
)

In [None]:
# Sanity check of the encoding: print the first and the last rows of the
# "fully" indicator column.
fully_indicator = train_X["suspension_is_fully"]
for preview in (fully_indicator.head(), fully_indicator.tail()):
    print(preview)