In [None]:
#Import needed packages

import datetime
import pathlib
import yaml

#from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder 
from sklearn import svm 
from sklearn.metrics import accuracy_score
import tensorflow as tf

import pickle
from PIL import Image

# Load constants

In [None]:
# Parse the notebook settings; the file only needs to stay open while YAML is read.
with open("config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Image dimensions read from the configuration
IMAGE_WIDTH = config["image_width"]
IMAGE_HEIGHT = config["image_height"]
IMAGE_DEPTH = config["image_depth"]

# Folders holding the data and the saved models
DATA_DIR = pathlib.Path(config["data_dir"])
MODELS_DIR = pathlib.Path(config["models_dir"])

# Target column name and the train/test listing files
TARGET_NAME = config["target_name"]
DATA_TRAIN_FILE = config["data_train_file"]
DATA_TEST_FILE = config["data_test_file"]

# Functions

In [None]:
def load_resize_image(path,height,width):
    """Open an image file and return it, resized, as a numpy array.

    Parameters
    ----------
    path (Path): location of the image file to open
    height (int): height of the resized image
    width (int): width of the resized image

    Returns
    -------
    numpy.array holding the resized image
    """
    # The target size is handed to PIL as a (width, height) pair.
    target_size = (width, height)
    resized = Image.open(path).resize(target_size)
    return np.array(resized)

In [None]:
def build_x_and_y(df: pd.DataFrame, target: str, images: str,encoder):
    """Build the feature matrix x and the encoded label vector y for model fitting.

    Parameters
    ----------
    df (pd.DataFrame): dataframe holding one image per row
    target (str): name of the target column
    images (str): name of the column holding the resized images
    encoder: fitted encoder exposing ``transform`` on an (n, 1) array
        (an sklearn.preprocessing.OrdinalEncoder in this notebook)

    Returns
    -------
    x (numpy.array): one flattened image per dataframe row
    y (numpy.array): 1-D array of encoded target values, one per dataframe row
    """
    # Walk the image column directly instead of a row-wise DataFrame.apply:
    # only one column is needed, and this also copes with an empty dataframe.
    x = np.array([np.asarray(image).ravel() for image in df[images]])

    # The encoder is called with a single-feature 2-D array; its output is
    # flattened back to one value per row.
    labels = df[target].to_numpy().reshape(-1, 1)
    y = np.asarray(encoder.transform(labels)).flatten()
    return x, y

In [None]:
def build_image_database(path, target, images_dir=None):
    """Build a pandas dataframe with the target class and the path of each image.

    Every line of the listing file is expected to look like
    ``<image_id> <label>``: the text before the first space is the image id
    and everything after it is the label (which may itself contain spaces).

    Parameters:
        - path (Path): path of the listing file describing the images
        - target (str): name given to the label column
        - images_dir (Path, optional): folder holding the ``<image_id>.jpg``
          files. Defaults to ``DATA_DIR / "images"``.

    Return:
        A pandas dataframe with the columns ``image_id``, ``<target>`` and
        ``path``.
    """
    if images_dir is None:
        # Default location, resolved at call time from the notebook configuration.
        images_dir = DATA_DIR / "images"
    images_dir = pathlib.Path(images_dir)

    # Load each line into a single text column. Values are kept as raw text
    # (no numeric inference, no NA conversion) so ids survive unchanged.
    # NOTE(review): read_csv still splits on commas — this assumes the listing
    # lines contain none; confirm against the data files.
    raw = pd.read_csv(
        path,
        names=["all"],
        dtype=str,
        keep_default_na=False,
    )

    # Split once on the first space: column 0 is the id, column 2 the label.
    parts = raw["all"].str.partition(" ")
    image_ids = parts[0]
    labels = parts[2]

    # Path of the jpg file associated with each image id.
    paths = image_ids.map(lambda image_id: images_dir / f"{image_id}.jpg")

    return pd.DataFrame({"image_id": image_ids, target: labels, "path": paths})

In [None]:
def show_image(df,row,target):
    """Show the image stored at a given row, titled with its target label.

    Args:
        df (pandas.dataFrame): the dataframe of images; must contain a 'path'
            column and the ``target`` column
        row (int): the index label of the row to display
        target (string): the column name of the associated label
    Return
    ------
    None

    Raises
    ------
    AssertionError if ``target`` or 'path' is not a column of ``df``
    """

    assert target in df.columns, f"Column {target} not found in dataframe"
    assert 'path' in df.columns, "Column path doesn't exist in dataframe"
    _img = plt.imread(df.loc[row,'path'])
    plt.imshow(_img)
    # Display the label so the figure shows the image together with its class.
    plt.title(str(df.loc[row, target]))
    return

In [None]:
def classify_images(images,model,classes_names=None):
    """Classify images with a fitted model and optionally map indices to names.

    Parameters:
    -----------

    images(np.array): set of images to classify, in the layout ``model.predict`` expects
    model: fitted model whose ``predict`` returns one class index per image
        (the scikit-learn SVC in this notebook)
    classes_names (array-like, optional): class names; position ``i`` is the
        name returned for predicted index ``i``. When None, the integer
        indices are returned.

    Returns
    -------
    numpy.array of predicted class indices, or of class names when
    ``classes_names`` is given

    """

    # Predictions are cast to int so they can be used as positions.
    classes = np.asarray(model.predict(images)).astype(int)
    if classes_names is not None:
        # asarray lets callers pass a plain list as well as a numpy array.
        classes = np.asarray(classes_names)[classes]
    return classes

In [None]:
def save_model(model ,saving_dir=MODELS_DIR,basename=TARGET_NAME,append_time=False):
    """Pickle a model into ``<saving_dir>/svms`` and return the name it was saved under.

    Parameters
    ----------
    model: model object to be saved (serialized with pickle)
    saving_dir (path): root folder of saved models; the file goes in its
        ``svms`` sub-folder, which is created when missing
    basename (str): the basename of the model
    append_time (bool): indicate if the current time will be appended to the basename

    Returns
    -------
    str: the model name (basename plus optional timestamp), without extension
    """
    timestamp = '_' + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') if append_time else ''
    model_name = f"{basename}{timestamp}"

    # Create the destination folder so a fresh models directory does not fail.
    target_dir = pathlib.Path(saving_dir) / "svms"
    target_dir.mkdir(parents=True, exist_ok=True)

    # The .h5 extension is kept so file names match the other saved models,
    # but the content is a pickle: read it back with pickle.load.
    with open(target_dir / f"{model_name}.h5", "wb") as file:
        pickle.dump(model, file)
    return model_name

# Read train & test files

In [None]:
# Index the training and test images from the listing files named in the configuration
train_df = build_image_database(path=DATA_DIR / DATA_TRAIN_FILE, target=TARGET_NAME)
test_df = build_image_database(path=DATA_DIR / DATA_TEST_FILE, target=TARGET_NAME)

In [None]:
# Preview the first rows of the training dataframe
train_df.head()

In [None]:
# Preview the first rows of the test dataframe
test_df.head()

# View some images

In [None]:
# Display one training image drawn at random (a different one on each run)
show_image(df=train_df, row=np.random.randint(len(train_df)), target=TARGET_NAME)

In [None]:
# Display one test image drawn at random (a different one on each run)
show_image(df=test_df, row=np.random.randint(len(test_df)), target=TARGET_NAME)

# Resize Images

In [None]:
# Load every image from disk and resize it to IMAGE_HEIGHT x IMAGE_WIDTH.
# Only the 'path' column is needed, so map over it rather than applying row by row.

#Resize train images
train_df['resized_image'] = train_df['path'].map(
    lambda image_path: load_resize_image(image_path, IMAGE_HEIGHT, IMAGE_WIDTH)
)
#Resize test images
test_df['resized_image'] = test_df['path'].map(
    lambda image_path: load_resize_image(image_path, IMAGE_HEIGHT, IMAGE_WIDTH)
)

# Split dataset into x and y

In [None]:
# Ordinal encoder for the target, fitted on the training labels only.
# Labels not seen during fitting are configured to be encoded as -99.
encoder = OrdinalEncoder(unknown_value=-99, handle_unknown="use_encoded_value")
encoder.fit(train_df[TARGET_NAME].to_numpy()[:, np.newaxis])

In [None]:
# Flattened images (X) and encoded labels (y) for each split
X_train, y_train = build_x_and_y(
    df=train_df, target=TARGET_NAME, images='resized_image', encoder=encoder
)
X_test, y_test = build_x_and_y(
    df=test_df, target=TARGET_NAME, images='resized_image', encoder=encoder
)

# Build & train the model

In [None]:
# Support vector classifier with a polynomial kernel.
# NOTE(review): probability=True is requested but no predict_proba call appears in
# this notebook — confirm it is needed by whatever loads the saved model.
model  = svm.SVC(kernel="poly",probability=True)

In [None]:
%%time 
# Fit the classifier on the flattened training images; the cell magic above times the fit
model.fit(X_train,y_train)

# Evaluation of the model

In [None]:
# Class names taken from the encoder's categories for its single feature (the target).
# Presumably position i is the name encoded as i — it is used that way by classify_images.
classes_names = np.array(encoder.categories_[0])

In [None]:
# Try some predictions: test samples 10 to 19, mapped back to class names
classify_images(X_test[10:20],model,classes_names)

In [None]:
# Mean accuracy on the test split: share of test samples whose predicted
# class equals the encoded label
svm_accuracy = accuracy_score(y_test, model.predict(X_test))
svm_accuracy

# Save the model

In [None]:
# Persist the fitted SVM and keep the name it was saved under for the files written next
model_name = save_model(model, saving_dir=MODELS_DIR)

In [None]:
# Store the class names under the same base name as the saved model.
# Create the destination folder first so a fresh models directory does not fail.
(MODELS_DIR/"classes").mkdir(parents=True, exist_ok=True)
with open(MODELS_DIR/"classes"/f"{model_name}.yaml","w") as classe_file:
    # tolist() converts numpy values to plain Python objects, so the file holds
    # an ordinary YAML list that yaml.safe_load can read back.
    yaml.safe_dump(classes_names.tolist(),classe_file)

# Compare SVM with Neural network

In [None]:
# Load the neural network stored under the same base name as the SVM
neural_model = tf.keras.models.load_model(MODELS_DIR/f"neural_networks/{model_name}.h5")
# The network receives the resized images stacked into one array, without flattening
x= np.array(test_df["resized_image"].to_list())
# Keep, for each sample, the index of the highest network output as its predicted class
neural_prediction = neural_model.predict(x).argmax(axis=1)
# NOTE(review): this comparison assumes the network's output indices follow the same
# class ordering as `encoder` — confirm against the notebook that trained the network.
neural_accuracy = np.mean(y_test==neural_prediction)
neural_accuracy

In [None]:
# Side-by-side summary of both test accuracies, rounded to two decimals
print(f"""
======Accuracy=====
Neural network: {round(neural_accuracy,2)},
SVC: {round(svm_accuracy,2)}
""")