In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from PIL import Image

import tensorflow as tf
from keras.src.legacy.preprocessing.image import ImageDataGenerator

import time
from IPython.display import clear_output

In [2]:

# Configuration and loading of the submission test table.
#
# adjust this path to your machine / file structure
#
base_file_path = 'C:/Users/nikoLocal/Documents/Opencampus/Machine_Vision_challenge_data/'

# Import random Test Dataset
submission_csv_name = 'Y_random_nKwalR1.csv' #this is needed only for the file name list
# os.path.join instead of string concatenation: base_file_path already ends with '/',
# so prepending another '/' to the sub-folder produced a doubled separator.
submission_image_path = os.path.join(base_file_path, 'input_test_1a4aqAg/input_test')

submission_test_df = pd.read_csv(os.path.join(base_file_path, submission_csv_name))

# Lookup tables from the original (French) class names to numeric ids / display strings.
# They are not used further down in this cell.
dict_numbers = {'GOOD': 0, 'Boucle plate': 1, 'Lift-off blanc': 2, 'Lift-off noir': 3, 'Missing': 4,
                'Short circuit MOS': 5}
dict_strings = {'GOOD': '0_GOOD', 'Boucle plate': '1_Flat loop', 'Lift-off blanc': '2_White lift-off',
                'Lift-off noir': '3_Black lift-off', 'Missing': '4_Missing', 'Short circuit MOS': '5_Short circuit MOS'}

label_list = ['0_GOOD', '1_Flat loop', '2_White lift-off', '3_Black lift-off', '4_Missing', '5_Short circuit MOS']

# Numeric label -> display string for the submission table (includes the extra class 6).
dict_strings_sub = {0: '0_GOOD', 1: '1_Flat loop', 2: '2_White lift-off', 3: '3_Black lift-off', 4: '4_Missing',
                    5: '5_Short circuit MOS', 6: '6_Drift'}

# Add a string label column derived from the numeric 'Label' column.
submission_test_df['LabelStr'] = submission_test_df['Label'].map(dict_strings_sub)

# Series.map yields NaN for any value missing from the dict; fail loudly here instead of
# carrying NaN labels into later cells.
_unmapped_labels = submission_test_df.loc[submission_test_df['LabelStr'].isna(), 'Label'].unique()
if len(_unmapped_labels) > 0:
    raise ValueError('Labels without a string mapping: {}'.format(list(_unmapped_labels)))

submission_test_df.head()

Unnamed: 0.1,Unnamed: 0,filename,window,lib,Label,LabelStr
0,0,15669dca9fa601ffded9be0f9cd48097d45e7e05d662b8...,2003,Die01,5,5_Short circuit MOS
1,1,16a9a7106da45758c103a182306d6e2991634e0574edce...,2003,Die01,1,1_Flat loop
2,2,216d84546bb291a840ac2ad0752a828f1f3d64abf79191...,2003,Die01,5,5_Short circuit MOS
3,3,219a89d631e95e19643566512d8bcc45233fe4fe0524c9...,2003,Die01,6,6_Drift
4,4,2a441bba89ae0a867b3ee938063d55538379f8e1298b3c...,2003,Die01,1,1_Flat loop


In [3]:
#Load Well performing model

#adjust this path for the model - full path including *.keras
# The path is assembled from components instead of a single string with backslashes:
# in a normal (non-raw) string literal '\I' and '\m' are invalid escape sequences, and a
# hard-coded backslash separator only works on Windows.
model_path = os.path.join(
    base_file_path,
    'model_evaluation',
    'ImgSz_299_Aug_unbalanced_Cweights_focalLoss',
    'InceptionV3_MultiPhase_full_fine_tune',
    'model.keras',
)
Trained_Model = tf.keras.models.load_model(model_path)

In [4]:

# Image pipeline for the submission test set.
# ImageDataGenerator is used because flow_from_dataframe() pairs well with pandas dataframes
# https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
# (deprecated, but still usable as discussed in the feedback session)

# HYPERPARAMETERS ########
target_size = (299,299)     # load size in pixels - must match what the model was trained with
class_mode = 'categorical'  # labels as one-hot vectors
#class_mode = 'input'
batch_size = 8              # NOTE: not consumed below - the generator is built with batch_size=1
labelCol = 'LabelStr'       # NOTE: not consumed below - y_col is given literally
#########################

# scale pixel intensities by 1/255
rescale = 1.0/255.0

# All augmentation switched off; only the rescaling is applied.
_no_augmentation = dict(
    horizontal_flip=False,
    vertical_flip=False,
    rotation_range=0.0,
    shear_range=0.0,
)
datagen_submission_test = ImageDataGenerator(
    rescale=rescale,
    validation_split=0.0,
    **_no_augmentation)

##########################################################

# Generator over the submission test data: one image per batch, in dataframe order.
_flow_options = dict(
    x_col='filename',
    y_col='LabelStr',
    target_size=target_size,
    keep_aspect_ratio=True,
    class_mode=class_mode,
    batch_size=1,
    color_mode="rgb",  # also needs to be identical to the model used
    shuffle=False,
    seed=42,
    subset='training',
)
test_generator_submission = datagen_submission_test.flow_from_dataframe(
    submission_test_df,
    submission_image_path,
    **_flow_options)

Found 1055 validated image filenames belonging to 7 classes.


In [5]:
# Display names for the test-set classes; list position i holds the name for class index i.
label_list_test = [
    '0_GOOD',
    '1_Flat loop',
    '2_White lift-off',
    '3_Black lift-off',
    '4_Missing',
    '5_Short circuit MOS',
    '6_Drift',
]

In [6]:
# Run the trained model once over the whole submission generator.
predicted_labels = Trained_Model.predict(test_generator_submission)

# Reduce each row of scores to the index of its largest entry (the predicted class number).
predicted_labels_num = predicted_labels.argmax(axis=-1)


[1m 146/1055[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m48s[0m 53ms/step

KeyboardInterrupt: 

In [19]:
# Inspect the predicted class indices produced by the prediction cell.
predicted_labels_num

array([4, 4, 4, ..., 4, 4, 2], shape=(1055,))

In [20]:
# Create the manual-labelling table, or resume it from disk.
#
# The table is the submission dataframe plus two columns:
#   'Predicted_Label' - class index predicted by the model
#   'Manual_Label'    - label typed in by the user, 'n.a.' while still unlabelled

#save to file
saveName = 'manual_label_df_submission.csv'
SavePath = os.path.join(base_file_path, saveName)

# DO NOT set this to True if you do not intend to clear your previous progress
Restart_Flag = False

if os.path.exists(SavePath) and not Restart_Flag:
    # Resume: the saved file already contains predictions and labels, so the (slow)
    # prediction cell does not have to be re-run first.
    # Manual_Label is read as text so typed labels such as '4' stay strings and remain
    # comparable with the 'n.a.' placeholder.
    manual_label_df_submission = pd.read_csv(SavePath, dtype={'Manual_Label': str})
    print('loaded dataframe from file')
else:
    # Fresh start: build the table from the submission dataframe and the model predictions.
    manual_label_df_submission = submission_test_df.copy(deep=True) #deepcopy of the submission data dataframe
    manual_label_df_submission['Predicted_Label'] = predicted_labels_num
    manual_label_df_submission['Manual_Label'] = 'n.a.'

    # With Restart_Flag set and an existing file, the file is left untouched here; it is
    # only replaced once a later cell saves the table.
    if not os.path.exists(SavePath):
        manual_label_df_submission.to_csv(SavePath, index=False)



loaded dataframe from file


In [21]:
# Show the first rows of the labelling table, including Predicted_Label and Manual_Label.
manual_label_df_submission.head()

Unnamed: 0.1,Unnamed: 0,filename,window,lib,Label,LabelStr,Predicted_Label,Manual_Label
0,0,15669dca9fa601ffded9be0f9cd48097d45e7e05d662b8...,2003,Die01,5,5_Short circuit MOS,4,n.a.
1,1,16a9a7106da45758c103a182306d6e2991634e0574edce...,2003,Die01,1,1_Flat loop,4,n.a.
2,2,216d84546bb291a840ac2ad0752a828f1f3d64abf79191...,2003,Die01,5,5_Short circuit MOS,4,n.a.
3,3,219a89d631e95e19643566512d8bcc45233fe4fe0524c9...,2003,Die01,6,6_Drift,4,n.a.
4,4,2a441bba89ae0a867b3ee938063d55538379f8e1298b3c...,2003,Die01,1,1_Flat loop,4,n.a.


In [45]:

# Manual labelling session.
#
# For every still-unlabelled row position in [i_run_start, stop_index) the image is shown
# together with the model's prediction, the user types a label, and the table is written
# to SavePath immediately so that an interrupted session loses nothing.

i_run_start = 300
stop_index = 500

# never run past the end of the table
n_rows = manual_label_df_submission['Manual_Label'].size
if stop_index >= n_rows:
    stop_index = n_rows

#not used at the moment
certainty_threshold = 0.91

# Row position i is paired with generator batch i. That only holds when every batch is a
# single image, the order is not shuffled and no file was dropped by the generator.
if test_generator_submission.batch_size != 1 or test_generator_submission.shuffle:
    raise ValueError('test_generator_submission must use batch_size=1 and shuffle=False')
if test_generator_submission.n != n_rows:
    raise ValueError('generator holds {} images but the label table has {} rows'.format(
        test_generator_submission.n, n_rows))

# Integer position of the target column, for a single positional write per label.
manual_label_col = manual_label_df_submission.columns.get_loc('Manual_Label')

for i_run in range(i_run_start, stop_index):

    if manual_label_df_submission['Manual_Label'].iloc[i_run] != 'n.a.':
        print('Datapoint already labeled - going to next')
        continue

    # Fetch batch i_run directly by index. This replaces setting `batch_index` and
    # iterating, does not depend on the iterator's internal state, and skips image
    # loading for rows that are already labelled.
    data = test_generator_submission[i_run]
    images = data[0]  # first element of the batch is the image array

    # Only the images are passed to the model, not the whole generator batch.
    prediction = Trained_Model.predict(images, verbose=0)
    predicted_label = np.argmax(prediction, axis=-1)[0]

    #process every image - do not trust the model certainty
    label_certainty = prediction[0][predicted_label]

    temp_img_generator = np.squeeze(images)

    fig = plt.figure()
    # 'gray' is the colormap name accepted by all matplotlib versions.
    plt.imshow(temp_img_generator, cmap='gray')
    plt.title('Pred. Label: {}, certainty: {}'.format(label_list_test[predicted_label],label_certainty))

    # give the GUI event loop time to draw the figure before input() blocks
    plt.pause(0.5)

    Input_class = input("Press Enter to continue...")
    print('Input class: {}'.format(Input_class))

    # Single positional write on the dataframe itself; the chained form
    # `df['Manual_Label'].iloc[i] = ...` can end up writing to a temporary copy.
    manual_label_df_submission.iloc[i_run, manual_label_col] = Input_class

    plt.close(fig)

    #clear output after user input is registered
    clear_output()

    #save to file after each iteration
    manual_label_df_submission.to_csv(SavePath, index=False)

# report the position at which the session stopped
i_run = max(i_run_start, stop_index)
print("{} items processed".format(i_run))

#save to file when exiting the loop so that no progress is lost
manual_label_df_submission.to_csv(SavePath, index=False)


500 items processed


In [35]:
# Write the current labelling table to SavePath (without the index column).
manual_label_df_submission.to_csv(SavePath, index=False)

In [37]:
# Spot check: show the manual label stored at row position 800.
manual_label_df_submission['Manual_Label'].iloc[800]

'4'

In [23]:
#save to file when exiting the loop so that no progress is lost
manual_label_df_submission.to_csv(SavePath, index=False)

In [24]:
# Show the path the labelling table is saved to.
SavePath

'C:/Users/nikoLocal/Documents/Opencampus/Machine_Vision_challenge_data/manual_label_df_submission.csv'

In [25]:
# Show the path the labelling table is saved to (repeat of the previous cell).
SavePath

'C:/Users/nikoLocal/Documents/Opencampus/Machine_Vision_challenge_data/manual_label_df_submission.csv'