In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
import cv2

from keras.applications import inception_v3
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inception_v3_preprocessor

from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

from sklearn.model_selection import train_test_split

from tqdm import tqdm
import keras
from os import makedirs
from os.path import expanduser, exists, join

In [2]:
from class_image_classify_model import *

Version 1.2
Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device




In [3]:
import pandas as pd
import os

# Load metadata

In [9]:
# Read the shuffled training manifest, keeping only the image path and its label.
train_label_raw = pd.read_csv(
    'train_set_1_image_shuffle.csv',
    usecols=['label', 'image_path'],
)
train_label_raw

Unnamed: 0,image_path,label
0,images/7081912075039165723_7100.jpeg,0
1,images/6998898528495979803_8750.jpeg,0
2,images/7081261598936960282_3750.jpeg,1
3,images/7033757107044306177_750.jpeg,0
4,images/7079000650503163162_3250.jpeg,0
...,...,...
39908,images/7092751126881258778_4071.jpeg,1
39909,images/7031012310688877850_315.jpeg,0
39910,images/7020713309389294849_00.jpeg,1
39911,images/7009563299805138203_250.jpeg,1


# Load data and train

In [10]:
# Number of rows in the training manifest; used to pick the shard boundaries below.
total_row = len(train_label_raw)
print('Total row:', total_row)

Total row: 39913


In [16]:
# Train on one shard of the manifest: rows [start_idx, end_idx).
# This run covers the final shard, 35000 up to the last row (39913 rows in total).
start_idx, end_idx = 35000, 39913
num_ep, steps_per_epoch, validation_steps = 10, 256, 32

# Resume from the checkpoint written for the preceding shard (30000..35000).
checkpoint_path = 'model_checkpoint/checkpoint_30000_35000.h5'
trainer = image_binary_classify_keras()
trainer.init_model(checkpoint_path=checkpoint_path)
# trainer.init_model()   # alternative kept by the author: initialise without a checkpoint

# Slice the shard out of the manifest and hand its paths/labels to the trainer.
train_label_df = train_label_raw.iloc[start_idx:end_idx]
trainer.generate_train_valid(
    img_path_arr=train_label_df['image_path'],
    label_arr=train_label_df['label'],
)
trainer.train_model(
    num_epochs=num_ep,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
)

# save model -- the file name encodes the shard so the next shard can resume from it
os.makedirs('model_checkpoint', exist_ok=True)
trainer.save_model(f'model_checkpoint/checkpoint_{start_idx}_{end_idx}.h5')

===== Load model successful from model_checkpoint/checkpoint_30000_35000.h5
===== Load images...


  0%|          | 0/4913 [00:00<?, ?it/s]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
===== Saved model.


In [22]:
import gc
# Force a full garbage-collection pass after training; the integer shown as the cell
# output is gc.collect()'s return value (the number of unreachable objects it found).
gc.collect()

518

In [78]:
# index_lst = [(0, 5000), (5000, 10000), (10000, 15000), (15000, 20000), (20000, 25000), (25000, 30000), (30000, 35000), (35000, 39913)]
# for i, tup_idx in enumerate(index_lst):
#     if i != 0:
#         start_idx = tup_idx[0]
#         end_idx = tup_idx[1]
#         num_ep = 10
#         steps_per_epoch = 256
#         validation_steps = 32

#         trainer = image_binary_classify_keras()
#         checkpoint_path = f'model_checkpoint/checkpoint_{index_lst[i-1][0]}_{index_lst[i-1][1]}.h5'
#         trainer.init_model(checkpoint_path=checkpoint_path)
# #         trainer.init_model()

#         train_label_df = train_label_raw.iloc[start_idx:end_idx]
#         trainer.generate_train_valid(img_path_arr=train_label_df['image_path'], label_arr=train_label_df['label'])
#         trainer.train_model(num_epochs=num_ep, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps)

#         # save model
#         os.makedirs('model_checkpoint', exist_ok=True)
#         trainer.save_model(f'model_checkpoint/checkpoint_{start_idx}_{end_idx}.h5')
#         gc.collect()

# Test

In [4]:
# Load the checkpoint written by the last training shard (rows 35000..39913).
final_checkpoint = "model_checkpoint/checkpoint_35000_39913.h5"
model = keras.models.load_model(final_checkpoint)

In [5]:
# Manifests of the images to be scored by the trained model.
# The prediction cell reads the image_path, id and label columns of the test manifest;
# presumably train_set_2_image.csv has the same layout -- verify.
train_set_2_path = 'train_set_2_image.csv'
test_set_path = 'test_set_image.csv'
train_set_2_image = pd.read_csv(train_set_2_path)
test_set_image = pd.read_csv(test_set_path)

In [70]:
train_set_2_image.shape

(27115, 3)

In [51]:
test_set_image.shape

(12643, 3)

In [62]:
# Score the test manifest one slice at a time; this run takes rows 10000 up to the
# last row (the manifest has 12643 rows).
chunk_start, chunk_stop = 10000, 12643
df_test = test_set_image.iloc[chunk_start:chunk_stop]
df_test.shape

(2643, 3)

In [66]:
import gc
# Force a full garbage-collection pass between prediction slices; the integer shown as
# the cell output is gc.collect()'s return value (number of unreachable objects found).
gc.collect()

710

In [63]:
# Create x_test: read every image listed in df_test and resize it to 299x299.
# NOTE(review): cv2.imread yields pixels in BGR channel order -- confirm that this, and
# the /255 scaling below, match the preprocessing the model was trained with.
image_paths = df_test['image_path'].values

# Allocate the float32 batch once and fill it row by row, instead of collecting a list
# of uint8 images and copying the whole thing into a new array afterwards.
x_test = np.empty((len(image_paths), 299, 299, 3), dtype=np.float32)
for row, image_path in enumerate(tqdm(image_paths)):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None rather than
        # raising; stop here with the offending path instead of failing inside
        # cv2.resize with an opaque OpenCV assertion.
        raise OSError(f'cv2.imread could not read image: {image_path!r}')
    x_test[row] = cv2.resize(img, (299, 299))

# Scale the 8-bit pixel values to [0, 1] in place (no second full-size copy).
x_test /= 255.

# Predict x_test
predictions = model.predict(x_test, verbose=2)

# Set column names to those generated by the one-hot encoding earlier
# col_names = one_hot.columns.values
# The model emits two scores per image; label the columns with the class ids 0 and 1.
col_names = np.array([0, 1])

# Create the submission data.
submission_results = pd.DataFrame(predictions, columns=col_names)

# Prepend the identifying columns. df_test is a slice, so its index does not start at 0;
# reset it once so the rows line up positionally with the predictions.
# Each insert goes to position 0, so the final order is: image_path, id, label, 0, 1.
meta = df_test.reset_index(drop=True)
submission_results.insert(0, 'label', meta['label'])
submission_results.insert(0, 'id', meta['id'])
submission_results.insert(0, 'image_path', meta['image_path'])

100%|█████████████████████████████████████████████████████████████████████████████| 2643/2643 [00:22<00:00, 115.05it/s]


83/83 - 99s


In [64]:
submission_results

Unnamed: 0,image_path,id,label,0,1
0,images/6988467376165506305_500.jpeg,6988467376165506305,0,0.994877,0.005779
1,images/6988467376165506305_5000.jpeg,6988467376165506305,0,0.967974,0.032380
2,images/6988467376165506305_4250.jpeg,6988467376165506305,0,0.992729,0.008728
3,images/6988467376165506305_00.jpeg,6988467376165506305,0,0.998430,0.001538
4,images/6988467376165506305_3000.jpeg,6988467376165506305,0,0.986167,0.015408
...,...,...,...,...,...
2638,images/6847699753216576769_00.jpeg,6847699753216576769,0,0.997876,0.001886
2639,images/6847699753216576769_535.jpeg,6847699753216576769,0,0.709060,0.191781
2640,images/6847699753216576769_250.jpeg,6847699753216576769,0,0.971020,0.018625
2641,images/7031018951089933594_90.jpeg,7031018951089933594,0,0.233384,0.741816


In [65]:
# Persist this slice's predictions; the numeric suffix identifies the slice so the
# per-slice files can be concatenated afterwards.
slice_result_csv = 'test_set_image_result_2.csv'
submission_results.to_csv(slice_result_csv, index=False)

In [71]:
# Stitch the per-slice prediction files back into one frame per dataset.
# Each slice file was written without its index, so every one is read back with an index
# starting at 0; ignore_index=True gives the combined frame a single 0..n-1 index instead
# of repeated labels (which would make any later .loc lookup ambiguous).
# The slice counts (6 and 3) are hard-coded and must match the files on disk.
train_set_2_image_result_df = pd.concat(
    [pd.read_csv(f'train_set_2_image_result_{i}.csv') for i in range(6)],
    ignore_index=True,
)
test_set_image_result_df = pd.concat(
    [pd.read_csv(f'test_set_image_result_{i}.csv') for i in range(3)],
    ignore_index=True,
)

In [74]:
# Write the merged prediction tables, one CSV per dataset, without the index column.
for merged_frame, merged_csv in (
    (train_set_2_image_result_df, 'train_set_2_image_result.csv'),
    (test_set_image_result_df, 'test_set_image_result.csv'),
):
    merged_frame.to_csv(merged_csv, index=False)

In [73]:
test_set_image_result_df.shape

(12643, 5)

In [47]:
# new_df = train_label_raw.sample(frac=1)

In [51]:
# new_df.to_csv('train_set_1_image_shuffle.csv', index=False)