In [17]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import imageio
from keras import utils
from keras.models import Sequential
from keras.layers import Dense, Flatten,Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D
from keras import optimizers

from pathlib import Path
# from fastai import *
# from fastai.vision import *

In [2]:
# Load the training label table (tab-separated); the preview below shows
# the columns file_name and label_id.
labels = pd.read_csv("./Downloads/train_master.tsv", sep="\t")

In [3]:
# Preview the first rows of the training label table.
labels.head()

Unnamed: 0,file_name,label_id
0,train_00000.png,11
1,train_00001.png,15
2,train_00002.png,4
3,train_00003.png,14
4,train_00004.png,1


In [4]:
# Load the label master table (tab-separated); the preview below shows
# that it pairs each label_id with a label_name.
master = pd.read_csv("./Downloads/label_master.tsv", sep="\t")

In [5]:
# Preview the first rows of the label master table.
master.head()

Unnamed: 0,label_id,label_name
0,0,aquatic_mammals
1,1,fish
2,2,flowers
3,3,food_containers
4,4,fruit_and_vegetables


In [6]:
# Load the sample submission. header=None treats the first line as data, so
# the columns get integer labels; column 0 holds the test file names (it is
# indexed as sample[0] when the test images are loaded).
sample = pd.read_csv("./Downloads/sample_submit.csv", header=None, sep=",")

In [7]:
# Preview the first rows of the sample submission.
sample.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
0,test_00000.png,0.036151,0.014607,0.034591,0.079934,0.053778,0.01169,0.039329,0.079885,0.047027,...,0.062593,0.037947,0.080989,0.066377,0.020849,0.057315,0.056351,0.033405,0.078505,0.05343
1,test_00001.png,0.033393,0.037818,0.021027,0.043661,0.082885,0.041105,0.069309,0.059735,0.023879,...,0.077569,0.053666,0.044468,0.04049,0.090077,0.050215,0.052718,0.06794,0.030642,0.078394
2,test_00002.png,0.098902,0.047091,0.022525,0.059422,0.038273,0.011877,0.086344,0.054882,0.046612,...,0.028493,0.027183,0.092749,0.076549,0.026914,0.099406,0.032409,0.03871,0.000611,0.060666
3,test_00003.png,0.054705,0.080344,0.047048,0.083701,0.043293,0.06883,0.026523,0.076391,0.008649,...,0.020802,0.054486,0.065079,0.060174,0.045692,0.052243,0.006735,0.036344,0.070731,0.026432
4,test_00004.png,0.087472,0.045188,0.091187,0.003265,0.110322,0.102048,0.048273,0.080005,0.050142,...,0.096636,0.038774,0.050342,0.044085,0.057429,0.023512,0.018324,0.002596,0.030469,0.001106


In [8]:
# Read every training image named in the label table, in table order, and
# stack the decoded images into a single array.
train_images = np.array(
    [imageio.imread("./Downloads/train_gig/" + name) for name in labels["file_name"]]
)
# Sanity check: report the container type and the stacked array's shape.
print(type(train_images), train_images.shape)

<class 'numpy.ndarray'> (50000, 32, 32, 3)


In [9]:
# Read every test image named in column 0 of the sample submission, in file
# order, and stack the decoded images into a single array.
test_images = np.array(
    [imageio.imread("./Downloads/test_gig/" + name) for name in sample[0]]
)
# Sanity check: report the container type and the stacked array's shape.
print(type(test_images), test_images.shape)

<class 'numpy.ndarray'> (10000, 32, 32, 3)


In [10]:
# Scale pixel values by 1/255 (assumes 8-bit images, as the /255 factor
# implies — confirm against the data).
#
# The integer-dtype guard makes this cell safe to re-run: the division turns
# the arrays into floats, so a second execution leaves them untouched instead
# of silently dividing by 255 again.
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images / 255
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images / 255

In [11]:
# Integer class ids for the training images, in label-table order.
y = labels["label_id"]

# One-hot encode the ids (the class count is left for to_categorical to infer).
y_categorical = utils.to_categorical(y)

# Preview the first ten encoded rows.
y_categorical[:10]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0.]], dtype=float32)

In [12]:
# Hold out everything after the first 40000 samples for validation.
# Images and labels are cut at the same index so they stay aligned.
X_train_image, X_val_image = train_images[:40000], train_images[40000:]
y_train_label, y_val_label = y_categorical[:40000], y_categorical[40000:]

In [22]:
# Training hyperparameters: mini-batch size and number of passes over the data.
batch_size, epochs = 32, 5

In [23]:
# Build, compile and train a small CNN classifier for 32x32 RGB images.
model = Sequential()

# Five identical stages: 3x3 "same" convolution (64 filters) -> ReLU ->
# batch normalisation -> dropout. Only the first layer declares the input shape.
for stage in range(5):
    conv_kwargs = {"input_shape": (32, 32, 3)} if stage == 0 else {}
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", **conv_kwargs))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

# Halve the spatial resolution, then average each feature map down to one value.
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(GlobalAveragePooling2D())

# 20-way softmax output, matching the width of the one-hot labels.
model.add(Dense(units=20))
model.add(Activation("softmax"))

# Use the `optimizers` module imported at the top of the notebook. The earlier
# `tf.keras.optimizers...` reference raised NameError on a fresh kernel because
# `tf` is never imported.
model.compile(loss="categorical_crossentropy",
              optimizer=optimizers.RMSprop(lr=0.0001, decay=1e-6),
              metrics=["accuracy"])

# Train, reporting validation metrics on the held-out split after each epoch.
model.fit(X_train_image, y_train_label,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_val_image, y_val_label))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x28681255358>

In [23]:
# Evaluate on the held-out validation split and print what evaluate() returns:
# the loss followed by the compiled "accuracy" metric.
print(model.evaluate(X_val_image, y_val_label, verbose=0))

[5.215665817260742, 0.04560000076889992]


In [24]:
# Confirm the shape of the test array before running inference.
print(test_images.shape)

# Run the trained model over the test images.
predictions = model.predict(test_images)

# Report the container type and the output shape, one per line.
print(type(predictions), predictions.shape, sep="\n")


(10000, 32, 32, 3)
<class 'numpy.ndarray'>
(10000, 20)


In [25]:
# Show the softmax output (one score per class) for the first test image.
print(predictions[0])

[0.02702333 0.00698016 0.00261377 0.00832208 0.00132367 0.01160004
 0.00808218 0.00596583 0.005077   0.32504183 0.4142036  0.02583359
 0.00340256 0.00570288 0.0030983  0.00517105 0.00607683 0.01733541
 0.03146173 0.08568422]


In [26]:
# Column 0 of the sample submission holds the test file names.
file_name = sample[0]
# Wrap the prediction array in a DataFrame so it can be joined with the names.
df = pd.DataFrame(predictions)

In [27]:
# Put the file names and the per-class scores side by side.
# ignore_index=True relabels the resulting columns 0..N. Without it both
# inputs contribute a column labelled 0, and the duplicate label makes
# df_concat[0] ambiguous. The CSV written later has no header row, so the
# file contents are unaffected.
df_concat = pd.concat([file_name, df], axis=1, ignore_index=True)

In [28]:
# Preview the first rows of the assembled submission table.
df_concat.head()

Unnamed: 0,0,0.1,1,2,3,4,5,6,7,8,...,10,11,12,13,14,15,16,17,18,19
0,test_00000.png,0.027023,0.00698,0.002614,0.008322,0.001324,0.0116,0.008082,0.005966,0.005077,...,0.414204,0.025834,0.003403,0.005703,0.003098,0.005171,0.006077,0.017335,0.031462,0.085684
1,test_00001.png,0.028833,0.021605,0.003511,0.008796,0.006071,0.003886,0.004899,0.018805,0.111331,...,0.04712,0.233613,0.192435,0.017171,0.007342,0.026206,0.171557,0.029992,0.009313,0.017581
2,test_00002.png,0.16122,0.063602,0.012241,0.010989,0.012859,0.015453,0.011368,0.016165,0.041393,...,0.101705,0.043769,0.026851,0.055822,0.024141,0.087196,0.026389,0.044214,0.067225,0.098288
3,test_00003.png,0.05073,0.012371,0.00553,0.045989,0.007782,0.056159,0.075919,0.016157,0.133007,...,0.0158,0.06057,0.07662,0.011814,0.059823,0.019305,0.0382,0.047056,0.153599,0.071543
4,test_00004.png,0.013687,0.062775,0.059148,0.008723,0.04488,0.007301,0.020187,0.009472,0.003982,...,0.552439,0.018991,0.002831,0.029704,0.00627,0.009764,0.014603,0.031015,0.006705,0.030686


In [29]:
# Write the submission file with neither the index column nor a header row.
df_concat.to_csv('./Downloads/sample_submit_001.csv', index=False, header=False)