In [7]:
!ls


In [2]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import tensorflow as tf
import time

from tensorflow import keras

# Report the interpreter and library versions this notebook is running with.
print(tf.__version__)
print(sys.version_info)
print(
    *(f"{lib.__name__} {lib.__version__}" for lib in (mpl, np, pd, sklearn, tf, keras)),
    sep="\n",
)

In [3]:
!nvidia-smi

In [3]:
!ls ../input/cifar-10

In [4]:
%pip install py7zr
import py7zr

In [5]:
!ls ../input

In [5]:

# Unpack the training archive into the working directory.
# The context manager closes the archive even if extraction fails part-way.
with py7zr.SevenZipFile(r'../input/cifar-10/train.7z', 'r') as train_archive:
    train_archive.extractall(path=r'./')

In [7]:
!ls 

In [6]:
# Unpack the test archive into the working directory.
# The context manager closes the archive even if extraction fails part-way.
with py7zr.SevenZipFile(r'../input/cifar-10/test.7z', 'r') as test_archive:
    test_archive.extractall(path='./')

In [8]:
!cp ../input/cifar-10/sampleSubmission.csv ./

In [10]:
!ls

In [9]:
!cp ../input/cifar-10/trainLabels.csv ./

In [11]:
# Class labels. The list is passed as `classes=` to the image iterators and is
# indexed with the predicted class indices to recover label strings.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]
# CSV files copied into the working directory by the `!cp` cells.
# NOTE(review): `train_lables_file` is a misspelling of "labels"; the name is
# referenced by the parsing cell, so rename both together.
train_lables_file = './trainLabels.csv'
test_csv_file = './sampleSubmission.csv'
# Image folders; parse_csv_file joins these with the ids read from the CSVs.
train_folder = './train'
test_folder = './test'


In [12]:
def parse_csv_file(filepath,folder):
    """Map each row of a two-column label CSV to an (image path, label) pair.

    The first line of the file is treated as a header and skipped. Every
    following non-blank line must be ``<image id>,<label>``; the image is
    expected at ``<folder>/<image id>.png``.

    Args:
        filepath: Path of the CSV file to read.
        folder: Directory that contains the ``<image id>.png`` files.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order,
        the layout used to build the DataFrames for ``flow_from_dataframe``.

    Raises:
        ValueError: If a non-blank data line does not contain exactly two
            comma-separated fields.
    """
    results = []
    with open(filepath, 'r') as f:
        next(f, None)  # skip the header row (no-op for an empty file)
        # Data lines start at physical line 2, right after the header.
        for line_number, line in enumerate(f, start=2):
            record = line.strip()
            if not record:
                # Tolerate blank lines, e.g. an extra newline at end of file.
                continue
            fields = record.split(',')
            if len(fields) != 2:
                raise ValueError(
                    '%s:%d: expected "id,label", got %r'
                    % (filepath, line_number, record))
            image_id, label_str = fields
            # Files in the image folder are named "<id>.png".
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results
        

In [17]:
import pprint

train_labels_info = parse_csv_file(train_lables_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)

# Preview the first five (path, label) pairs of each list, then both sizes.
for pairs in (train_labels_info, test_csv_info):
    pprint.pprint(pairs[:5])
print(len(train_labels_info), len(test_csv_info))

In [18]:
# Column names referenced later through x_col / y_col of flow_from_dataframe.
df_columns = ['filepath', 'class']

# The first 45,000 labelled rows are used for training and the remainder for
# validation; the test frame is built from the submission-template rows.
# Naming the columns in the constructor also works when a list is empty,
# whereas assigning `.columns` afterwards raises on a zero-column frame.
train_df = pd.DataFrame(train_labels_info[:45000], columns=df_columns)
valid_df = pd.DataFrame(train_labels_info[45000:], columns=df_columns)
test_df = pd.DataFrame(test_csv_info, columns=df_columns)

print(train_df.head())
print(valid_df.head())
print(test_df.head())


In [19]:
!ls ./train/1.png

In [20]:
# Image geometry and batch size shared by the data iterators and the model.
height = 32
width = 32
channels = 3
batch_size = 32
# Number of units in the model's final softmax layer.
num_classes = 10

In [21]:
# Keyword arguments shared by the training and validation iterators.
flow_args = dict(
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    class_mode='sparse',
)

# Random augmentation applied to training images only.
augmentation_args = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training data: pixel values scaled by 1/255, augmented and shuffled.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255, **augmentation_args)
train_generator = train_datagen.flow_from_dataframe(
    train_df, shuffle=True, **flow_args)

# Validation data: scaling only, kept in frame order.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df, shuffle=False, **flow_args)

In [22]:
# Sample counts reported by the iterators; used below to derive the number of
# training and validation steps per epoch.
train_num = train_generator.samples
valid_num = valid_generator.samples
print(train_num, valid_num)


In [24]:
# CNN classifier: three stages of two 3x3 'same'-padded SELU convolutions
# (128, 256, then 512 filters), each convolution followed by batch
# normalisation and each stage closed by 2x2 max-pooling, then a 512-unit
# dense layer and a softmax over `num_classes`.
model = keras.models.Sequential([
    # Input is (height, width, channels), matching the iterators'
    # target_size=(height, width). The previous [width, height, channels]
    # ordering only worked because both dimensions are 32.
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='selu',
                        input_shape=[height, width, channels]),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='selu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='selu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='selu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='selu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='selu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),

    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax'),
])
# Sparse loss because the iterators use class_mode='sparse' (integer labels).
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
model.summary()

In [25]:
epochs = 20
# `Model.fit` accepts the data iterators directly; `fit_generator` is the
# deprecated spelling of the same call.
history = model.fit(
    train_generator,
    steps_per_epoch=train_num // batch_size,
    epochs=epochs,
    validation_data=valid_generator,
    # Round up so the final partial batch of the (unshuffled) validation set
    # is evaluated too instead of being dropped every epoch.
    validation_steps=(valid_num + batch_size - 1) // batch_size,
)


In [26]:
# Dump the recorded per-epoch metrics; the plotting cell reads the same dict.
print(history.history)

In [27]:
def plot_learning_curves(history, label, epcohs, min_value, max_value):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (the object returned by training).
        label: Metric name to plot; ``'val_' + label`` is plotted with it.
        epcohs: Upper bound of the x-axis. The name is a misspelling of
            "epochs" that is kept so keyword callers keep working.
        min_value: Lower bound of the y-axis.
        max_value: Upper bound of the y-axis.
    """
    data = {}
    data[label] = history.history[label]
    data['val_'+label] = history.history['val_'+label]
    pd.DataFrame(data).plot(figsize=(8, 5))
    plt.grid(True)
    # Use the argument rather than a notebook-level `epochs` global, so the
    # function honours what the caller passed and works without that global.
    plt.axis([0, epcohs, min_value, max_value])
    plt.show()
    
# Accuracy is drawn on a [0, 1] y-axis and loss on a [0, 2] y-axis.
plot_learning_curves(history, 'accuracy', epochs, 0, 1)
plot_learning_curves(history, 'loss', epochs, 0, 2)

In [28]:
# Test images get the same 1/255 scaling as validation and no augmentation.
test_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255)
# Build the iterator from `test_datagen`; it was previously built from
# `valid_datagen`, leaving `test_datagen` unused.
# shuffle=False keeps predictions in the row order of `test_df`, which the
# submission writer relies on when numbering rows.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory = './',
    x_col = 'filepath',
    y_col = 'class',
    classes = class_names,
    target_size = (height, width),
    batch_size = batch_size,
    seed = 7,
    shuffle = False,
    class_mode = "sparse")

In [29]:
# Sample count reported by the test iterator.
test_num = test_generator.samples
print(test_num)


In [30]:

!cat /proc/cpuinfo

In [31]:
# `Model.predict` accepts the data iterator directly; `predict_generator` is
# the deprecated spelling of the same call. Five worker processes load and
# preprocess the batches.
test_predict = model.predict(test_generator,
                             workers = 5,
                             use_multiprocessing = True)

In [32]:
# Shape of the prediction array returned for the test set.
print(test_predict.shape)

In [33]:
# The prediction for each sample is a vector of 10 probability values.
print(test_predict[0:5])

In [34]:
# For each probability vector, take the index of its largest value.
test_predict_class_indices = np.argmax(test_predict, axis = 1)

# Preview the first five predicted class indices.
print(test_predict_class_indices[0:5])

In [43]:
def generate_submission(filename,predict_class):
    """Write predictions to ``filename`` as an ``id,label`` CSV.

    The file starts with the header ``id,label``; each prediction follows on
    its own line, numbered from 1 in the order given.

    Args:
        filename: Path of the CSV file to create or overwrite.
        predict_class: Sequence of predicted labels, one per test sample.
    """
    with open(filename, 'w') as f:
        f.write('id,label\n')
        f.writelines(
            f'{row_id:d},{label!s}\n'
            for row_id, label in enumerate(predict_class, start=1)
        )


In [40]:
# Translate each predicted class index into its label string.
test_predict_class = [class_names[index] for index in test_predict_class_indices]


In [44]:
# Write the predicted labels to the CSV file submitted in the last cell.
output_file = "./submission.csv"
generate_submission(output_file, test_predict_class)

In [45]:
!ls

In [47]:
!kaggle competitions submit -c cifar-10 -f submission.csv -m "Message"