In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from tensorflow import keras
import tensorflow as tf
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Activation, BatchNormalization, Flatten
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cifar-10/trainLabels.csv
/kaggle/input/cifar-10/sampleSubmission.csv
/kaggle/input/cifar-10/test.7z
/kaggle/input/cifar-10/train.7z


In [2]:
!pip install py7zr

Collecting py7zr
  Downloading py7zr-0.18.3-py3-none-any.whl (76 kB)
     |████████████████████████████████| 76 kB 1.4 MB/s             
[?25hCollecting zipfile-deflate64>=0.2.0
  Downloading zipfile_deflate64-0.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43 kB)
     |████████████████████████████████| 43 kB 1.1 MB/s             
[?25hCollecting pyzstd>=0.14.4
  Downloading pyzstd-0.15.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
     |████████████████████████████████| 2.4 MB 4.3 MB/s            
Collecting brotli>=1.0.9
  Downloading Brotli-1.0.9-cp37-cp37m-manylinux1_x86_64.whl (357 kB)
     |████████████████████████████████| 357 kB 49.3 MB/s            
[?25hCollecting multivolumefile>=0.2.3
  Downloading multivolumefile-0.2.3-py3-none-any.whl (17 kB)
Collecting pyppmd<0.19.0,>=0.18.1
  Downloading pyppmd-0.18.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (131 kB)
     |████████████████████████████████| 131 k

In [3]:
from py7zr import unpack_7zarchive
import shutil

# Teach shutil.unpack_archive how to handle .7z files via py7zr.
# Registering a name twice raises shutil.RegistryError, so skip the call when
# the format is already known (e.g. when this cell is executed again).
if '7zip' not in {fmt_name for fmt_name, _exts, _desc in shutil.get_unpack_formats()}:
    shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)

In [4]:
# Extract the training images only once. The images end up under
# /kaggle/temp/train and are later moved into per-class folders; extracting
# again after that would put every image back into the train root and let a
# second split place validation images into the training set as well.
# NOTE: an interrupted extraction also leaves this directory behind - delete
# it manually before re-running in that case.
if not os.path.isdir('/kaggle/temp/train'):
    shutil.unpack_archive('../input/cifar-10/train.7z', '/kaggle/temp/')

In [5]:
# Table with one row per training image: its numeric id and its class label.
train_labels = pd.read_csv("../input/cifar-10/trainLabels.csv", header="infer")

# Distinct class names, in order of first appearance in the label column.
classes = pd.unique(train_labels['label'])
print(classes)

['frog' 'truck' 'deer' 'automobile' 'bird' 'horse' 'ship' 'cat' 'dog'
 'airplane']


In [6]:
parent_path_train = "/kaggle/temp/train"
parent_path_valid = "/kaggle/temp/valid"
parent_path_test = "/kaggle/temp/test"

# Create the validation root plus one sub-directory per class under both
# train/ and valid/ (the layout flow_from_directory reads labels from).
os.makedirs(parent_path_valid, exist_ok=True)
for class1 in classes:
    os.makedirs(os.path.join(parent_path_train, class1), exist_ok=True)
    os.makedirs(os.path.join(parent_path_valid, class1), exist_ok=True)

# Seeded generator so the ~80/20 train/validation split is the same on every
# fresh run of the notebook.
split_rng = np.random.default_rng(42)

for image_id, label in zip(train_labels["id"], train_labels["label"]):
    file_name = str(image_id) + ".png"
    source_path = os.path.join(parent_path_train, file_name)

    # Draw before the existence check so every row consumes exactly one random
    # number and the assignment of an image never depends on other files.
    goes_to_train = split_rng.random() <= 0.8

    # Images that were already moved by an earlier execution of this cell are
    # no longer in the train root; leave them where they are instead of
    # failing with FileNotFoundError.
    if not os.path.exists(source_path):
        continue

    target_root = parent_path_train if goes_to_train else parent_path_valid
    target_path = os.path.join(target_root, label, file_name)
    os.replace(source_path, target_path)

In [7]:
# Sanity check: list the entries of the validation and training directories
# to confirm the per-class folders exist in both.
!ls /kaggle/temp/valid
!ls /kaggle/temp/train

airplane  automobile  bird  cat  deer  dog  frog  horse  ship  truck
airplane  automobile  bird  cat  deer  dog  frog  horse  ship  truck


In [8]:
# Plain CNN: six 3x3 'valid' convolutions with a growing number of filters.
# Every convolution is bias-free and followed by batch normalisation and ReLU.
conv_filters = (32, 48, 64, 80, 96, 128)

model = Sequential()
for position, n_filters in enumerate(conv_filters):
    # Only the very first layer declares the input shape (32x32 RGB images).
    first_layer_args = {'input_shape': (32, 32, 3)} if position == 0 else {}
    model.add(
        Conv2D(
            filters=n_filters,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='valid',
            activation=None,
            use_bias=False,
            **first_layer_args,
        )
    )
    model.add(BatchNormalization())
    model.add(Activation('relu'))

# Classification head: flatten -> 10 logits -> batch norm -> softmax.
model.add(Flatten())
model.add(Dense(units=10))
model.add(BatchNormalization())
model.add(Activation('softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


2022-03-28 10:59:10.843937: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [9]:
# Training pipeline: light geometric augmentation plus scaling of the pixel
# values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=10,
                             zoom_range=0.1,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             horizontal_flip=False,
                             vertical_flip=False,
                             rescale=1./255)

# Validation pipeline: no augmentation, but the SAME pixel scaling as the
# training data. Without it the model would be validated on 0-255 inputs
# while being trained on 0-1 inputs, making the validation metrics meaningless.
valid_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(directory='/kaggle/temp/train/', shuffle=True, target_size=(32,32),batch_size=128)
valid_generator = valid_datagen.flow_from_directory(directory='/kaggle/temp/valid/', shuffle=True, target_size=(32,32),batch_size=128)

Found 40060 images belonging to 10 classes.
Found 9940 images belonging to 10 classes.


In [10]:
# Number of full batches available per epoch for each generator.
train_steps = train_generator.n // train_generator.batch_size
valid_steps = valid_generator.n // valid_generator.batch_size

# Train for 10 epochs, loading batches with 8 worker processes.
model.fit(
    train_generator,
    epochs=10,
    validation_data=valid_generator,
    steps_per_epoch=train_steps,
    validation_steps=valid_steps,
    workers=8,
    use_multiprocessing=True,
)

2022-03-28 10:59:13.915783: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f9de81c5290>

In [11]:
# Extract the test images only once: the archive holds 300000 files, and
# the '7zip' handler is unregistered right after this cell, so a second
# execution would otherwise be slow or fail.
# NOTE: an interrupted extraction also leaves this directory behind - delete
# it manually before re-running in that case.
if not os.path.isdir('/kaggle/temp/test'):
    shutil.unpack_archive('/kaggle/input/cifar-10/test.7z','/kaggle/temp/test')

In [12]:
# Both archives are extracted, so drop the .7z handler again. The call raises
# KeyError for an unknown format name, so only unregister when '7zip' is
# actually registered (keeps the cell safe to execute more than once).
if any(fmt[0] == '7zip' for fmt in shutil.get_unpack_formats()):
    shutil.unregister_unpack_format('7zip')

In [13]:
# Test images get the same 1/255 pixel scaling and no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

# class_mode=None: yield images only (there are no test labels).
# shuffle=False: keep the batch order aligned with test_gen.filenames.
test_gen = test_datagen.flow_from_directory(
    directory='/kaggle/temp/test',
    target_size=(32, 32),
    batch_size=64,
    class_mode=None,
    shuffle=False,
)

Found 300000 images belonging to 1 classes.


In [14]:
# Rewind the generator so prediction starts from its first batch.
test_gen.reset()

# One row of 10 class probabilities per test image.
predictions_vecs = model.predict(test_gen)

# Index of the most probable class for every image.
predictions_final = predictions_vecs.argmax(axis=1)

In [15]:
print(type(train_generator.class_indices))
print(train_generator.class_indices)

# Invert the generator's name -> index mapping into index -> name.
# A dedicated name is used on purpose: rebinding `classes` (the array of class
# names used by the split cell) to a dict would break re-running that cell.
index_to_class = {value: key for (key, value) in train_generator.class_indices.items()}
print(index_to_class)

# Size everything from the actual predictions instead of a hard-coded count.
n_predictions = len(predictions_final)

# Class names ordered by class index, so integer predictions can be
# translated with a single fancy-indexing operation.
label_lookup = np.array(
    [index_to_class[i] for i in range(len(index_to_class))],
    dtype=np.dtype('U20'),
)
predicted_classes = label_lookup[predictions_final]

# The generator returns files in its own listing order, not in numeric id
# order. Turn every file name "<id>.png" into the 0-based position <id> - 1.
filenames_wo_ext = [
    int(os.path.splitext(os.path.basename(fname))[0]) - 1
    for fname in test_gen.filenames
]

# np.empty leaves unwritten slots uninitialised, so make sure every position
# is written exactly once before scattering the labels.
assert len(filenames_wo_ext) == n_predictions, "one prediction per test file expected"
assert sorted(filenames_wo_ext) == list(range(n_predictions)), "test ids must be exactly 1..N"

# Scatter the labels so that position k holds the label of image id k + 1.
predicted_classes_final = np.empty(shape=n_predictions, dtype=np.dtype('U20'))
predicted_classes_final[filenames_wo_ext] = predicted_classes


<class 'dict'>
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
{0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer', 5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}


In [16]:
# Use the sample submission as the template for the final file and show its
# columns / dtypes.
sample_submission_path = '../input/cifar-10/sampleSubmission.csv'
sub = pd.read_csv(sample_submission_path, header='infer')
sub.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   id      300000 non-null  int64 
 1   label   300000 non-null  object
dtypes: int64(1), object(1)
memory usage: 4.6+ MB


In [17]:
# predicted_classes_final is ordered by image id (position k <-> id k + 1).
# Look each label up through the row's own id instead of relying on the
# template rows being sorted by id.
sub['label'] = predicted_classes_final[sub['id'].to_numpy() - 1]
sub.to_csv('submission.csv', index=False)