In [None]:
#!pip show keras

Name: Keras
Version: 2.4.3
Summary: Deep Learning for humans
Home-page: https://github.com/keras-team/keras
Author: Francois Chollet
Author-email: francois.chollet@gmail.com
License: MIT
Location: /usr/local/lib/python3.7/dist-packages
Requires: h5py, numpy, pyyaml, scipy
Required-by: textgenrnn, keras-vis, kapre, fancyimpute


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import zipfile
import csv
import sys
import os
from ImageDataAugmentor.image_data_augmentor import *
import albumentations as A


import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.layers import *

from sklearn.model_selection import train_test_split, StratifiedKFold

import PIL
from PIL import ImageOps, ImageFilter
# Enlarge the default figure size for every plot in this notebook.
from pylab import rcParams
rcParams['figure.figsize'] = 10, 5
# Render inline figures as SVG: they look crisper than the default raster output.
%config InlineBackend.figure_format = 'svg' 
%matplotlib inline


# Report the interpreter and core library versions, one aligned row per component.
_version_rows = (
    ('Python       :', sys.version.split('\n')[0]),
    ('Numpy        :', np.__version__),
    ('Tensorflow   :', tf.__version__),
    ('Keras        :', tf.keras.__version__),
)
for _label, _version in _version_rows:
    print(_label, _version)

Python       : 3.7.10 (default, Feb 20 2021, 21:17:23) 
Numpy        : 1.19.5
Tensorflow   : 2.4.1
Keras        : 2.4.0


In [2]:
# --- Training hyper-parameters -------------------------------------------
EPOCHS = 5         # number of training epochs
BATCH_SIZE = 64    # lower this for a large network, otherwise a batch will not fit in GPU memory
LR = 1e-4          # presumably the optimizer learning rate (not used in the visible cells)
VAL_SPLIT = 0.15   # fraction of the data held out for validation (15%)

# --- Task / input geometry -----------------------------------------------
CLASS_NUM = 10     # number of classes in this task
IMG_SIZE = 224     # side length of the images fed to the network
IMG_CHANNELS = 3   # RGB images have 3 channels
input_shape = (IMG_SIZE,) * 2 + (IMG_CHANNELS,)

In [3]:
# Colab-only step: upload the CSV files from the local machine into the runtime.
# The value returned by files.upload() is kept in `uploaded`.
# NOTE(review): the recorded output shows the files being saved as
# "train (2).csv" / "sample-submission (2).csv", i.e. copies with the plain
# names already existed — a later read of "train.csv" may therefore pick up an
# earlier upload. Confirm which copy is intended.
from google.colab import files
uploaded = files.upload()

Saving sample-submission.csv to sample-submission (2).csv
Saving train.csv to train (2).csv


In [4]:
# Read the training labels and the submission template from the working
# directory, then preview the first rows of the labels.
train_df, sample_submission = (
    pd.read_csv(csv_name) for csv_name in ("train.csv", "sample-submission.csv")
)
train_df.head()

Unnamed: 0,Id,Category
0,100155.jpg,0
1,100306.jpg,0
2,100379.jpg,0
3,100380.jpg,0
4,100389.jpg,0


In [5]:
# Summarise the label table: row count, column dtypes and non-null counts.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15561 entries, 0 to 15560
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Id        15561 non-null  object
 1   Category  15561 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 243.3+ KB


In [6]:
# Number of training images per class, most frequent class first.
train_df["Category"].value_counts()

1    1971
8    1765
6    1733
5    1631
0    1613
3    1528
2    1458
4    1400
9    1255
7    1207
Name: Category, dtype: int64

In [7]:
# Albumentations pipeline applied to the training images.
# Every entry must be a transform *instance*: Compose calls each element on the
# image, so passing the bare class (as was done for CoarseDropout and
# ColorJitter) would fail as soon as the first image is augmented.
# NOTE(review): RandomScale changes the image size; if the data generator stacks
# augmented images into one batch array, a final resize back to
# (IMG_SIZE, IMG_SIZE) may be needed — confirm against ImageDataAugmentor.
transform = A.Compose([
    # Geometric transforms
    A.HorizontalFlip(),
    A.RandomRotate90(),
    A.RandomScale(),
    A.ElasticTransform(),
    A.OpticalDistortion(),
    # Colour / intensity transforms
    A.RandomBrightnessContrast(),
    A.ChannelDropout(),
    A.ChannelShuffle(),
    A.ColorJitter(),
    A.Equalize(),
    # Blur, noise and resolution degradation
    A.Blur(blur_limit=4),
    A.MotionBlur(),
    A.Downscale(scale_min=0.75, scale_max=0.99),
    A.ISONoise(color_shift=(0.01, 0.3), intensity=(0.1, 0.5)),
    # Random rectangular cut-outs
    A.CoarseDropout(),
])

In [11]:
# One-time setup — run this BEFORE the imports cell, which already imports ImageDataAugmentor:
# %pip install git+https://github.com/mjkvaak/ImageDataAugmentor

Collecting git+https://github.com/mjkvaak/ImageDataAugmentor
  Cloning https://github.com/mjkvaak/ImageDataAugmentor to /tmp/pip-req-build-nhzpi45f
  Running command git clone -q https://github.com/mjkvaak/ImageDataAugmentor /tmp/pip-req-build-nhzpi45f
Collecting opencv-python>=4.2
[?25l  Downloading https://files.pythonhosted.org/packages/0f/13/192104516c4a3d92dc6b5e106ffcfbf0fe35f3c4faa49650205ff652af72/opencv_python-4.5.1.48-cp37-cp37m-manylinux2014_x86_64.whl (50.4MB)
[K     |████████████████████████████████| 50.4MB 83kB/s 
Building wheels for collected packages: ImageDataAugmentor
  Building wheel for ImageDataAugmentor (setup.py) ... [?25l[?25hdone
  Created wheel for ImageDataAugmentor: filename=ImageDataAugmentor-0.0.0-cp37-none-any.whl size=29530 sha256=170be741ad0923dc19c8c08c71139567f8a1421640081f59d2762e10f629178f
  Stored in directory: /tmp/pip-ephem-wheel-cache-401i8b7q/wheels/d9/10/55/6fca35a4072f87d694876d56ece64db3846cf45e1da1c381fe
Successfully built ImageDataAugm

In [12]:
# Data generator for the training images: applies the albumentations pipeline
# held in `transform`, multiplies pixel values by 1/255, and reserves VAL_SPLIT
# of the files for the 'validation' subset.
train_datagen = ImageDataAugmentor(
    augment=transform,
    rescale=1.0 / 255,
    preprocess_input=None,
    validation_split=VAL_SPLIT,
)

In [None]:
RANDOM_SEED = 42

# NOTE(review): PATH (root directory holding 'train/' and 'test_upload/') is not
# defined in any of the visible cells — define it in the configuration cell
# before running this one.

# Held-out and submission images must not be randomly augmented, so they get
# their own generators that only rescale the pixels.
# `eval_datagen` uses the same validation_split as `train_datagen` so that its
# 'validation' subset is the complement of the 'training' subset below
# (Keras-style generators split by file order — confirm for ImageDataAugmentor).
eval_datagen = ImageDataAugmentor(
    rescale=1./255,
    validation_split=VAL_SPLIT)
# Previously `test_datagen` was used without ever being created (NameError).
test_datagen = ImageDataAugmentor(rescale=1./255)

# Augmented training batches (one-hot labels).
train_generator = train_datagen.flow_from_directory(
    PATH+'train/',      # directory that contains one sub-folder of images per class
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='training') # set as training data

# Non-augmented validation batches taken from the same directory.
test_generator = eval_datagen.flow_from_directory(
    PATH+'train/',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='validation') # set as validation data

# Unlabelled images for the submission, in the order of sample_submission
# (shuffle=False keeps predictions aligned with the "Id" column).
test_sub_generator = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory=PATH+'test_upload/',
    x_col="Id",
    y_col=None,
    shuffle=False,
    class_mode=None,
    seed=RANDOM_SEED,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,)

In [None]:
# Alternative setup that reads pre-split 'data/train' and 'data/validation' folders.
# NOTE(review): this rebinds `train_generator`, and the two directories are not
# referenced anywhere else in the visible cells — confirm they exist and that
# this cell is still wanted.
# The task has CLASS_NUM = 10 classes, so labels are produced one-hot
# ('categorical') rather than 'binary', and the image size / batch size come
# from the configuration constants instead of repeated literals.
train_generator = train_datagen.flow_from_directory(
        'data/train',
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='categorical')
# Validation images are only rescaled, never augmented.
val_datagen = ImageDataAugmentor(rescale=1./255)
validation_generator = val_datagen.flow_from_directory(
        'data/validation',
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='categorical')