In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

from keras.utils.np_utils import to_categorical
from keras.models import Sequential, load_model, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
%matplotlib inline

In [3]:
def load_nipa_test_img(num, data_dir='/volumes/KTWusb/nipa/test', target_size=(224, 224)):
    """Load one test image and return it as a NumPy array.

    Parameters
    ----------
    num : int or str
        Stem of the image file; the file read is ``{data_dir}/{num}.jpg``.
    data_dir : str, optional
        Directory that holds the test images. Defaults to the path that used
        to be hard-coded here, so existing calls behave exactly as before;
        pass another directory instead of editing this function.
    target_size : tuple of int, optional
        Size the image is resized to while loading. Defaults to ``(224, 224)``
        (the original comment notes this is the size VGG16 uses).

    Returns
    -------
    numpy.ndarray
        The result of ``img_to_array`` applied to the loaded image.
    """
    nipa_image = load_img(f'{data_dir}/{num}.jpg', target_size=target_size)
    # Convert the loaded image object into a NumPy array.
    return img_to_array(nipa_image)


def load_nipa_train_img(plnt, dis, specific_index,
                        data_dir='/volumes/KTWusb/nipa/train', target_size=(224, 224)):
    """Load one training image and return it as a NumPy array.

    Parameters
    ----------
    plnt, dis, specific_index
        The three components of the file name; the file read is
        ``{data_dir}/{plnt}_{dis}_{specific_index}.jpg``.
    data_dir : str, optional
        Directory that holds the training images. Defaults to the path that
        used to be hard-coded here, so existing calls behave exactly as before.
    target_size : tuple of int, optional
        Size the image is resized to while loading. Defaults to ``(224, 224)``
        (the original comment notes this is the size VGG16 uses).

    Returns
    -------
    numpy.ndarray
        The result of ``img_to_array`` applied to the loaded image.
    """
    image_path = f'{data_dir}/{plnt}_{dis}_{specific_index}.jpg'
    nipa_image = load_img(image_path, target_size=target_size)
    # Convert the loaded image object into a NumPy array.
    return img_to_array(nipa_image)



## X_train 파트 전처리

In [4]:

# 'nipa_train.tsv' stands for the TSV file shipped with the dataset. The real
# file may carry a different name (possibly just 'train.tsv') -- adjust as needed.
df = pd.read_csv(
    'nipa_train.tsv',
    delimiter='\t',   # tab-separated values
    header=None,      # the file has no header row
)

In [5]:
# Name the three columns of the headerless TSV: the image file name, then the
# two codes that the loaders and the `class` label are built from
# (presumably plant-variety code and disease code -- confirm against the dataset docs).
df.columns = ['file', 'plnt', 'dis']

In [6]:
# Strip the ".jpg" extension. regex=False makes ".jpg" a literal string; with the
# regex default of pandas < 2.0 the "." would match any character, and newer
# pandas 1.x versions emit a FutureWarning for this call.
df['file'] = df['file'].str.replace('.jpg', "", regex=False)

In [7]:
# The third "_"-separated piece of the file name is the per-image index.
df['specific_index'] = (
    df['file']
    .str.split("_", expand=True)
    .loc[:, 2]
)

In [8]:
# The split above yields strings; make the index numeric so sorting is numeric.
df = df.astype({'specific_index': int})

In [9]:
# Order rows by plnt, then dis, then the per-image index (all ascending).
df = df.sort_values(
    ['plnt', 'dis', 'specific_index'],
    ascending=True,
)

In [10]:
# Renumber rows 0..n-1 after the sort; the old index becomes an 'index' column.
df = df.reset_index()

In [11]:
# Discard the 'index' column that reset_index() left behind.
df = df.drop(columns=['index'])

In [18]:
# tqdm.tqdm_notebook is deprecated; tqdm.notebook.tqdm is its replacement.
from tqdm.notebook import tqdm

X_train_full = []

# One image per row of df. Iterating the named columns directly avoids slicing
# each row three times and avoids positional integer lookups on a
# label-indexed Series.
for plnt, dis, specific_index in tqdm(
        zip(df['plnt'], df['dis'], df['specific_index']), total=len(df)):

    X_train_full.append(load_nipa_train_img(plnt, dis, specific_index))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """


HBox(children=(FloatProgress(value=0.0, max=4996.0), HTML(value='')))




In [19]:
# Stack the per-image arrays along a new leading axis.
# A run over all 16000 images should give shape (16000, 224, 224, 3).
X_train_full = np.stack(X_train_full)
X_train_full.shape

(4996, 224, 224, 3)

## y_train 만들기 : 품종코드_질병코드 로 class를 만들어준다

In [20]:
# Class label = "<plnt>_<dis>", built row by row from the two code columns.
df['class'] = [f"{plnt_code}_{dis_code}" for plnt_code, dis_code in zip(df['plnt'], df['dis'])]

In [25]:
# One-hot encode the class labels.
# NOTE(review): the labels are strings such as "<plnt>_<dis>". to_categorical
# appears to parse them as integers with "_" acting as a digit separator, which
# would explain the 1321 output columns and means labels like "1_12" and "11_2"
# would share a column -- confirm, and consider mapping classes to 0..k-1 first
# (the hard-coded Dense width in the model cell would have to change with it).
y_train_full = to_categorical(list(df['class']))


In [26]:
y_train_full.shape  # On the full data this is (16000, n); n is the width of the final Dense layer in the model below.

(4996, 1321)

In [27]:
# Model definition: three Conv2D -> MaxPool2D stages followed by a dense classifier.

# The softmax width must equal the number of one-hot columns in y_train_full.
# Reading it from the array keeps the model in sync with the labels instead of
# relying on a number copied by hand from the previous cell's output.
num_classes = y_train_full.shape[1]

model = Sequential()
model.add(Conv2D(filters = 16,
                 kernel_size = 4,
                 padding = 'same',
                 strides = 1,
                 activation = 'relu',
                 input_shape = (224,224,3,)  # one 224x224 image with 3 channels, matching the loaders' target_size
                ))
model.add(MaxPool2D(pool_size = 2))
model.add(Dropout(0.5))


model.add(Conv2D(filters = 32, kernel_size = 4, padding = 'same', strides = 1, activation = 'relu'))
model.add(MaxPool2D(pool_size = 2))

model.add(Conv2D(filters = 64, kernel_size = 4, padding = 'same', strides = 1, activation = 'relu'))
model.add(MaxPool2D(pool_size = 2))

model.add(Flatten())

model.add(Dense(512, activation = 'relu'))
model.add(Dense(num_classes, activation = 'softmax'))  # one output unit per column of y_train_full

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 16)      784       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 16)      0         
_________________________________________________________________
dropout (Dropout)            (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 32)      8224      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 56, 56, 64)        32832     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 28, 28, 64)        0

In [None]:
# Loss / optimizer configuration
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])


# Training.
# Uses the arrays actually built above (X_train_full / y_train_full); the names
# X_train_4996 / y_train_4996 are never defined in this notebook and raise a
# NameError on a fresh kernel.
# NOTE(review): Keras takes the validation_split samples from the END of the
# arrays, before any shuffling. df was sorted by plnt/dis, so the held-out 20%
# is likely dominated by the last classes -- consider shuffling the rows first.
model.fit(X_train_full, y_train_full, batch_size = 128, epochs=1, validation_split = 0.2)

## X_test 만들기

In [36]:
# Number of rows in the test listing (one per test image).
len(pd.read_csv('nipa_test.tsv', delimiter='\t', header=None))

3997

In [None]:
# tqdm.tqdm_notebook is deprecated; tqdm.notebook.tqdm is its replacement.
from tqdm.notebook import tqdm

# Read the test listing once, only to learn how many test images there are.
n_test = len(pd.read_csv('nipa_test.tsv', sep='\t', header=None))

X_test = []

# load_nipa_test_img(num) reads "<num>.jpg", so this loads 0.jpg .. (n_test - 1).jpg.
for num in tqdm(range(n_test)):

    X_test.append(load_nipa_test_img(num))

# Stack the per-image arrays along a new leading axis.
X_test = np.stack(X_test)
X_test.shape

In [None]:
# Final step: run the trained model over every test image.
# The last layer is a softmax, so each row of `pred` holds per-class scores.

pred = model.predict(x=X_test)