In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input, concatenate, Dropout
from tensorflow.keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.utils import to_categorical

from datetime import datetime
import os
import glob
import shutil
import random
import cv2
from google.colab import drive
import zipfile

from tensorflow.keras.applications import Xception, ResNet50, InceptionV3, MobileNet

In [2]:
# 구글 드라이브와 연동하기
drive.mount("/content/gdrive/")

Mounted at /content/gdrive/


In [3]:
# 짚파일을 맨 첫번째 페이지에 노출되게 하기.
dataset_path = "/content/gdrive/MyDrive/Colab Notebooks"
shutil.copy(os.path.join(dataset_path, "food.zip"), "/content")

'/content/food.zip'

In [4]:
# 관리할 폴더 미리 지정해주기(만들어주는게 아님)

DATA_ROOT_DIR = os.path.join("/content", "food")
TRAIN_DATA_ROOT_DIR = os.path.join(DATA_ROOT_DIR, "train") # train이라는 폴더를 만들어 주겠다.
TEST_DATA_ROOT_DIR = os.path.join(DATA_ROOT_DIR, "test")

# label_name_list = 폴더이름이자 정답이름

In [5]:
# 압축 풀어주기 (풀면 파일이 만들어짐)
with zipfile.ZipFile(os.path.join("/content", "food.zip"), "r") as target_file:
  target_file.extractall(TRAIN_DATA_ROOT_DIR)
    

In [19]:
# # 삭제할 파일있으면 아래코드 사용하기
# if os.path.exists("/content/food"):  # COLAB = os.path.exists("/content/GTSRB")
#     shutil.rmtree("/content/food")
#     print("/content/food/train is deleted!")

/content/food/train is deleted!


In [None]:
# # 풀어놓은 전체 데이터를 TRAIN파일에 옮겨주자

# shutil.copytree(DATA_ROOT_DIR, TRAIN_DATA_ROOT_DIR) # COPYTREE는 기존에 똑같은 이름의 폴더가 있으면 에러가 생김.

'/content/food/train'

In [6]:
label_name_list = os.listdir(TRAIN_DATA_ROOT_DIR)
label_name_list

['Pizza', 'pasta', 'chicken', 'Burger']

In [7]:
# TEST폴더 만들어주자 / 기존에 같은 명의의 파일이 있으면 에러생김

if not os.path.exists("/content/food/test"):
    os.mkdir("/content/food/test")
    print("/content/food/test directory is created")
    
    count = 0
    for label_name in label_name_list:
        os.mkdir("/content/food/test/" + label_name.strip())
        print("/content/food/test/" + label_name.strip() + " is created")
        
        count = count + 1
        
    print("총 횟수 : ", count)

/content/food/test directory is created
/content/food/test/Pizza is created
/content/food/test/pasta is created
/content/food/test/chicken is created
/content/food/test/Burger is created
총 횟수 :  4


In [8]:
ratio = 0.2

src_root_dir = "/content/food/train/"
dst_root_dir = "/content/food/test/"

for label_name in label_name_list:
    train_image_file_list = glob.glob(src_root_dir + label_name + "/*.*")
    print("==========================================================")
    
    print("total [%s] image file nums => [%s]"%(label_name, len(train_image_file_list)))
    
    split_num = int(ratio * len(train_image_file_list))
    print("분리할 갯수 : ", split_num)
    
    test_image_file_list = train_image_file_list[0:split_num]
    
    # move
    move_nums = 0
    for image_file in test_image_file_list:
        shutil.move(image_file, dst_root_dir + label_name)
        move_nums = move_nums + 1
        
    print("total move nums => ", move_nums)
    print("===========================================================")

total [Pizza] image file nums => [1500]
분리할 갯수 :  300
total move nums =>  300
total [pasta] image file nums => [1230]
분리할 갯수 :  246
total move nums =>  246
total [chicken] image file nums => [1396]
분리할 갯수 :  279
total move nums =>  279
total [Burger] image file nums => [1500]
분리할 갯수 :  300
total move nums =>  300


In [9]:
# 실제 이미지를 메모리에 올려놓자

train_data_list = []
train_label_list = []

class_dict = {"Pizza":0, "pasta":1,"chicken":2, "Burger":3}


start_time = datetime.now()
for label_name in label_name_list:
  file_path = os.path.join(TRAIN_DATA_ROOT_DIR, label_name)
  img_file_list = glob.glob(file_path + "/*.*")


# 이미지, 색, 순서(bgr->rgb)
  for img in img_file_list:
    src_img = cv2.imread(img,cv2.IMREAD_COLOR)
    src_img = cv2.resize(src_img, dsize=(224,224))
    src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)

    train_data_list.append(src_img)
    train_label_list.append(class_dict[label_name])
end_time = datetime.now()
print("Elapsed Time : ", end_time - start_time)

Elapsed Time :  0:00:10.914221


In [10]:
X_train = np.array(train_data_list).astype("float32")
y_train = np.array(train_label_list).astype("float32")

In [11]:
print(X_train.shape, y_train.shape)

(4501, 224, 224, 3) (4501,)


In [12]:
# 실제 이미지를 메모리에 올려놓자 test

test_data_list = []
test_label_list = []

class_dict = {"Pizza":0, "pasta":1,"chicken":2, "Burger":3}

for label_name in label_name_list:
  file_path = os.path.join(TEST_DATA_ROOT_DIR, label_name)
  img_file_list = glob.glob(file_path + "/*.*")


# 이미지, 색, 순서(bgr->rgb)
  for img in img_file_list:
    src_img = cv2.imread(img,cv2.IMREAD_COLOR)
    src_img = cv2.resize(src_img, dsize=(224,224))
    src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)

    test_data_list.append(src_img)
    test_label_list.append(class_dict[label_name])

In [13]:
X_test = np.array(test_data_list).astype("float32")
y_test = np.array(test_label_list).astype("float32")

In [14]:
print(X_test.shape, y_test.shape)

(1125, 224, 224, 3) (1125,)


In [15]:
# 정규화

X_train = X_train / 255.0
X_test = X_test / 255.0

In [16]:
base_model = MobileNet(weights="imagenet", include_top=False, input_shape=(224,224,3))

model = Sequential()
model.add(base_model)

# model.add(Flatten())

model.add(GlobalAveragePooling2D())
model.add(Dense(128, activation='relu')) #은닉층
model.add(Dense(128, activation='relu'))

model.add(Dense(4, activation = 'softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.00001), metrics=["accuracy"])
base_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
Model: "mobilenet_1.00_224"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1 (Conv2D)              (None, 112, 112, 32)      864       
                                                                 
 conv1_bn (BatchNormalizatio  (None, 112, 112, 32)     128       
 n)                                                              
                                                                 
 conv1_relu (ReLU)           (None, 112, 112, 32)      0         
                                                                 
 conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)     288       
                                                        

In [None]:
# # 모델 설계 cmd cmd fdd (sigmoid, binary)
# model = Sequential()

# model.add(Conv2D(kernel_size=3, filters=32, activation="relu", 
#                  padding="SAME", input_shape=(128,128,3)))
# model.add(MaxPool2D(pool_size=(2,2)))
# model.add(Dropout(0.25))

# model.add(Conv2D(kernel_size=3, filters=64, activation="relu", kernel_initializer="he_normal",
#                  padding="SAME",))
# model.add(MaxPool2D(pool_size=(2,2)))
# model.add(Dropout(0.25))

# model.add(Conv2D(kernel_size=3, filters=64, activation="relu", 
#                  padding="SAME",))
# model.add(MaxPool2D(pool_size=(2,2)))
# model.add(Dropout(0.25))
 
# model.add(Flatten())

# model.add(Dense(128, activation='relu'))
# model.add(Dropout(0.25))
# model.add(Dense(6, activation='softmax'))

# # 출력 개수가 1인 이유는 이중분류는 1로만 한다.
# # 다중 출력이라면 원핫코딩의 개수만큼 적어준다.



model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenet_1.00_128 (Functio  (None, 4, 4, 1024)       3228864   
 nal)                                                            
                                                                 
 global_average_pooling2d (G  (None, 1024)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 16)                16400     
                                                                 
 dense_1 (Dense)             (None, 16)                272       
                                                                 
 dense_2 (Dense)             (None, 6)                 102       
                                                                 
Total params: 3,245,638
Trainable params: 3,223,750
Non-

In [17]:
from tensorflow.keras.callbacks import EarlyStopping
earlystopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

result = model.fit(X_train, y_train, epochs=30, validation_split=0.2, callbacks=[earlystopping])


#### 결과 작성
### imagenet, dd 128, adam0.00001, 다중분류
## loss: 0.0066 - accuracy: 0.9994 - val_loss: 0.2899 - val_accuracy: 0.9223

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [18]:
model.evaluate(X_test, y_test)

### imagenet, dd 128, adam0.00001, 다중분류
## [0.15950730443000793, 0.9475555419921875]



[0.15950730443000793, 0.9475555419921875]

In [31]:
test_img2 = cv2.imread("/content/gdrive/MyDrive/Colab Notebooks/for_test/1.jpg", cv2.IMREAD_COLOR)
test_img2 = cv2.resize(test_img2, dsize=(224,224))
test_img2 = cv2.cvtColor(test_img2, cv2.COLOR_BGR2RGB)

pred2 = model.predict(test_img2.reshape(-1, 224,224,3))
score2=tf.nn.softmax(pred2)

print("{:.2f}percnet confidence.".format( 100*np.max(score2)))

45.38percnet confidence.


In [25]:
type(test_img2)

NoneType

1월 12일 종합
모델링설계는 그대로 가고
학습에서 높은점수 약 92프로정도와 evaluate도 높은것을 보아 모델링설계에서는 문제가 없는것 같다. 하지만 실제 예측값이 낮다는 문제점 발견. 

이미지 크롤링단계에서 더 많은 정제된 이미지를 가져오는것이 중요해졌다. 


1. 피자사진만 predict해봤는데, 다른 사진도 해봐야겠다. 
2. 방향성은 맞으니, 이미지 오답처리와 정제된 이미지 가져오는게 중요해졌다. 
