<a href="https://colab.research.google.com/github/Cl8D/SSUML/blob/main/Project%202%20%3A%20Intel%20Image%20Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so files stored on Drive are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#!mkdir ./drive/My\ Drive/project1

In [None]:
#!unzip ./drive/My\ Drive/archive.zip -d ./drive/My\ Drive/project1

In [None]:
# Change the working directory to the project folder on Drive and list its contents.
%cd /content/drive/My Drive/project1
!ls

## **--- 실행 코드 ---**

In [None]:
# Mount Google Drive at /content/drive; the dataset is read from the project folder stored there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Work from the project folder: the relative dataset paths used later (./seg_train/..., ./seg_test/...)
# are resolved against this directory. `!ls` lists the folder contents as a quick sanity check.
%cd /content/drive/My Drive/project1
!ls

In [None]:
import tensorflow.keras.layers as Layers
import tensorflow.keras.activations as Actications
import tensorflow.keras.models as Models
import tensorflow.keras.optimizers as Optimizer
import tensorflow.keras.metrics as Metrics
import tensorflow.keras.utils as Utils
from keras.utils.vis_utils import model_to_dot
import os
import matplotlib.pyplot as plot
import cv2
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix as CM
from random import randint
from IPython.display import SVG
import matplotlib.gridspec as gridspec

In [None]:

def get_images(directory):
    """Load every image found in the class sub-folders of ``directory``.

    Each sub-folder of ``directory`` is treated as one class. Images are read
    with OpenCV (so the channel order is BGR) and resized to 150x150.

    Parameters
    ----------
    directory : str
        Folder whose sub-folders contain the image files. A trailing path
        separator is accepted but not required.

    Returns
    -------
    The result of ``sklearn.utils.shuffle(Images, Labels)``: the image list and
    the label list, shuffled consistently with a fixed random seed.

    Labels: 0 buildings, 1 forest, 2 glacier, 3 mountain, 4 sea, 5 street.
    Folders with any other name (for example an unlabeled prediction set) get
    the placeholder label 0.
    """
    folder_to_label = {
        'buildings': 0,
        'forest': 1,
        'glacier': 2,
        'mountain': 3,
        'sea': 4,
        'street': 5,
    }
    Images = []
    Labels = []

    for folder in os.listdir(directory):  # Each class is stored in a folder named after it.
        folder_path = os.path.join(directory, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders cannot be listed as directories.
            continue

        # Look the label up per folder so an unknown folder never inherits the
        # label of whichever folder happened to be listed before it.
        label = folder_to_label.get(folder, 0)

        for image_file in os.listdir(folder_path):
            image = cv2.imread(os.path.join(folder_path, image_file))
            if image is None:
                # cv2.imread returns None for files it cannot decode; skip them
                # instead of letting cv2.resize fail on None.
                continue
            image = cv2.resize(image, (150, 150))  # Source images do not all share one size.
            Images.append(image)
            Labels.append(label)

    # Fixed seed keeps the shuffled order reproducible between runs.
    return shuffle(Images, Labels, random_state=817328462)

def get_classlabel(class_code):
    """Return the scene-category name for a numeric class code.

    Codes map as 0 buildings, 1 forest, 2 glacier, 3 mountain, 4 sea, 5 street.
    A code outside that set raises ``KeyError``.
    """
    names_by_code = ('buildings', 'forest', 'glacier', 'mountain', 'sea', 'street')
    code_to_name = dict(enumerate(names_by_code))
    return code_to_name[class_code]


*Label 구분하기 (class_names)

In [None]:
# Roots of the training and test image folders, relative to the working directory.
datasets = ['./seg_train/seg_train', './seg_test/seg_test']
# Module-level list meant to hold the loaded (images, labels) pairs.
output = []

# The position of a name in this list is the integer id of that class.
class_names = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
class_name_labels = dict(zip(class_names, range(len(class_names))))

nb_classes = len(class_names)
class_name_labels

*데이터 로드하기 (1시간 넘게 걸림 주의.)

In [None]:
from tqdm import tqdm

def load_data():
    """Load every dataset listed in ``datasets`` into NumPy arrays.

    For each dataset root, the sub-folders named in ``class_name_labels`` are
    read; every image is loaded with OpenCV in colour mode (BGR channel order)
    and resized to 150x150.

    Returns
    -------
    list of (images, labels) tuples, one per entry of ``datasets`` and in the
    same order. Both arrays are float32; ``labels`` holds the class ids from
    ``class_name_labels``.

    Notes
    -----
    * Results are collected in a fresh local list, so calling this function
      again returns only the datasets of that call.
    * Samples are shuffled with a fixed seed. Keras' ``validation_split`` takes
      the tail of the arrays, so data left in folder order would put only the
      last-listed classes into the validation set.
    """
    loaded = []

    for dataset in datasets:
        print("Loading {}".format(dataset))

        images, labels = [], []

        for folder in os.listdir(dataset):
            if folder not in class_name_labels:
                # Entries that are not one of the known class folders carry no label.
                continue
            label = class_name_labels[folder]
            folder_path = os.path.join(dataset, folder)

            for file in tqdm(os.listdir(folder_path)):
                img = cv2.imread(os.path.join(folder_path, file), cv2.IMREAD_COLOR)
                if img is None:
                    # cv2.imread returns None for files it cannot decode.
                    continue
                images.append(cv2.resize(img, (150, 150)))
                labels.append(label)

        if images:
            # Shuffle the Python lists before building the arrays: reordering
            # list references is cheap, copying the float32 array is not.
            images, labels = shuffle(images, labels, random_state=817328462)

        images = np.array(images, dtype=np.float32)
        labels = np.array(labels, dtype=np.float32)

        loaded.append((images, labels))

    return loaded

In [None]:
# load_data() yields one (images, labels) pair per entry of `datasets`: the training set first, then the test set.
(train_images,train_labels),(test_images,test_labels) = load_data()

*각각의 label에 맞는 train/test 데이터의 수를 dataframe으로 출력하기.

- np.unique : 중복된 성분을 제외한 array 반환.
여러 개의 이미지가 모이게 되면 중복된 label(빌딩, 숲, 숲, 산...) 이 나올 수 있게 되는데, 이때 중복된 성분을 제외시켜주기 때문에 각각의 label을 카운트할 수 있게 된다.

In [None]:
import pandas as pd

n_train = train_labels.shape[0]
n_test = test_labels.shape[0]

# Count images per class id. bincount with a fixed number of bins keeps row i
# aligned with class_names[i] even when a class has no samples in one split
# (np.unique would drop that class and shift the remaining counts).
# The labels are stored as floats, so cast them to integers first.
train_count = np.bincount(train_labels.astype(np.int64), minlength=nb_classes)
test_count = np.bincount(test_labels.astype(np.int64), minlength=nb_classes)

# One row per class: number of train images, number of test images, class name.
df = pd.DataFrame({'Train': train_count, 'Test': test_count, 'Name': class_names})

df

*막대 그래프로 표현하기

In [None]:
import matplotlib.pyplot as plt

# Bar chart of the per-class image counts for the train and test sets.
# The axes are created explicitly and handed to pandas; calling plt.figure()
# first would leave an extra empty figure, because DataFrame.plot opens its
# own figure when no axes are given.
fig, ax = plt.subplots()
df.set_index('Name').plot.bar(rot=0, ax=ax)
ax.set_ylabel('Number of images')
ax.set_title('Images per class')
plt.show()

*원 그래프(pie chart)로 표현하기 (train dataset)

In [None]:
# Pie chart showing what share of the training images belongs to each of the six classes.
plt.pie(
    x=train_count,
    labels=class_names,
    explode=(0,) * 6,  # no slice is pulled out of the pie
    autopct='%1.1f%%',  # print each share as a percentage with one decimal
)
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.title('Proportion of each observed quantity in train dataset')
plt.show()

*원 그래프(pie chart)로 표현하기 (test dataset)

In [None]:
# Same pie chart as for the training data, this time for the test images.
plt.pie(
    x=test_count,
    autopct='%1.1f%%',  # percentage label with one decimal on every slice
    labels=class_names,
    explode=(0,) * 6,  # keep all six slices attached
)
plt.axis('equal')  # equal axis scaling keeps the pie circular
plt.title('Proportion of each observed quantity in test dataset')
plt.show()

*Train/Test data 및 label의 shape 출력하기

In [None]:
# Print the shape of every image and label array as a quick sanity check after loading.
for caption, array in (
    ("Shape of Train Images:", train_images),
    ("Shape of Test Images:", test_images),
    ("Shape of Train Labels:", train_labels),
    ("Shape of test Labels:", test_labels),
):
    print(caption, array.shape)

*랜덤으로 이미지를 불러와서 label과 함께 출력

In [None]:
# Show a 5x5 grid of randomly chosen training images, each titled with its class name.
f, ax = plot.subplots(5, 5)
# Positional arguments are left, bottom, right, top; right/top of 3 stretch the grid
# so the individual images are drawn large.
f.subplots_adjust(0, 0, 3, 3)
for i in range(5):
    for j in range(5):
        # random.randint includes BOTH endpoints, so the upper bound must be the
        # last valid index; len(train_images) itself would raise IndexError.
        rnd_number = randint(0, len(train_images) - 1)
        # The images were read with OpenCV (BGR); reverse the channel axis so
        # matplotlib, which expects RGB, shows the true colours.
        rgb_image = train_images[rnd_number].astype(np.uint8)[..., ::-1]
        ax[i, j].imshow(rgb_image)
        # Labels are stored as floats; convert to a plain int for the lookup.
        ax[i, j].set_title(get_classlabel(int(train_labels[rnd_number])))
        ax[i, j].axis('off')

*모델 구성하기

In [None]:
# Convolutional classifier for 150x150 colour images with six output classes.
model = Models.Sequential()

# Block 1: two 3x3 convolutions, then 5x5 max pooling and batch normalisation.
model.add(Layers.Conv2D(200,kernel_size=(3,3),activation='relu',input_shape=(150,150,3)))
model.add(Layers.Conv2D(180,kernel_size=(3,3),activation='relu'))

model.add(Layers.MaxPool2D(5,5))
model.add(Layers.BatchNormalization())

# Block 2: four 3x3 convolutions with a shrinking number of filters, then pooling and normalisation.
model.add(Layers.Conv2D(180,kernel_size=(3,3),activation='relu'))
model.add(Layers.Conv2D(140,kernel_size=(3,3),activation='relu'))
model.add(Layers.Conv2D(140,kernel_size=(3,3),activation='relu'))
model.add(Layers.Conv2D(60,kernel_size=(3,3),activation='relu'))
model.add(Layers.MaxPool2D(5,5))
model.add(Layers.BatchNormalization())

# Classifier head: flatten, dropout, three dense layers, dropout.
model.add(Layers.Flatten())
model.add(Layers.Dropout(rate=0.3))

model.add(Layers.Dense(180,activation='relu'))
model.add(Layers.Dense(100,activation='relu'))
model.add(Layers.Dense(50,activation='relu'))
model.add(Layers.Dropout(rate=0.3))

# One softmax output per class.
model.add(Layers.Dense(6,activation='softmax'))

# `learning_rate` is the supported argument name; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
# sparse_categorical_crossentropy matches the integer-coded (not one-hot) labels.
model.compile(optimizer=Optimizer.Adam(learning_rate=0.0001),loss='sparse_categorical_crossentropy',metrics=['accuracy'])

model.summary()

*모델 학습시키기

In [None]:
# fit() trains the model. The process in which the model updates its parameters from the error
# is called learning, training, or fitting, because the model gradually fits itself to the data.
# NOTE(review): Keras takes the validation_split fraction from the END of the arrays, before any
# shuffling, so the last 30% of train_images/train_labels become the validation set -
# confirm the arrays are in shuffled order here, otherwise validation may cover only some classes.

trained = model.fit(train_images,train_labels,epochs=20,validation_split=0.30)

In [None]:
# Accuracy per epoch on the training portion and on the held-out validation split.
plot.plot(trained.history['accuracy'])
plot.plot(trained.history['val_accuracy'])
plot.title('Model accuracy')
plot.ylabel('Accuracy')
plot.xlabel('Epoch')
# The second curve comes from validation_split of the training data, not from the
# test set, so it is labelled "Validation".
plot.legend(['Train', 'Validation'], loc='upper left')
plot.show()

# Loss per epoch for the same two splits.
plot.plot(trained.history['loss'])
plot.plot(trained.history['val_loss'])
plot.title('Model loss')
plot.ylabel('Loss')
plot.xlabel('Epoch')
plot.legend(['Train', 'Validation'], loc='upper left')
plot.show()

In [None]:
# Disabled alternative that would load the test set through get_images().
# It is wrapped in a string literal, so none of these statements execute.
'''
test_images,test_labels = get_images('../input/seg_test/seg_test/')
test_images = np.array(test_images)
test_labels = np.array(test_labels)
'''

In [None]:
# Evaluate the trained model on the test set; with the compile settings used for this model
# the result is the loss followed by the accuracy.
model.evaluate(test_images,test_labels, verbose=1)

In [None]:
# Load the unlabeled prediction images. The notebook works from the project folder,
# where the dataset folders are addressed as ./seg_train and ./seg_test, so seg_pred
# is read from the same place. get_images() still returns a label list; it carries
# no meaning for this unlabeled set and is ignored.
pred_images,no_labels = get_images('./seg_pred/')
pred_images = np.array(pred_images)
pred_images.shape

In [None]:
# 5x5 grid of random prediction images. Each grid cell stacks the image (titled with
# the predicted class) above a bar chart of the six class probabilities.
fig = plot.figure(figsize=(30, 30))
outer = gridspec.GridSpec(5, 5, wspace=0.2, hspace=0.2)

for i in range(25):
    inner = gridspec.GridSpecFromSubplotSpec(2, 1,subplot_spec=outer[i], wspace=0.1, hspace=0.1)
    # random.randint includes both endpoints; the largest valid index is len - 1.
    rnd_number = randint(0, len(pred_images) - 1)
    pred_image = np.array([pred_images[rnd_number]])  # batch containing one image

    # A single forward pass provides both the probabilities and the predicted class.
    # Sequential.predict_classes() no longer exists in current Keras; argmax over
    # the softmax output is the documented replacement.
    pred_prob = model.predict(pred_image).reshape(6)
    pred_class = get_classlabel(int(np.argmax(pred_prob)))

    # Upper half: the image itself.
    image_ax = plot.Subplot(fig, inner[0])
    # OpenCV delivers BGR; reverse the channel axis so matplotlib shows RGB colours.
    image_ax.imshow(pred_image[0][..., ::-1])
    image_ax.set_title(pred_class)
    image_ax.set_xticks([])
    image_ax.set_yticks([])
    fig.add_subplot(image_ax)

    # Lower half: predicted probability of each class id.
    prob_ax = plot.Subplot(fig, inner[1])
    prob_ax.bar([0,1,2,3,4,5],pred_prob)
    fig.add_subplot(prob_ax)


# pyplot's show() works with the inline notebook backend; Figure.show() only warns there.
plot.show()