[참고] : https://towardsdatascience.com/image-captioning-with-keras-teaching-computers-to-describe-pictures-c88a46a311b8

In [None]:
# Mount Google Drive at /content/drive so the archives and CSV files referenced
# by the later cells are readable (this import only exists on Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install visual-attention-tf

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting visual-attention-tf
  Downloading visual_attention_tf-1.2.0-py3-none-any.whl (5.4 kB)
Installing collected packages: visual-attention-tf
Successfully installed visual-attention-tf-1.2.0


In [None]:
import os

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import Input, layers
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input
from keras.layers import Dense, Activation, Flatten, Reshape, Dropout, Conv2D, Conv1D, GlobalAveragePooling2D
from keras.layers.merge import add
from keras.models import Model
from numpy import array
from PIL import Image
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.utils import img_to_array, custom_object_scope
from tqdm import tqdm

%matplotlib inline


In [None]:
class EfficientChannelAttention2D(tf.keras.layers.Layer):
    """Channel-attention layer that re-weights the channels of a feature map.

    The input is averaged over axes 1 and 2, a 3-tap 1-D convolution (one
    filter, no bias) is slid across the resulting per-channel means, and the
    sigmoid of that result is multiplied back onto the input.

    Args:
        nf: Nominal number of channels of the input. It is stored and
            serialised in the layer config but is not used to size any weight;
            the convolution kernel is always of length 3.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, nf=32, **kwargs):
        super(EfficientChannelAttention2D, self).__init__(**kwargs)
        self.nf = nf
        self.conv1 = Conv1D(filters=1, kernel_size=3, activation=None, padding="same", use_bias=False)

    @tf.function
    def call(self, x):
        """Scale every channel of ``x`` by a learned attention weight in (0, 1).

        Shape comments below assume a channels-last 4-D input (B, H, W, C).
        """
        pool = tf.reduce_mean(x, [1, 2])          # (B, C): mean over axes 1 and 2
        pool = tf.expand_dims(pool, -1)           # (B, C, 1): channels become the Conv1D "steps"
        att = self.conv1(pool)  # set k=3 for every channel size between 8 and 64
        att = tf.transpose(att, perm=[0, 2, 1])   # (B, 1, C)
        att = tf.expand_dims(att, 1)              # (B, 1, 1, C): broadcastable against x
        att = tf.sigmoid(att)
        y = tf.multiply(x, att)
        return y

    def get_config(self):
        """Return the layer config, including ``nf``, for saving and reloading.

        The key has to match the ``__init__`` argument name because the default
        ``from_config`` rebuilds the layer with ``cls(**config)``.
        """
        config = super(EfficientChannelAttention2D, self).get_config()
        config.update({"nf": self.nf})
        return config

In [None]:
# Extract every image archive stored on Drive into its own scratch directory.
from zipfile import ZipFile

# (archive on Drive, directory to extract into)
archives = [
    ('/content/drive/MyDrive/5조/데이터/음식.zip', './temp1'),
    ('/content/drive/MyDrive/5조/데이터/반려동물_완.zip', './temp2'),
    ('/content/drive/MyDrive/5조/데이터/육아.zip', './temp3'),
    ('/content/drive/MyDrive/5조/데이터/디저트.zip', './temp4'),
    ('/content/drive/MyDrive/5조/데이터/네일아트_완.zip', './temp5'),
    ('/content/drive/MyDrive/5조/데이터/옷스타그램 (재업).zip', './temp6'),
]

for archive_path, target_dir in archives:
    # Bind the handle to `archive`, not `zip`, so the builtin zip() stays usable
    # in every later cell.
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)


In [None]:
# Load the six per-category label tables and merge them into one frame.
# Every table ends up with the same 11 columns: image file name, post URL and
# nine 0/1 label flags.
label_columns = ['이미지id', 'url', '음식', '음료수', '성인', '아동', '동물', '네일', '꽃', '기타', '패션']

df1 = pd.read_csv('/content/drive/MyDrive/5조/데이터/csv/food_info.csv', encoding='cp949')  # ids 1000001~1003048
column = df1.columns

df2 = pd.read_csv('/content/drive/MyDrive/5조/데이터/csv/반려동물_info.csv')  # ids 2007781~2012346
df2.columns = column  # reuse df1's header names so the two tables line up in concat

df3 = pd.read_csv('/content/drive/MyDrive/5조/데이터/csv/육아.csv')  # ids 39000~310159
df3 = df3[label_columns]

df4 = pd.read_csv('/content/drive/MyDrive/5조/데이터/csv/디저트.csv', encoding='cp949')  # ids 6028883~6035562
# Drop incomplete rows first (looking at all original columns), then keep the shared columns.
df4 = df4.dropna()[label_columns]

df5 = pd.read_csv('/content/drive/MyDrive/5조/데이터/csv/labeled/네일아트_id_url_src_labeld_0707.csv', encoding='cp949')  # ids 6000000~6004522
# This file carries an extra 'src' column; name all 12 columns, then discard 'src'.
df5.columns = ['이미지id', 'url', 'src', '음식', '음료수', '성인', '아동', '동물', '네일', '꽃', '기타', '패션']
df5 = df5[label_columns]

df6 = pd.read_csv('/content/drive/MyDrive/5조/데이터/csv/labeled/옷스타그램_id_url_src_0707.csv', encoding='cp949')  # ids 6007781~6028882
df6 = df6[label_columns]

# Remove rows with missing values BEFORE renumbering, so the final index is a
# gap-free 0..n-1 range that matches the row positions used downstream.
df_total = (
    pd.concat([df1, df2, df3, df4, df5, df6])
    .dropna()
    .reset_index(drop=True)
)
df_total

Unnamed: 0,이미지id,url,음식,음료수,성인,아동,동물,네일,꽃,기타,패션
0,1000001.jpg,https://www.instagram.com/p/CfP6mTqhoHJ/,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1000002.jpg,https://www.instagram.com/p/CfTNdZCu1SV/,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1000003.jpg,https://www.instagram.com/p/CfX4ZwIFUp_/,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1000004.jpg,https://www.instagram.com/p/CfWMXQpl5tI/,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1000005.jpg,https://www.instagram.com/p/CfN05YRP4rC/,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
37820,6028878.jpg,/p/CfOQ3G5PK81/,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
37821,6028879.jpg,/p/CfLkzONhdVh/,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
37822,6028880.jpg,/p/CfB6RLDvO77/,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
37823,6028881.jpg,/p/Ce3mdxAvc1l/,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Code that turns the images into feature vectors

# Load the inception v3 model
model = InceptionV3(weights='imagenet')

# Create a new model that shares Inception v3's input but stops at layers[-3],
# i.e. it drops the last two layers, not only the output layer.
# NOTE(review): layers[-3] is presumably the final 8x8x2048 convolutional block
# (the encoder comment and the later reshape(64, 2048) both rely on that size);
# confirm with model.summary().
model_new = Model(model.input, model.layers[-3].output)

# Function to encode a given image into an Inception feature map (8 x 8 x 2048 values)
def encode(image_path):
    """Run one image file through the truncated Inception model.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The array returned by ``model_new.predict`` for a batch containing this
        single image; callers reshape it to (64, 2048).
    """
    # Read from the function's own argument; relying on a global variable that
    # merely happens to hold the same value makes the function unusable elsewhere.
    img = keras.preprocessing.image.load_img(image_path, target_size=(299, 299))
    # Convert image to numpy array of 3-dimensions
    x = img_to_array(img)
    # Add one more dimension (the batch axis)
    x = np.expand_dims(x, axis=0)
    # preprocess the images using preprocess_input() from inception module
    x = preprocess_input(x)
    fea_vec = model_new.predict(x)  # Get the encoding for the image

    return fea_vec  # this array is what the classifier uses as input data

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5


In [None]:
# Directory that holds each category's extracted images.
# NOTE(review): the sub-directory names under temp2/temp3 look like Korean folder
# names decoded with a legacy code page during extraction; they are kept verbatim
# because they must match what is on disk.
image_path1='/content/temp1/'
image_path2='/content/temp2/╣▌╖┴╡┐╣░/'
image_path3='/content/temp3/└░╛╞/'
image_path4='/content/temp4/'
image_path5='/content/temp5/'
image_path6='/content/temp6/'

# Categories that can be recognised from the first digit of the file name alone.
image_dirs_by_first_digit = {1: image_path1, 2: image_path2, 3: image_path3}
# Three categories share the leading digit 6 and are told apart by id range:
# (lowest id, highest id, directory).
image_dirs_by_id_range = [
    (6028883, 6035562, image_path4),
    (6000000, 6004522, image_path5),
    (6007781, 6028882, image_path6),
]


def resolve_image_path(image):
    """Return the full path of an image file given its name, e.g. '1000001.jpg'.

    Raises:
        ValueError: If the name does not start with a digit, or if it belongs
            to none of the known categories. Skipping such a row silently would
            leave the feature array shorter than, and misaligned with, the
            label matrix built from the same table.
    """
    first_digit = int(image[0])
    if first_digit in image_dirs_by_first_digit:
        return image_dirs_by_first_digit[first_digit] + image
    if first_digit == 6:
        image_id = int(image.split('.')[0])
        for lowest_id, highest_id, directory in image_dirs_by_id_range:
            if lowest_id <= image_id <= highest_id:
                return directory + image
    raise ValueError(f"No image directory is configured for image id {image!r}")


data=[]
for image in tqdm( df_total['이미지id'].values ):
  path = resolve_image_path(image)
  # Flatten the 8x8 spatial grid into 64 positions of 2048 features each.
  data.append( np.array(encode(path)).reshape(64,2048) )

data=np.array(data)
data.shape

100%|██████████| 37823/37823 [1:06:34<00:00,  9.47it/s]


(37823, 64, 2048)

In [None]:
# Label matrix: every column after the first two (image id, url), as a NumPy array.
label_frame = df_total.iloc[:, 2:]
Y = label_frame.values

In [None]:
# Split features and labels with train_test_split: 20% is held out as the test
# set, and the random state is fixed so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(data, Y, random_state=2022, test_size=0.2)

In [None]:
# Multi-label classifier that sits on top of the pre-computed image features.
# Each sample is treated as a single-channel 64 x 2048 "image".
# NOTE(review): the feature array is built with shape (N, 64, 2048), i.e. without
# the trailing channel axis declared here -- confirm Keras adds it as expected,
# or expand the array explicitly before fitting.
inputs1 = Input(shape=(64, 2048, 1))
# The input shape comes from `inputs1`, so the layers do not need input_shape.
cnn_layer1 = Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(inputs1)
cnn_layer2 = Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(cnn_layer1)
# Using the .shape[-1] to simplify network modifications. Can directly input number of channels as well
attention_cnn = EfficientChannelAttention2D(cnn_layer2.shape[-1])(cnn_layer2)
drop = Dropout(0.25)(attention_cnn)
pool = GlobalAveragePooling2D()(drop)
d1 = Dense(128, activation='relu')(pool)
d2 = Dense(256, activation='relu')(d1)
d3 = Dense(256, activation='relu')(d2)
# Each dense layer feeds the next one: d4 must take d3 as its input, otherwise
# d3 is silently left out of the model graph.
d4 = Dense(512, activation='relu')(d3)
# Multi-label classification, so the output activation has to be sigmoid
# (one independent probability per label).
outputs = Dense(9, activation='sigmoid')(d4)

model = Model(inputs=inputs1, outputs=outputs)


In [None]:
# Print the classifier's layers with their output shapes and parameter counts.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 64, 2048, 1)]     0         
                                                                 
 conv2d_94 (Conv2D)          (None, 64, 2048, 32)      320       
                                                                 
 conv2d_95 (Conv2D)          (None, 64, 2048, 32)      9248      
                                                                 
 efficient_channel_attention  (None, 64, 2048, 32)     3         
 2d (EfficientChannelAttenti                                     
 on2D)                                                           
                                                                 
 dropout (Dropout)           (None, 64, 2048, 32)      0         
                                                                 
 global_average_pooling2d (G  (None, 32)               0   

In [None]:
# File that receives the best model seen during training.
model_path = 'best_model.h5'

# Stop training once 10 epochs pass without improvement.
es = EarlyStopping(patience=10)
# Save the whole model (not only the weights), and only when it improves.
mc = ModelCheckpoint(
    filepath=model_path,
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)

# Binary cross-entropy scores each sigmoid output independently.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for up to 100 epochs in batches of 80, keeping 20% of the training data
# for validation; the checkpoint and early-stopping callbacks run every epoch.
hist = model.fit(
    X_train,
    Y_train,
    batch_size=80,
    epochs=100,
    validation_split=0.2,
    callbacks=[mc, es],
)

In [None]:
# Reload the model file written by the checkpoint callback. The custom attention
# layer has to be passed in so Keras can rebuild it from the saved config.
best_model = load_model(
    model_path,
    custom_objects={'EfficientChannelAttention2D': EfficientChannelAttention2D},
)
label = best_model.predict(X_test)

# Turn the per-label sigmoid scores into 0/1 decisions with a 0.5 threshold.
y_pred = (label >= 0.5).astype(int)
# For multi-label targets this is subset accuracy: a sample counts as correct
# only when every one of its labels is predicted correctly.
print(accuracy_score(Y_test, y_pred))

# Frames with one integer-named column per label, used for per-label accuracy.
real_df = pd.DataFrame(Y_test)
pred_df = pd.DataFrame(y_pred)

In [None]:
# How well the model predicts the food label (label column 0)
from sklearn.metrics import accuracy_score
accuracy_score(y_true=real_df[0], y_pred=pred_df[0])

In [None]:
# How well the model predicts the pet / animal label (label column 4)
from sklearn.metrics import accuracy_score
accuracy_score(y_true=real_df[4], y_pred=pred_df[4])

In [None]:
# How well the model predicts the child label (label column 3)
from sklearn.metrics import accuracy_score
accuracy_score(y_true=real_df[3], y_pred=pred_df[3])

In [None]:
# How well the model predicts the adult label (label column 2)
from sklearn.metrics import accuracy_score
accuracy_score(y_true=real_df[2], y_pred=pred_df[2])