In [1]:
import math
import os
from glob import glob

import joblib
import numpy as np
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

In [2]:
def read_folder(folder_path):
    """Return the names of the JPG files directly inside ``folder_path``.

    Args:
        folder_path: Directory to list.

    Returns:
        A list of file names (not full paths) whose name ends with ``'jpg'``
        (case-sensitive, same suffix test as before), sorted by name.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. the folder is missing).
    """
    # os.listdir gives no ordering guarantee; sorting makes the result
    # identical across runs and platforms.
    return sorted(
        file_name
        for file_name in os.listdir(folder_path)
        if file_name.endswith('jpg')
    )

# Pick the wanted number of frames from every sequence and load them as arrays.
def data_maker(all_images, n, sequence_length=7):
    """Load the middle ``n`` frames of every sequence as one pixel array.

    ``all_images`` is treated as a flat list in which each run of
    ``sequence_length`` consecutive paths is one sequence, so the caller must
    pass the paths in an order that keeps the frames of a sequence together.

    Args:
        all_images: Image file paths, ``sequence_length`` per sequence.
        n: Integer number of frames to keep per sequence
            (``1 <= n <= sequence_length``). When ``n`` equals
            ``sequence_length`` every image is used.
        sequence_length: Frames per sequence; defaults to 7, the value that
            was previously hard-coded.

    Returns:
        ``np.ndarray`` with one entry per selected image, each image resized
        to 224x224 (presumably shape ``(num_selected, 224, 224, 3)`` with the
        default Keras loader settings - TODO confirm).

    Raises:
        ValueError: If ``n`` is outside ``1..sequence_length`` or the number
            of paths is not a whole number of sequences. Either case used to
            yield silently wrong or misaligned selections.
    """
    if not 1 <= n <= sequence_length:
        raise ValueError(
            f"n must be between 1 and {sequence_length}, got {n}"
        )
    if len(all_images) % sequence_length != 0:
        raise ValueError(
            f"expected a multiple of {sequence_length} images, "
            f"got {len(all_images)}"
        )

    if n == sequence_length:
        selected_images = list(all_images)
    else:
        # Offset of the first kept frame inside a sequence; for even n the
        # window sits one frame closer to the start (same as the old
        # int((7 - n) / 2) .. int((7 + n) / 2) slice).
        offset = (sequence_length - n) // 2
        selected_images = []
        for sequence_start in range(0, len(all_images), sequence_length):
            first = sequence_start + offset
            selected_images.extend(all_images[first:first + n])

    # Convert each image right after loading so only one PIL image is alive
    # at a time instead of the whole list.
    return np.asarray([
        image.img_to_array(image.load_img(path, target_size=(224, 224)))
        for path in selected_images
    ])

In [3]:
# Image data locations. Each root holds a CA_Y and a CA_N folder.
# (The original notebook kept "./temp_test" / "./temp_train" as commented-out
# alternatives for these roots.)
TEST_IMAGE_DIR = "./test/image"
TRAIN_IMAGE_DIR = "./train/image"
FRAMES_PER_SEQUENCE = 7  # images that make up one labelled sequence

test_CA_Y_folder = read_folder(f"{TEST_IMAGE_DIR}/CA_Y")
test_CA_N_folder = read_folder(f"{TEST_IMAGE_DIR}/CA_N")

train_CA_Y_folder = read_folder(f"{TRAIN_IMAGE_DIR}/CA_Y")
train_CA_N_folder = read_folder(f"{TRAIN_IMAGE_DIR}/CA_N")

# One label per sequence: abuse (CA_Y) => 1, no abuse (CA_N) => 0.
# CA_Y sequences come first, matching the order of the image lists below.
test_label = (
    [1] * (len(test_CA_Y_folder) // FRAMES_PER_SEQUENCE)
    + [0] * (len(test_CA_N_folder) // FRAMES_PER_SEQUENCE)
)
train_label = (
    [1] * (len(train_CA_Y_folder) // FRAMES_PER_SEQUENCE)
    + [0] * (len(train_CA_N_folder) // FRAMES_PER_SEQUENCE)
)
print(len(test_label))
print(len(train_label))

# Full image path lists, CA_Y first then CA_N.
# glob returns paths in arbitrary order, but data_maker treats every 7
# consecutive paths as one sequence, so each folder is sorted by name.
# NOTE(review): assumes file names sort so that the frames of one sequence are
# adjacent - confirm against the dataset's naming scheme.
test_images = (
    sorted(glob(f"{TEST_IMAGE_DIR}/CA_Y/*jpg"))
    + sorted(glob(f"{TEST_IMAGE_DIR}/CA_N/*jpg"))
)
train_images = (
    sorted(glob(f"{TRAIN_IMAGE_DIR}/CA_Y/*jpg"))
    + sorted(glob(f"{TRAIN_IMAGE_DIR}/CA_N/*jpg"))
)

# Every label must correspond to exactly one complete sequence of images;
# otherwise features and labels drift apart without any error.
assert len(test_images) == FRAMES_PER_SEQUENCE * len(test_label), \
    "test images do not form whole 7-frame sequences matching the labels"
assert len(train_images) == FRAMES_PER_SEQUENCE * len(train_label), \
    "train images do not form whole 7-frame sequences matching the labels"

165
1256


In [4]:
# Load the saved network and wrap it so that predict() returns the
# activations of its 'flatten_1' layer.
base_model = models.load_model("model1_layer3_RMSprop_simple.keras")
feature_layer = base_model.get_layer('flatten_1')
model = Model(inputs=base_model.input, outputs=feature_layer.output)

In [5]:
# Print the layer-by-layer summary of the feature-extraction model.
model.summary()

In [6]:
# Frames kept per sequence for this run; data_maker takes them from the
# middle of each sequence.
image_per_sequence = 1

# Load the selected frames of both splits as pixel arrays.
test_vector = data_maker(all_images=test_images, n=image_per_sequence)
train_vector = data_maker(all_images=train_images, n=image_per_sequence)

In [7]:
# Extract one feature vector per image.
# preprocess_input can overwrite a float array in place, so copies are passed:
# test_vector / train_vector keep their raw pixel values and re-running this
# cell does not preprocess the same data twice.
test_vectors = model.predict(preprocess_input(test_vector.copy()))
train_vectors = model.predict(preprocess_input(train_vector.copy()))

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 2s/step


In [8]:
# Show the shapes of the extracted feature arrays (one row per image).

print(test_vectors.shape)
print(train_vectors.shape)

(165, 25088)
(1256, 25088)


In [9]:
# Turn per-image feature rows into per-sequence rows by joining every n
# consecutive rows: (num_images, dim) -> (num_images / n, dim * n).
def concatenate_vectors(vectors, n):
    """Join every ``n`` consecutive rows of ``vectors`` into one row.

    Args:
        vectors: ``np.ndarray`` whose first axis indexes images; any further
            axes are flattened into the feature dimension.
        n: Number of consecutive rows that form one output row (>= 1).

    Returns:
        Array of shape ``(vectors.shape[0] // n, n * features_per_row)``.
        Row ``k`` holds input rows ``k*n .. k*n + n - 1`` laid end to end.
        The result may share memory with ``vectors`` (it is a reshape, not a
        copy). Empty input yields an empty ``(0, n * features_per_row)`` array.

    Raises:
        ValueError: If ``n < 1`` or the number of rows is not a multiple of
            ``n``.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")

    num_rows = vectors.shape[0]
    if num_rows % n != 0:
        raise ValueError(
            f"cannot group {num_rows} rows into chunks of {n}: "
            "row count must be a multiple of n"
        )

    # Product of the trailing axes; equals 1 for 1-D input.
    features_per_row = int(np.prod(vectors.shape[1:]))
    # A C-order reshape puts n consecutive rows next to each other, which is
    # exactly what the former slice / reshape(1, -1) / vstack loop built.
    return vectors.reshape(num_rows // n, n * features_per_row)

# Build one feature row per sequence for both splits.
test_concatenated_vectors = concatenate_vectors(vectors=test_vectors, n=image_per_sequence)
train_concatenated_vectors = concatenate_vectors(vectors=train_vectors, n=image_per_sequence)

# Report the resulting shapes.
print(f"test_concatenated_vectors : {test_concatenated_vectors.shape}")
print(f"train_concatenated_vectors : {train_concatenated_vectors.shape}")

test_concatenated_vectors : (165, 25088)
train_concatenated_vectors : (1256, 25088)


In [10]:
# from sklearn.model_selection import train_test_split  
# X_train, X_test, y_train, y_test = train_test_split(concatenated_vectors, labels, test_size=0.2)

In [11]:
# Labels as arrays so they can be reordered with an index array.
test_label = np.array(test_label)
train_label = np.array(train_label)

# Each sequence row needs exactly one label; a mismatch would otherwise be
# hidden by the index-based shuffle below.
assert len(test_concatenated_vectors) == len(test_label), \
    "test features and labels differ in length"
assert len(train_concatenated_vectors) == len(train_label), \
    "train features and labels differ in length"

# Seeded generator so the shuffled order is the same after a kernel restart.
shuffle_rng = np.random.default_rng(42)
test_indices = shuffle_rng.permutation(len(test_concatenated_vectors))
train_indices = shuffle_rng.permutation(len(train_concatenated_vectors))

# Apply the same permutation to features and labels of each split.
test_concatenated_shuffled_vectors = test_concatenated_vectors[test_indices]
test_shuffled_label = test_label[test_indices]

train_concatenated_shuffled_vectors = train_concatenated_vectors[train_indices]
train_shuffled_label = train_label[train_indices]

print(train_shuffled_label)

[1 1 1 ... 1 0 1]


In [12]:
# L1-regularised logistic regression on the per-sequence features.
# The 'saga' solver shuffles the data internally; random_state pins that
# shuffling so repeated runs fit the same model.
lr_p1 = LogisticRegression(
    C=0.1, penalty='l1', solver='saga', max_iter=10000, random_state=42
)
lr_p1.fit(train_concatenated_shuffled_vectors, train_shuffled_label)
# Mean accuracy on the test split.
print(lr_p1.score(test_concatenated_shuffled_vectors, test_shuffled_label))

0.7151515151515152


In [13]:
import joblib

# Save the fitted model; the file name records the frames-per-sequence setting.
model_name = f"Model2_Seq{image_per_sequence}_Model1.joblib"
joblib.dump(lr_p1, model_name)

['Model2_Seq1_Model1.joblib']

In [14]:
# Same pipeline as above, using the middle 3 frames of every sequence.
image_per_sequence = 3

test_vector = data_maker(test_images, image_per_sequence)
train_vector = data_maker(train_images, image_per_sequence)

# Extract one feature vector per image. preprocess_input may overwrite its
# input in place; that is harmless here because the pixel arrays are rebuilt
# above every time this cell runs.
test_vectors = model.predict(preprocess_input(test_vector))
train_vectors = model.predict(preprocess_input(train_vector))

# One row per sequence. concatenate_vectors is defined in an earlier cell;
# it is no longer re-defined here.
test_concatenated_vectors = concatenate_vectors(test_vectors, image_per_sequence)
train_concatenated_vectors = concatenate_vectors(train_vectors, image_per_sequence)

# Labels as arrays so they can be reordered with an index array.
test_label = np.array(test_label)
train_label = np.array(train_label)

# Each sequence row needs exactly one label.
assert len(test_concatenated_vectors) == len(test_label), \
    "test features and labels differ in length"
assert len(train_concatenated_vectors) == len(train_label), \
    "train features and labels differ in length"

# Shuffle features and labels together with a seeded generator so the order
# is reproducible.
shuffle_rng = np.random.default_rng(42)
test_indices = shuffle_rng.permutation(len(test_concatenated_vectors))
train_indices = shuffle_rng.permutation(len(train_concatenated_vectors))

test_concatenated_shuffled_vectors = test_concatenated_vectors[test_indices]
test_shuffled_label = test_label[test_indices]

train_concatenated_shuffled_vectors = train_concatenated_vectors[train_indices]
train_shuffled_label = train_label[train_indices]

# Fit the logistic regression; random_state pins the 'saga' solver's internal
# shuffling.
# NOTE(review): the accuracy recorded for this run equals the one recorded for
# the 1-frame run - worth checking whether a single class is being predicted.
lr_p1 = LogisticRegression(
    C=0.1, penalty='l1', solver='saga', max_iter=10000, random_state=42
)
lr_p1.fit(train_concatenated_shuffled_vectors, train_shuffled_label)
print(lr_p1.score(test_concatenated_shuffled_vectors, test_shuffled_label))

# Save the fitted model.
model_name = f"Model2_Seq{image_per_sequence}_Model1.joblib"
joblib.dump(lr_p1, model_name)

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 2s/step
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 2s/step
0.7151515151515152


['Model2_Seq3_Model1.joblib']

In [15]:
# Same pipeline as above, using the middle 5 frames of every sequence.
image_per_sequence = 5

test_vector = data_maker(test_images, image_per_sequence)
train_vector = data_maker(train_images, image_per_sequence)

# Extract one feature vector per image. preprocess_input may overwrite its
# input in place; that is harmless here because the pixel arrays are rebuilt
# above every time this cell runs.
test_vectors = model.predict(preprocess_input(test_vector))
train_vectors = model.predict(preprocess_input(train_vector))

# One row per sequence. concatenate_vectors is defined in an earlier cell;
# it is no longer re-defined here.
test_concatenated_vectors = concatenate_vectors(test_vectors, image_per_sequence)
train_concatenated_vectors = concatenate_vectors(train_vectors, image_per_sequence)

# Labels as arrays so they can be reordered with an index array.
test_label = np.array(test_label)
train_label = np.array(train_label)

# Each sequence row needs exactly one label.
assert len(test_concatenated_vectors) == len(test_label), \
    "test features and labels differ in length"
assert len(train_concatenated_vectors) == len(train_label), \
    "train features and labels differ in length"

# Shuffle features and labels together with a seeded generator so the order
# is reproducible.
shuffle_rng = np.random.default_rng(42)
test_indices = shuffle_rng.permutation(len(test_concatenated_vectors))
train_indices = shuffle_rng.permutation(len(train_concatenated_vectors))

test_concatenated_shuffled_vectors = test_concatenated_vectors[test_indices]
test_shuffled_label = test_label[test_indices]

train_concatenated_shuffled_vectors = train_concatenated_vectors[train_indices]
train_shuffled_label = train_label[train_indices]

# Fit the logistic regression; random_state pins the 'saga' solver's internal
# shuffling.
lr_p1 = LogisticRegression(
    C=0.1, penalty='l1', solver='saga', max_iter=10000, random_state=42
)
lr_p1.fit(train_concatenated_shuffled_vectors, train_shuffled_label)
print(lr_p1.score(test_concatenated_shuffled_vectors, test_shuffled_label))

# Save the fitted model.
model_name = f"Model2_Seq{image_per_sequence}_Model1.joblib"
joblib.dump(lr_p1, model_name)

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m375s[0m 2s/step
0.7151515151515152


['Model2_Seq5_Model1.joblib']

In [16]:
# Same pipeline as above, using all 7 frames of every sequence.
image_per_sequence = 7

test_vector = data_maker(test_images, image_per_sequence)
train_vector = data_maker(train_images, image_per_sequence)

# Extract one feature vector per image. preprocess_input may overwrite its
# input in place; that is harmless here because the pixel arrays are rebuilt
# above every time this cell runs.
test_vectors = model.predict(preprocess_input(test_vector))
train_vectors = model.predict(preprocess_input(train_vector))

# One row per sequence. concatenate_vectors is defined in an earlier cell;
# it is no longer re-defined here.
test_concatenated_vectors = concatenate_vectors(test_vectors, image_per_sequence)
train_concatenated_vectors = concatenate_vectors(train_vectors, image_per_sequence)

# Labels as arrays so they can be reordered with an index array.
test_label = np.array(test_label)
train_label = np.array(train_label)

# Each sequence row needs exactly one label.
assert len(test_concatenated_vectors) == len(test_label), \
    "test features and labels differ in length"
assert len(train_concatenated_vectors) == len(train_label), \
    "train features and labels differ in length"

# Shuffle features and labels together with a seeded generator so the order
# is reproducible.
shuffle_rng = np.random.default_rng(42)
test_indices = shuffle_rng.permutation(len(test_concatenated_vectors))
train_indices = shuffle_rng.permutation(len(train_concatenated_vectors))

test_concatenated_shuffled_vectors = test_concatenated_vectors[test_indices]
test_shuffled_label = test_label[test_indices]

train_concatenated_shuffled_vectors = train_concatenated_vectors[train_indices]
train_shuffled_label = train_label[train_indices]

# Fit the logistic regression; random_state pins the 'saga' solver's internal
# shuffling.
lr_p1 = LogisticRegression(
    C=0.1, penalty='l1', solver='saga', max_iter=10000, random_state=42
)
lr_p1.fit(train_concatenated_shuffled_vectors, train_shuffled_label)
print(lr_p1.score(test_concatenated_shuffled_vectors, test_shuffled_label))

# Save the fitted model.
model_name = f"Model2_Seq{image_per_sequence}_Model1.joblib"
joblib.dump(lr_p1, model_name)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 2s/step
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m512s[0m 2s/step
0.7212121212121212


['Model2_Seq7_Model1.joblib']