# In [None]:
import sys
import subprocess
import importlib
import os
import tkinter as tk
from tkinter import filedialog

# ---------------------------------------------------------
# 1. 패키지 자동 설치 함수
# ---------------------------------------------------------
def install_package(module_name, package_name=None):
    """Ensure *module_name* is importable, installing it with pip if needed.

    Args:
        module_name: Import name to probe (e.g. ``"cv2"``).
        package_name: Distribution name handed to ``pip install`` (e.g.
            ``"opencv-python"``). Defaults to ``module_name``.

    Exits the interpreter with status 1 when pip reports a non-zero exit code.
    """
    if package_name is None:
        package_name = module_name
    try:
        importlib.import_module(module_name)
    except ImportError:
        print(f"Installing {package_name} ...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
        except subprocess.CalledProcessError as e:
            print(f"{package_name} installation failed (exit code {e.returncode})")
            sys.exit(1)
        # The import system caches directory listings; refresh them so the
        # package installed a moment ago is visible to imports made later in
        # this same process.
        importlib.invalidate_caches()
        print(f"{package_name} installation completed")

# 필수 패키지 확인 및 설치
print("Checking required packages...")
# (import name, pip distribution name); None lets install_package fall back to
# the import name for the pip install.
for _import_name, _dist_name in (
    ("tensorflow", None),
    ("numpy", None),
    ("pandas", None),
    ("tqdm", None),
    ("PIL", "Pillow"),
    ("cv2", "opencv-python"),
    ("matplotlib", None),
    ("sklearn", "scikit-learn"),
):
    install_package(_import_name, _dist_name)

# ---------------------------------------------------------
# 2. 라이브러리 임포트
# ---------------------------------------------------------
from tensorflow.keras.preprocessing import image as keras_image
import numpy as np
from tqdm import tqdm
from PIL import ImageFile
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from tensorflow.keras.applications import MobileNetV2 
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ---------------------------------------------------------
# 3. 데이터 로드 및 전처리
# ---------------------------------------------------------

print("Please select the data folder window...")
# A Tk root is created only to host the dialog and is hidden right away so
# that just the folder picker is shown.
root = tk.Tk()
root.withdraw()
dirname = filedialog.askdirectory(title='Select Data Folder to Train')
root.destroy()

# An empty result means no folder was chosen, so there is nothing to train on.
if dirname:
    print(f"Selected Directory: {dirname}")
else:
    print("No folder selected. Exiting...")
    sys.exit()

# Path fix-up: make sure the folder path ends with a slash.
dirname = dirname if dirname.endswith('/') else dirname + '/'

def image_to_tensor(img_path):
    """Load one image from the selected folder as a batch-of-one array.

    ``img_path`` is resolved relative to the module-level ``dirname``. The
    image is loaded at target size (120, 160) and returned with a new leading
    axis. Returns ``None`` (after printing a message) when the file does not
    exist or when anything goes wrong while loading it.
    """
    try:
        full_path = os.path.join(dirname, img_path)
        # Check for the file first so a missing image gets its own message.
        if os.path.exists(full_path):
            pixels = keras_image.img_to_array(
                keras_image.load_img(full_path, target_size=(120, 160))
            )
            return pixels[np.newaxis, ...]
        print(f"File missing: {img_path}")
    except Exception as err:
        print(f"Error loading image {img_path}: {err}")
    return None

# Pillow flag controlling how truncated image files are handled while loading.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Load the label CSV from the selected folder; stop if it is not there.
csv_path = os.path.join(dirname, "0_road_labels.csv")
if os.path.exists(csv_path):
    data = pd.read_csv(csv_path)
else:
    print(f"Error: CSV file '{csv_path}' not found in the selected folder.")
    sys.exit()

# [Important] Drop the backward-driving samples (label 4): keep only the rows
# whose label is not 4, then renumber the index from zero.
print(f"Original data count: {len(data)}")
data = data.loc[data['label'].ne(4)].reset_index(drop=True)
print(f"Filtered data count (removed backward): {len(data)}")

print("Loading images and syncing labels...")

valid_tensors = []
valid_labels = []
valid_files = []

# Walk the file and label columns in lockstep. An image that fails to load is
# skipped together with its label AND its file name, so the three lists always
# describe the same samples in the same order.
for img_path, label in tqdm(zip(data['file'], data['label']), total=len(data)):
    tensor = image_to_tensor(img_path)
    if tensor is None:
        continue
    valid_tensors.append(tensor)
    valid_labels.append(label)
    valid_files.append(img_path)

if not valid_tensors:
    print("Error: No valid images loaded.")
    sys.exit()

# Convert the lists to numpy arrays: each tensor carries a leading axis of
# length 1, so stacking them vertically yields one row per image.
tensors = np.vstack(valid_tensors)
targets = np.array(valid_labels)

# File names of the images that actually loaded (used for the preview below).
# Slicing the CSV column by count would go out of step with `targets` as soon
# as any image other than the last ones was skipped.
files = np.array(valid_files, dtype=object)

print(f"Final loaded tensors shape: {tensors.shape}")
print(f"Final loaded targets shape: {targets.shape}")

# Sanity check: one label per image.
if tensors.shape[0] != targets.shape[0]:
    print("Error: Mismatch between images and labels.")
    sys.exit()

# ---------------------------------------------------------
# 4. 데이터 시각화 (확인용)
# ---------------------------------------------------------
# Display name for each class index (0-3).
names = ['_0_forward', '_1_right', '_2_left', '_3_stop']

def display_images(img_path, ax):
    """Draw the image at ``dirname``/``img_path`` onto the matplotlib axes *ax*.

    The file is read as raw bytes and decoded with ``cv2.imdecode`` rather than
    ``cv2.imread``, so folder names containing non-ASCII characters also work
    on Windows. If the file cannot be read or decoded, a short placeholder text
    is drawn instead of raising.
    """
    full_path = os.path.join(dirname, img_path)
    try:
        raw = np.fromfile(full_path, dtype=np.uint8)
    except OSError:
        raw = np.empty(0, dtype=np.uint8)

    # imdecode rejects an empty buffer, so only call it when bytes were read.
    img = cv2.imdecode(raw, cv2.IMREAD_COLOR) if raw.size else None
    if img is None:
        ax.text(0.5, 0.5, "image unavailable", ha='center', va='center',
                transform=ax.transAxes)
        return

    # OpenCV decodes to BGR channel order; matplotlib expects RGB.
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

fig = plt.figure(figsize=(10, 3))
# Preview at most the first four samples, one subplot each, titled with the
# class name that belongs to the sample's label.
for count, (file_name, class_id) in enumerate(zip(files[:4], targets[:4])):
    ax = fig.add_subplot(1, 4, count + 1, xticks=[], yticks=[])
    ax.set_title(names[class_id], color='blue')
    display_images(file_name, ax)
plt.show()

# ---------------------------------------------------------
# 5. 데이터셋 분리 (Train / Test / Validation)
# ---------------------------------------------------------
# Put the image stack into (N, 120, 160, 3) layout.
tensors = np.reshape(tensors, (-1, 120, 160, 3))

# Normalization (0~255 -> 0~1)
tensors = tensors.astype('float32') / 255

# One-hot encoding (4 classes)
targets = to_categorical(targets, 4)

# Hold out 20% of the samples, then cut that hold-out in two: the first half
# becomes the validation set and the rest the test set.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    tensors, targets, test_size=0.2, random_state=1
)

n = len(x_holdout) // 2
x_valid, x_test = x_holdout[:n], x_holdout[n:]
y_valid, y_test = y_holdout[:n], y_holdout[n:]

print(f"Train shapes: {x_train.shape}, {y_train.shape}")
print(f"Test shapes: {x_test.shape}, {y_test.shape}")
print(f"Valid shapes: {x_valid.shape}, {y_valid.shape}")

# ---------------------------------------------------------
# 6. 모델 정의 (MobileNetV2 Transfer Learning)
# ---------------------------------------------------------
# Build the classifier: a frozen MobileNetV2 feature extractor followed by a
# small dense head with a 4-way softmax output.
print("Building MobileNetV2 Model...")

# 1. Load the base model (using ImageNet weights), without its classification top
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(120, 160, 3))

# 2. Freeze the base model weights (so they are not trained until fine-tuning)
base_model.trainable = False

# 3. Attach a new head
# NOTE(review): images are scaled to [0, 1] earlier in this script, while the
# Keras documentation for MobileNetV2 describes preprocess_input as scaling
# pixels to [-1, 1] - confirm whether this difference is intended.
inputs = Input(shape=(120, 160, 3))
x = base_model(inputs, training=False)  # call the base model with training=False
x = GlobalAveragePooling2D()(x) # compress the feature maps
x = Dropout(0.2)(x)
x = Dense(100, activation='relu')(x)
x = Dropout(0.2)(x)
outputs = Dense(4, activation='softmax')(x) # 4-class output

model = Model(inputs, outputs)
model.summary()

# 4. Compile (learning rate set slightly low because this is transfer learning)
# categorical_crossentropy pairs with the one-hot targets produced by
# to_categorical earlier in the script.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy'])

# ---------------------------------------------------------
# 7. 학습 실행
# ---------------------------------------------------------

# 1. 데이터 증강 설정
# The brightness shift is applied by converting each image to a PIL image,
# which works on 0-255 pixel values. x_train has already been scaled to
# [0, 1], so it is scaled back to 0-255 when handed to the generator, and
# `rescale` (applied after the random transforms) returns every batch to
# [0, 1] so the training batches are on the same scale as x_valid / x_test.
datagen = ImageDataGenerator(
#    rotation_range=10,      # rotate slightly, within 10 degrees (handles camera shake)
#    width_shift_range=0.1,  # shift left/right by 10% (handles position changes)
#    height_shift_range=0.1, # shift up/down by 10%
    brightness_range=[0.5, 1.5], # vary brightness between 50% and 150% (shadows / lighting - important!)
#    zoom_range=0.1,         # zoom in/out by 10%
#    fill_mode='nearest'     # how to fill the empty space
    rescale=1.0 / 255,
)

# 2. Run training on the augmented data
batch_size = 32
print("Starting training with Data Augmentation...")
history = model.fit(
    # Augmented batches are generated on the fly. np.rint avoids off-by-one
    # pixel values when the generator truncates the floats to 8-bit integers.
    datagen.flow(np.rint(x_train * 255.0), y_train, batch_size=batch_size),
    validation_data=(x_valid, y_valid),
    # Number of batches per epoch; at least 1 so that a training set smaller
    # than one batch does not produce zero steps.
    steps_per_epoch=max(1, len(x_train) // batch_size),
    epochs=20 # adjust as needed (augmentation usually calls for longer training)
)

# Per-epoch training loss recorded by fit(), plotted against 1-based epoch numbers.
loss = history.history['loss']
epochs = range(1, 1 + len(loss))

plt.plot(epochs, loss, 'g', label='Training loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training loss')
plt.legend()
plt.show()

# Save the model
model.save("model.h5")
print("Model saved to model.h5")

# ---------------------------------------------------------
# 8. 테스트 및 검증
# ---------------------------------------------------------
print("Evaluating model...")
# To avoid custom-layer problems on load, either keep using the trained model
# object as-is or take care when using load_model (using the trained object
# directly is the recommended option here).
model1 = load_model('model.h5')

# Predicted class index for every test image (argmax over the model outputs).
y_test_predict = np.argmax(model1.predict(x_test), axis=1)

# Visualize a random sample of up to 16 test images, titled
# "predicted (actual)"; the title colour tells hits from misses.
sample_size = min(16, x_test.shape[0])
if sample_size <= 0:
    print("Not enough test data to visualize.")
else:
    fig = plt.figure(figsize=(18, 18))
    indices = np.random.choice(x_test.shape[0], size=sample_size, replace=False)
    for slot, idx in enumerate(indices, start=1):
        ax = fig.add_subplot(4, 4, slot, xticks=[], yticks=[])
        ax.imshow(np.squeeze(x_test[idx]))
        pred_idx = y_test_predict[idx]
        true_idx = np.argmax(y_test[idx])
        title_color = "#4876ff" if pred_idx == true_idx else "darkred"
        ax.set_title(f"{names[pred_idx]} ({names[true_idx]})", color=title_color)
    plt.show()