# 빅데이터및AI (1324201-01) 실습  

## YOLOv8을 활용한 전방차량 및 브레이크 등 상태 탐지  
#### 객체 탐지(Object Detection) 목적으로 학습된 YOLOv8 모델을 활용하여 전방차량 및 해당 차량의 브레이크 등 상태를 탐지하는 인공지능 모델 학습 실습  
#### [**참고 영상**](https://www.dropbox.com/s/l91e0fjazbww4af/brake_light_detection_demo.mp4)


## 데이터셋 다운로드  

#### 공개 데이터셋 사이트에 방문하여 (회원가입 및) 로그인한 뒤, 조교의 안내에 따라 아래 코드를 완성하면 데이터셋을 다운로드할 수 있음  
#### [**공개 데이터셋 링크**](https://universe.roboflow.com/kookmin-university-glfyz/state-of-vehicle-tail-lamp-detection)


In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="0f7yCIzBZ72NY1PgdahK")
project = rf.workspace("kookmin-university-glfyz").project("state-of-vehicle-tail-lamp-detection")
dataset = project.version(2).download("yolov8")

In [None]:
# 다운로드된 데이터셋 위치 이동
!mkdir datasets     # datasets 폴더 생성
!mv ./State-of-vehicle-tail-lamp-detection-2/ ./datasets/State-of-vehicle-tail-lamp-detection-2/    # 공개데이터셋 이동
!mv ./datasets/State-of-vehicle-tail-lamp-detection-2/data.yaml ./data.yaml             # 데이터셋 설정 파일 이동
!wget -O data.yaml https://www.dropbox.com/scl/fi/70tikgw5bzmlhpytz22mx/data.yaml?rlkey=v0w68v5dcu7i5ecoskwufa0kd&dl=0  # Train용 데이터셋 설정 파일 다운로드
!wget -O data_test.yaml https://www.dropbox.com/scl/fi/xyd8hyduo7ixbh7i8gys0/data_test.yaml?rlkey=rzsp5diditahj3rx3udcmq3d5&dl=0 # Test용 데이터셋 설정 파일 다운로드

## 탐색적 자료 분석(EDA)

In [None]:
import os
import numpy as np

# Remember the notebook's working directory; later cells build paths from it
cur_path = os.getcwd()
print(cur_path)

# List the files and folders directly under the working directory
cwd_entries = os.listdir(cur_path)
print(cwd_entries)

### yaml 파일 확인

In [None]:
import yaml

# Dataset config used for training and validation
yaml_path = os.path.join(cur_path, 'data.yaml')

# NOTE(review): data.yaml is downloaded from an external link; yaml.load with
# FullLoader is kept as-is here, but consider yaml.safe_load for such files.
with open(yaml_path) as config_file:
    data_info = yaml.load(config_file, Loader=yaml.FullLoader)

# Print every top-level entry of the config as "key : value"
for entry_name, entry_value in data_info.items():
    print(entry_name, ':', entry_value)

In [None]:
# Dataset config used for testing
test_yaml_path = os.path.join(cur_path, 'data_test.yaml')

# NOTE(review): data_test.yaml is downloaded from an external link; yaml.load
# with FullLoader is kept as-is here, but consider yaml.safe_load for such files.
with open(test_yaml_path) as config_file:
    test_data_info = yaml.load(config_file, Loader=yaml.FullLoader)

# Print every top-level entry of the config as "key : value"
for entry_name, entry_value in test_data_info.items():
    print(entry_name, ':', entry_value)

### 입,출력 데이터 확인

In [None]:
# Root folder of the downloaded dataset (under ./datasets)
DATA_DIR = os.path.join(cur_path, 'datasets', 'State-of-vehicle-tail-lamp-detection-2')

# Show what the dataset folder contains
dataset_entries = os.listdir(DATA_DIR)
print(dataset_entries)

In [None]:
# Folders of the train, validation and test splits inside the dataset root
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(DATA_DIR, split_name) for split_name in ('train', 'valid', 'test')
)

# Print the contents of each split folder, one line per split
for heading, split_dir in (("Train Folder: ", TRAIN_DIR),
                           ("Vaidation Folder: ", VAL_DIR),
                           ("Test Folder: ", TEST_DIR)):
    print(heading, os.listdir(split_dir))

In [None]:
# Folder holding the training images; show the first two file names
TRAIN_IMG_DIR = os.path.join(TRAIN_DIR, 'images')
train_img_names = os.listdir(TRAIN_IMG_DIR)
train_img_names[:2]

In [None]:
from IPython import display     # blackbox image

# Display one fixed training image at a width of 500
# (alternative: img_file = os.listdir(TRAIN_IMG_DIR)[0])
sample_img_file = 'qyDn2S51_jpg.rf.fe33a97cd3c8cda330bc13db135f84da.jpg'
img_file_path = os.path.join(TRAIN_IMG_DIR, sample_img_file)
print(img_file_path)
display.Image(img_file_path, width=500)

In [None]:
# Folder holding the training labels; show the first two file names
TRAIN_LB_DIR = os.path.join(TRAIN_DIR, 'labels')
train_label_names = os.listdir(TRAIN_LB_DIR)
train_label_names[:2]

In [None]:
# Build the path of the label file that belongs to the sample image:
# same base name as the image, with a .txt extension
file_name, _img_ext = os.path.splitext(sample_img_file)
label_file = f"{file_name}.txt"
label_file_path = os.path.join(TRAIN_LB_DIR, label_file)
print(label_file_path)

In [None]:
# Read the sample label file as a list of lines.
# Per the original course note, each line holds the class (0: brake off,
# 1: brake on), the box centre (x, y) and the box size, scaled relative to the
# image resolution. The note lists the size as (h, w) while the docstring of
# get_label_info lists (w, h) -- TODO confirm the order.
with open(label_file_path, 'r') as label_fp:
    contents = label_fp.readlines()
contents

In [None]:
def get_label_info(contents):
    '''Split the lines of a label (.txt) file into per-box tuples.

    The input list is left untouched; a new list is returned.

    Input: contents (list), the lines of a label (.txt) file as obtained from readlines
    Output: info (list), one (class, x, y, w, h) tuple per non-blank line, in
        the order the lines appear; class is an int, the other values are floats
    '''

    info = []
    ##### ▽ write code ▽ #####
    for line in contents:
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline at the end of the file)
            continue
        # The first whitespace-separated token is the class id; reading the
        # whole token keeps multi-digit class ids intact.
        class_id = int(fields[0])
        info.append((class_id, *map(float, fields[1:])))
    ##### △ write code △ #####
    return info

In [None]:
# Column legend for the parsed tuples, followed by an empty line
legend = "(Class, x_corrdinate, y_corrdinate, width, height)"
print(legend, end='\n\n')

# Parsed label tuples of the sample image (shown as the cell output)
get_label_info(contents)

### **Reporting Point**
* Train, Validation, Test Image 개수
* Train **vs** Validation & Test Image 차이점 기술
* Train, Validation, Test Label 개수
* Train, Validation, Test Label 중 Class 개수
* (Optional) EDA를 통해 얻은 직관 및 데이터셋에 대한 고찰

## Label 시각화

#### **Reporting Point**
* Image 위에 Label 정보 시각화
* Class 별 서로 다른 색상으로 정확한 Bounding Box 그리기
* 최소 3장 이상의 그림을 레포트에 포함시킬 것
* 필요 시 아래 `draw_bbox_train` 함수 참조

In [None]:
import cv2
from google.colab.patches import cv2_imshow

def draw_bbox_train(img_file):
    '''Draw the labelled bounding boxes of a training image and show the result.

    Reads the image from TRAIN_IMG_DIR and its label (.txt) file from
    TRAIN_LB_DIR, draws one rectangle per label line (a different outline
    colour per class) and displays the image with cv2_imshow.

    Input: img_file (str), image file name (including the extension)
    Output: None
    Raises: FileNotFoundError if the image cannot be read
    '''

    ##### ▽ write code ▽ #####
    colors = [(0, 255, 255), (255, 255, 0)]  # outline colour, indexed by class id
    thickness = 2  # line thickness of the box outline

    img_path = os.path.join(TRAIN_IMG_DIR, img_file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # Image arrays are indexed (row, column): shape[:2] is (height, width)
    img_height, img_width = img.shape[:2]

    label_file = os.path.splitext(img_file)[0] + '.txt'
    label_file_path = os.path.join(TRAIN_LB_DIR, label_file)

    with open(label_file_path, 'r') as f:
        label_info = get_label_info(f.readlines())

    for box in label_info:
        class_idx = int(box[0])
        x, y, w, h = map(float, box[1:])
        print(box[1:])  # echo the box values as stored in the label file
        # Label values are relative to the image size: scale x/w by the image
        # width and y/h by the image height to get pixel coordinates.
        left = int(img_width * (x - (w / 2)))
        top = int(img_height * (y - (h / 2)))
        width = int(img_width * w)
        height = int(img_height * h)

        cv2.rectangle(img, (left, top), (left + width, top + height), colors[class_idx], thickness)

    cv2_imshow(img)
    ##### △ write code △ #####


# Visualise the labels of the sample image
draw_bbox_train(sample_img_file)

In [None]:
import random
# Pick one training image at random and draw its labelled boxes
train_img_files = os.listdir(TRAIN_IMG_DIR)
random_img_file = random.choice(train_img_files)
draw_bbox_train(random_img_file)

## 인공신경망 학습
### 저장경로 설정

In [None]:
# Mount Google Drive so training results can be saved (they are stored in your own Google Drive)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
print(os.getcwd())  # check the current working directory

In [None]:
BASE_PATH = os.path.join(os.getcwd(), 'drive', 'MyDrive', 'bigdata-ai-2023')

if not os.path.isdir(BASE_PATH):
    os.makedirs(BASE_PATH)  # 기본폴더가 없으면 생성

%cd drive/MyDrive/bigdata-ai-2023

### YOLOv8  
[YOLOv8 상세 문서](https://docs.ultralytics.com/)


In [None]:
# Install the ultralytics package (pip quiet mode), import it and call its checks() helper
!pip install -q ultralytics
import ultralytics
ultralytics.checks()

In [None]:
# YOLOv8 object detection example: run the pretrained yolov8n.pt model on a sample image URL
!yolo predict model=yolov8n.pt source='https://ultralytics.com/images/zidane.jpg'

In [None]:
# Install and update Ultralytics and Ray Tune packages
# (needed by the model.tune(..., use_ray=True) call in the transfer-learning section)
!pip install -U ultralytics "ray[tune]"

# Optionally install W&B for logging
!pip install wandb
!wandb login

### 전이학습


In [None]:
from ultralytics import YOLO
from ray import tune
# Load a pretrained model
model = YOLO('yolov8n.pt')

# Tune the model
## you can customize the hyperparameters...
num_epoch = 50        # epochs passed to model.tune()
num_iterations = 50   # value passed to model.tune() as `iterations`

# Kept for the manual model.train() variant; not passed to model.tune() here
num_patience = 50
input_size = 320
batch_size = 64

# `iterations` is the keyword model.tune() accepts; the previous `iterators=`
# is not a recognised argument. The value (50) is unchanged.
result_grid = model.tune(data='/content/data.yaml',
                         optimizer='Adam',
                         iterations=num_iterations,
                         epochs=num_epoch,
                         use_ray=True)




# Load a pretrained model # weight를 입힘
# model = YOLO('/content/drive/MyDrive/bigdata-ai-2023/runs/detect/train_epoch_50_256/weights/best.pt')

# # Train the model
# ## you can customize the hyperparameters...
# num_epoch = 50
# num_patience = 50   # epoch
# input_size = 320
# batch_size = 64

# results = model.train(data="/content/data.yaml",
#                       epochs=num_epoch,
#                       patience=num_patience,
#                       optimizer='RAdam',
#                       imgsz=input_size,
#                       batch=batch_size,
#                       freeze=10,
#                       lr0=1E-3
#                       )
# train_args={"epochs": 20, "batch": 2}
# trainable_with_cpu_gpu = tune.with_resources(trainable, {"cpu": 2, "gpu": 1})

In [None]:
from ultralytics import YOLO

# Start from the pretrained yolov8n.pt weights
model = YOLO('yolov8n.pt')

# Training settings
## you can customize the hyperparameters...
num_epoch, num_patience = 50, 20
input_size, batch_size = 640, 16
lr = 1E-3

# Alternative hyperparameter values, left commented out:
#   lr0=0.0724,
#   lrf=0.0307,
#   momentum=0.8757,
#   warmup_epochs=4.7728,
#   warmup_momentum=0.7052,
#   weight_decay=0.0005816,
#   seed=999
#   freeze=10

# NOTE(review): the same value is passed as lr0 and lrf; Ultralytics documents
# lrf as a fraction of lr0 rather than an absolute rate -- confirm this is intended.
train_kwargs = dict(
    data="/content/data.yaml",
    epochs=num_epoch,
    patience=num_patience,
    optimizer='SGD',
    imgsz=input_size,
    batch=batch_size,
    lr0=lr,
    lrf=lr,
)
results = model.train(**train_kwargs)

In [None]:
import matplotlib.pyplot as plt
from ultralytics import YOLO

# pts = {'SGD':'./runs/detect/Optimizer_change/train_default_SGD/weights/best.pt',
#        'Adam':'./runs/detect/Optimizer_change/train_default_Adam/weights/best.pt',
#        'Adamax':'./runs/detect/Optimizer_change/train_default_Adamax/weights/best.pt',
#        'AdamW':'./runs/detect/Optimizer_change/train_default_AdamW/weights/best.pt',
#        'NAdam':'./runs/detect/Optimizer_change/train_default_NAdam/weights/best.pt',
#        'RAdam':'./runs/detect/Optimizer_change/train_default_RAdam/weights/best.pt',
#        'RMSProp':'./runs/detect/Optimizer_change/train_default_RMSProp/weights/best.pt'}

# pts = {'SGD':'./runs/detect/Default_change/train_default_SGD/weights/best.pt',
#        'Adam':'./runs/detect/Default_change/train_default_Adam/weights/best.pt',
#        'Adamax':'./runs/detect/Default_change/train_default_Adamax/weights/best.pt',
#        'AdamW':'./runs/detect/Default_change/train_default_AdamW/weights/best.pt',
#        'NAdam':'./runs/detect/Default_change/train_default_NAdam/weights/best.pt',
#        'RAdam':'./runs/detect/Default_change/train_default_RAdam/weights/best.pt',
#        'RMSProp':'./runs/detect/Default_change/train_default_RMSProp/weights/best.pt'}


# Trained checkpoints to compare: run name -> path of its best weights
pts = {'train_16_01_001':'./runs/detect/Adamax_fine_tune/train_16_01_001/weights/best.pt',
       'train_16_0001_001':'./runs/detect/Adamax_fine_tune/train_16_0001_001/weights/best.pt',
       'train_16_0001_0001':'./runs/detect/Adamax_fine_tune/train_16_0001_0001/weights/best.pt',
       'train_16_001_0001':'./runs/detect/Adamax_fine_tune/train_16_001_0001/weights/best.pt',
       'train_16_01_01':'./runs/detect/Adamax_fine_tune/train_16_01_01/weights/best.pt',
       'train_16_001_001':'./runs/detect/Adamax_fine_tune/train_16_001_001/weights/best.pt',
       'train_16_01_0001':'./runs/detect/Adamax_fine_tune/train_16_01_0001/weights/best.pt',
       'train_32_100_001':'./runs/detect/Adamax_fine_tune/train_32_100_001/weights/best.pt',
       'train_32_300_001':'./runs/detect/Adamax_fine_tune/train_32_300_001/weights/best.pt',
       'train_0002_09' : './runs/detect/Adamax_fine_tune/train_0002_09/weights/best.pt'}

# Keys of the per-stage entries read from a metrics object's `speed` dict
SPEED_STAGES = ('preprocess', 'inference', 'postprocess')


def _print_metrics(split, key, metric):
    """Print mAP50 and the per-stage speed values of one evaluation run."""
    print(f"==============================<{split} : {key}>==============================")
    print(f"mAP50: {metric.box.map50}")
    for stage in SPEED_STAGES:
        print(f"{stage} time: {metric.speed[stage]}")


def _total_time(metric):
    """Return the sum of the preprocess, inference and postprocess speed values."""
    return sum(metric.speed[stage] for stage in SPEED_STAGES)


# Results gathered per checkpoint, in the iteration order of `pts`
key_ls = []
val_map50_ls, val_total_time_ls = [], []   # evaluated with data.yaml
map50_ls, total_time_ls = [], []           # evaluated with data_test.yaml

for key, ptpath in pts.items():
    val_model = YOLO(f"{ptpath}")  # load a custom model
    val_metric = val_model.val(data="/content/data.yaml")  # validation data
    metric = val_model.val(data="/content/data_test.yaml")  # test data

    _print_metrics("Validation", key, val_metric)
    _print_metrics("Test", key, metric)
    print(f"==========================================================================")

    key_ls.append(key)
    val_map50_ls.append(val_metric.box.map50)
    val_total_time_ls.append(_total_time(val_metric))
    map50_ls.append(metric.box.map50)
    total_time_ls.append(_total_time(metric))

# One figure with two panels. Each title now matches the data it shows:
# validation metrics on the left, test metrics on the right.
plt.figure(figsize=(20,5))
panels = (('Accuracy-Speed tradeoff(val)', val_total_time_ls, val_map50_ls),
          ('Accuracy-Speed tradeoff(test)', total_time_ls, map50_ls))
for panel_idx, (title, times, scores) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_idx)
    # One marker per checkpoint so every run gets its own legend entry
    for total_time, map50, label in zip(times, scores, key_ls):
        plt.plot(total_time, map50, 'o', label=label)
    plt.title(title)
    plt.xlabel('total_time')
    plt.ylabel('map_50')
    plt.grid()
    plt.legend()
plt.show()

In [None]:
# Evaluate the current `model` using the test dataset config
test_metrics = model.val(data="/content/data_test.yaml")  # the dataset config is passed explicitly via `data`

In [None]:
# Report mAP50 followed by the three per-stage speed values of the test run
print(f"mAP50: {test_metrics.box.map50}")
for stage in ('preprocess', 'inference', 'postprocess'):
    print(f"{stage} time: {test_metrics.speed[stage]}")

In [None]:
# Mount Google Drive at /content/drive (same call as in the save-path section above)
from google.colab import drive
drive.mount('/content/drive')

### **Reporting Point**
* mAP50에 대하여 기술
* 최소 3개 이상의 서로 다른 모델 학습
* 3개 이상의 학습 모델의 Accuracy - Speed 트레이드오프 그래프 그리기
