In [1]:
import random
import os
import shutil
import cv2
import glob
import torch
import torchvision
import numpy as np
import json
import yaml
import time
from tqdm import tqdm
from PIL import Image
from torchvision import datasets, models, transforms

In [2]:
from ultralytics import YOLO

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [4]:
data_root = "D:\\human_fall\\dataset"

train_origin_root = 'D:\\human_fall\\train_origin'
train_resize_images = f'{data_root}\\train\\images'
#labels_root = f'{data_root}\\train\\labels'

valid_origin_root = 'D:\\human_fall\\valid_origin'
valid_resize_images = f'{data_root}\\val\\images'
#valid_labels_root = f'{data_root}\\valid\\labels'


In [6]:
# data resize/padding

train_root = 'D:\\human_fall\\dataset\\train\\images'
val_root = 'D:\\human_fall\\dataset\\val\\images'
test_root = 'D:\\human_fall\\dataset\\test\\images'
# Create directories if not exist
#os.makedirs(train_resize_images, exist_ok=True)
#os.makedirs(valid_resize_images, exist_ok=True)

# Helper function to resize and pad images
def letterbox_image(image, target_size=(640, 640)):
    # 원본 이미지 크기
    h, w = image.shape[:2]
    target_w, target_h = target_size

    # 비율 계산
    scale = min(target_w / w, target_h / h)

    # 새로운 이미지 크기
    new_w = int(w * scale)
    new_h = int(h * scale)

    # 이미지 리사이즈
    resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    # 패딩 적용
    pad_w = (target_w - new_w) // 2
    pad_h = (target_h - new_h) // 2

    # 이미지를 타겟 크기로 채워서 새로운 이미지를 만듦
    padded_image = cv2.copyMakeBorder(resized_image, pad_h, target_h - new_h - pad_h, pad_w, target_w - new_w - pad_w,
                                      cv2.BORDER_CONSTANT, value=[128, 128, 128])

    return padded_image

# Process Non-Fall Data
for file in os.listdir(test_root):
    file_path = os.path.join(test_root, file)
    image = cv2.imread(file_path)
    resized_image = letterbox_image(image)
    output_path = os.path.join(test_root, file)
    cv2.imwrite(output_path, resized_image)

# Process Fall Data
#for file in os.listdir(fall_image_path):
    #file_path = os.path.join(fall_image_path, file)
    #image = cv2.imread(file_path)
    #resized_image = letterbox_image(image)
    #output_path = os.path.join(output_fall_image_path, file)
    #cv2.imwrite(output_path, resized_image)

In [7]:
train_root = 'D:\\human_fall\\dataset\\train\\images'
val_root = 'D:\\human_fall\\dataset\\val\\images'
train_labels = 'D:\\human_fall\\dataset\\train\\labels'
val_labels = 'D:\\human_fall\\dataset\\val\\labels'
test_root = 'D:\\human_fall\\dataset\\test\\images'
test_labels = 'D:\\human_fall\\dataset\\test\\labels'

def update_labels(label_path, original_size, new_size, scale, pad_w, pad_h):
    with open(label_path, 'r') as f:
        lines = f.readlines()
    
    new_lines = []
    for line in lines:
        class_id, x, y, w, h = map(float, line.strip().split())
        
        # 원본 이미지에서의 픽셀 좌표로 변환
        x_pixel = x * original_size[0]
        y_pixel = y * original_size[1]
        w_pixel = w * original_size[0]
        h_pixel = h * original_size[1]
        
        # 새 이미지에서의 픽셀 좌표로 변환
        new_x_pixel = x_pixel * scale + pad_w
        new_y_pixel = y_pixel * scale + pad_h
        new_w_pixel = w_pixel * scale
        new_h_pixel = h_pixel * scale
        
        # 새 이미지에서의 상대 좌표로 변환
        new_x = new_x_pixel / new_size[0]
        new_y = new_y_pixel / new_size[1]
        new_w = new_w_pixel / new_size[0]
        new_h = new_h_pixel / new_size[1]
        
        new_lines.append(f"{int(class_id)} {new_x:.6f} {new_y:.6f} {new_w:.6f} {new_h:.6f}\n")
    
    with open(label_path, 'w') as f:
        f.writelines(new_lines)

# 원본 이미지 크기와 새 이미지 크기
original_size = (3840, 2160)
new_size = (640, 640)

# 스케일 계산
scale = min(new_size[0] / original_size[0], new_size[1] / original_size[1])

# 패딩 계산
new_w = int(original_size[0] * scale)
new_h = int(original_size[1] * scale)
pad_w = (new_size[0] - new_w) // 2
pad_h = (new_size[1] - new_h) // 2

# 라벨 파일 업데이트
for file in os.listdir(test_labels):
    if file.endswith('.txt'):
        label_path = os.path.join(test_labels, file)
        update_labels(label_path, original_size, new_size, scale, pad_w, pad_h)

In [None]:

# Helper function to get all patient directories
def get_patient_dirs(path):
    patient_dirs = {}
    for item in os.listdir(path):
        patient_id = '_'.join(item.split('_')[:4])  # Get unique identifier
        if patient_id not in patient_dirs:
            patient_dirs[patient_id] = []
        patient_dirs[patient_id].append(os.path.join(path, item))
    return list(patient_dirs.values())

# Process Non-Fall Data
non_fall_patients_video = get_patient_dirs(non_fall_video_path)
non_fall_patients_image = get_patient_dirs(non_fall_image_path)

for patient_files in non_fall_patients_video:
    for file in patient_files:
        shutil.copy(file, output_nonfall_video_path)

for patient_files in non_fall_patients_image:
    for file in patient_files:
        shutil.copy(file, output_nonfall_image_path)

# Process Fall Data
fall_patients_video = []
fall_patients_image = []
for path in fall_video_paths:
    fall_patients_video.extend(get_patient_dirs(path))
for path in fall_image_paths:
    fall_patients_image.extend(get_patient_dirs(path))

selected_patients = random.sample(fall_patients_video, len(fall_patients_video) // 3)

for patient_files in selected_patients:
    for file in patient_files:
        shutil.copy(file, output_fall_video_path)

# Ensure image selection matches selected patients
selected_patient_ids = {'_'.join(files[0].split('_')[:4]) for files in selected_patients}

for patient_files in fall_patients_image:
    patient_id = '_'.join(patient_files[0].split('_')[:4])
    if patient_id in selected_patient_ids:
        for file in patient_files:
            shutil.copy(file, output_fall_image_path)

In [10]:
# json 파일에 있는 bbox 정보를 yolo 형식으로
# class_name x_center y_center w h
train_json = 'D:\\human_fall\\dataset\\train\\labels'
val_json = 'D:\\human_fall\\dataset\\val\\labels'
test_json = 'D:\\human_fall\\dataset\\test\\labels'

def convert_bbox(bbox, img_width, img_height):
    x1, y1, x2, y2 = map(float, bbox.split(','))
    
    width = (x2 - x1) / img_width
    height = (y2 - y1) / img_height
    center_x = (x1 + x2) / (2 * img_width)
    center_y = (y1 + y2) / (2 * img_height)
    
    return center_x, center_y, width, height

def determine_class(file_name):
    file_name_lower = file_name.lower()
    if 'by' in file_name_lower or 'sy' in file_name_lower or 'fy' in file_name_lower:
        return 1
    else:
        return 0

def process_json(json_path, output_dir, new_width=640, new_height=640):
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    orig_width, orig_height = map(int, data['metadata']['scene_res'].split('x'))
    
    scale = min(new_width / orig_width, new_height / orig_height)
    
    scaled_width = int(orig_width * scale)
    scaled_height = int(orig_height * scale)
    
    pad_x = (new_width - scaled_width) // 2
    pad_y = (new_height - scaled_height) // 2
    
    bbox = data['bboxdata']['bbox_location']
    x, y, w, h = convert_bbox(bbox, orig_width, orig_height)
    
    x = (x * scaled_width + pad_x) / new_width
    y = (y * scaled_height + pad_y) / new_height
    w = (w * scaled_width) / new_width
    h = (h * scaled_height) / new_height
    
    class_id = determine_class(data['metadata']['file_name'])
    
    result = f"{class_id} {x:.6f} {y:.6f} {w:.6f} {h:.6f}"
    
    output_filename = os.path.splitext(data['metadata']['file_name'])[0] + '.txt'
    output_path = os.path.join(output_dir, output_filename)
    
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(result)

def process_all_json_in_folder(input_folder, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for filename in os.listdir(input_folder):
        if filename.endswith('.json'):
            json_path = os.path.join(input_folder, filename)
            process_json(json_path, output_folder)
    
    print(f"Processed all JSON files in {input_folder}")

# 각 데이터셋 처리
datasets = [
    ('train', train_json),
    ('val', val_json),
    ('test', test_json)
]

for dataset_name, json_folder in datasets:
    output_folder = os.path.join(os.path.dirname(json_folder), 'labels_yolo')
    print(f"Processing {dataset_name} dataset...")
    process_all_json_in_folder(json_folder, output_folder)

print("All datasets processed.")

Processing train dataset...
Processed all JSON files in D:\human_fall\dataset\train\labels
Processing val dataset...
Processed all JSON files in D:\human_fall\dataset\val\labels
Processing test dataset...
Processed all JSON files in D:\human_fall\dataset\test\labels
All datasets processed.


In [15]:
train_root = "D:\\human_fall\\dataset\\train\\images"
train_label = "D:\\human_fall\\dataset\\train\\labels"

val_root = "D:\\human_fall\\dataset\\val\\images"
val_label = "D:\\human_fall\\dataset\\val\\labels"

test_root = "D:\\human_fall\\dataset\\test\\images"
test_label = "D:\\human_fall\\dataset\\test\\labels"


def clip_coordinates(coord):
    return max(0, min(coord, 1))

def normalize_coordinates(label_path, image_path):
    # 이미지 크기 가져오기
    with Image.open(image_path) as img:
        img_width, img_height = img.size
    
    # 라벨 파일 읽기
    with open(label_path, 'r') as f:
        lines = f.readlines()
    
    normalized_lines = []
    for line in lines:
        parts = line.strip().split()
        if len(parts) == 5:  # class x y width height 형식 확인
            class_id = parts[0]
            x = float(parts[1])
            y = float(parts[2])
            width = float(parts[3])
            height = float(parts[4])
            
            # 좌표 정규화
            x_normalized = x / img_width
            y_normalized = y / img_height
            width_normalized = width / img_width
            height_normalized = height / img_height
            
            x_normalized = clip_coordinates(x_normalized)
            y_normalized = clip_coordinates(y_normalized)
            width_normalized = clip_coordinates(width_normalized)
            height_normalized = clip_coordinates(height_normalized)
            
            
            # 정규화된 좌표로 새 라인 생성
            new_line = f"{class_id} {x_normalized:.6f} {y_normalized:.6f} {width_normalized:.6f} {height_normalized:.6f}\n"
            normalized_lines.append(new_line)
    
    # 정규화된 좌표로 파일 다시 쓰기
    with open(label_path, 'w') as f:
        f.writelines(normalized_lines)

image_folder = val_root
label_folder = val_label

for filename in os.listdir(label_folder):
    if filename.endswith('.txt'):
        label_path = os.path.join(label_folder, filename)
        image_path = os.path.join(image_folder, filename.replace('.txt', '.jpg'))
        
        if os.path.exists(image_path):
            normalize_coordinates(label_path, image_path)
        else:
            print(f"Image not found for label: {filename}")

In [13]:
# 사용되지 않는 캐시 메모리 삭제
torch.cuda.empty_cache()

In [8]:
data_root = 'D:\\human_fall\\dataset'
train_root = f'{data_root}\\train\\images'
val_root = f'{data_root}\\val\\images'
class_names = {0 : 'Non_Fall', 1 : 'Fall'}
num_classes = len(class_names)

yaml_info = {
    'path' : data_root,
    'names': class_names,
    'nc': num_classes,
    'train': train_root,
    'val': val_root
}

with open('yaml_info_yolov8s.yaml', 'w') as f : 
    yaml.dump(yaml_info, f)
print(f'이 경로에 yaml파일 생성 : {data_root}')

이 경로에 yaml파일 생성 : D:\human_fall\dataset


In [4]:
start_time = time.time()

model = YOLO('yolov8s.pt')
result = model.train(data = 'D:\\project\\prjvenv\\yaml_info_yolov8s.yaml', epochs = 50, batch = 16, imgsz =640, device = device, workers = 20, amp = True, patience = 30, name = 'human_fall_s')

end_time = time.time()
execution_time = end_time - start_time
print(f"실행 시간: {execution_time:.4f} 초")

New https://pypi.org/project/ultralytics/8.3.17 available  Update with 'pip install -U ultralytics'
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=D:\project\prjvenv\yaml_info_yolov8s.yaml, epochs=50, time=None, patience=30, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=20, project=None, name=human_fall_s29, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_c

[34m[1mtrain: [0mScanning D:\human_fall\dataset\train\labels.cache... 87730 images, 0 backgrounds, 0 corrupt: 100%|██████████| 87730/87730 [00:00<?, ?it/s]
[34m[1mval: [0mScanning D:\human_fall\dataset\val\labels.cache... 42294 images, 0 backgrounds, 0 corrupt: 100%|██████████| 42294/42294 [00:00<?, ?it/s]


Plotting labels to runs\detect\human_fall_s29\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 20 dataloader workers
Logging results to [1mruns\detect\human_fall_s29[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50      4.23G          0      12.01          0          0        640:  46%|████▌     | 2535/5484 [10:03<11:41,  4.20it/s]  


KeyboardInterrupt: 