In [None]:
import numpy as np
import os
from ultralytics import YOLO
import matplotlib.pyplot as plt
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader


In [29]:
# Retrieve stored variables
%store -r df_augmented
%store -r val_df
train_df = df_augmented
val_df.head()

Unnamed: 0,age,gender,img_name,pixels
5949,24,1,24_1_1_20170113013149199.jpg.chip.jpg,"[[[172, 184, 182], [172, 184, 182], [173, 185,..."
12919,20,1,20_1_2_20170117141237688.jpg.chip.jpg,"[[[16, 4, 6], [16, 4, 6], [16, 4, 4], [16, 5, ..."
3305,42,0,42_0_3_20170119160350227.jpg.chip.jpg,"[[[136, 25, 18], [136, 25, 18], [136, 25, 18],..."
3173,58,0,58_0_1_20170117191843355.jpg.chip.jpg,"[[[189, 161, 140], [189, 161, 140], [190, 162,..."
6865,3,1,3_1_0_20170103210428066.jpg.chip.jpg,"[[[94, 59, 21], [93, 58, 20], [92, 57, 19], [9..."


In [30]:
os.makedirs('yolo_dataset/images/train', exist_ok=True)
os.makedirs('yolo_dataset/images/val', exist_ok=True)
os.makedirs('yolo_dataset/labels/train', exist_ok=True)
os.makedirs('yolo_dataset/labels/val', exist_ok=True)

In [None]:
# Function to convert pixel string to image and save
def save_image_from_pixels(row, folder):
    pixels = row['pixels']
    
    # Handle different pixel formats
    if isinstance(pixels, np.ndarray):
        img_array = pixels
    elif isinstance(pixels, str):
        pixels = pixels.split(' ')
        img_array = np.array(pixels, dtype=np.uint8)
    else:
        # If pixels is already in the right format
        img_array = np.array(pixels, dtype=np.uint8)
    
    # Reshape to image dimensions
    if img_array.size == 48*48:
        img_array = img_array.reshape(48, 48)
    
    # Convert to PIL Image
    img = Image.fromarray(img_array)
    img = img.convert('RGB')  # Convert to RGB for YOLO
    
    # Save the image
    img_path = f"yolo_dataset/images/{folder}/{row['img_name']}.jpg"
    img.save(img_path)
    return img_path

# Function to create YOLO format labels
def create_yolo_label(row, folder):
    # Create directory if it doesn't exist
    os.makedirs(f"yolo_dataset/labels/{folder}", exist_ok=True)
    # Create label file in YOLO format
    label_path = f"yolo_dataset/labels/{folder}/{row['img_name']}.txt"
    # Get gender class (0 for male, 1 for female)
    gender_class = int(row['gender'])
    # Get age and map to appropriate age range class
    age = float(row['age'])
    
    # Map age to class ID (2-13 for the 12 age ranges)
    if age <= 10:
        age_class = 2
    elif age <= 20:
        age_class = 3
    elif age <= 30:
        age_class = 4
    elif age <= 40:
        age_class = 5
    elif age <= 50:
        age_class = 6
    elif age <= 60:
        age_class = 7
    elif age <= 70:
        age_class = 8
    elif age <= 80:
        age_class = 9
    elif age <= 90:
        age_class = 10
    elif age <= 100:
        age_class = 11
    elif age <= 110:
        age_class = 12
    else:  # 111-120
        age_class = 13
    
    with open(label_path, 'w') as f:
        # Format: class x_center y_center width height
        # Write gender class
        f.write(f"{gender_class} 0.5 0.5 1.0 1.0\n")
        # Write age class
        f.write(f"{age_class} 0.5 0.5 1.0 1.0\n")
    
    # No longer need separate metadata file as we're encoding directly in YOLO format
    return label_path

In [33]:
# Process training data
train_sample = train_df.sample(5000, random_state=42)
print(f"Processing {len(train_sample)} training samples")

for idx, row in train_sample.iterrows():
    img_path = save_image_from_pixels(row, 'train')
    label_path = create_yolo_label(row, 'train')
    if idx % 500 == 0:
        print(f"Processed {idx} training samples")

# Process validation data
val_sample = val_df.sample(1000, random_state=42)
print(f"Processing {len(val_sample)} validation samples")

for idx, row in val_sample.iterrows():
    img_path = save_image_from_pixels(row, 'val')
    label_path = create_yolo_label(row, 'val')
    if idx % 100 == 0:
        print(f"Processed {idx} validation samples")


Processing 5000 training samples
Processed 1500 training samples
Processed 23000 training samples
Processed 21500 training samples
Processed 1000 training samples
Processed 15000 training samples
Processed 7500 training samples
Processed 13500 training samples
Processed 8000 training samples
Processed 10000 training samples
Processed 12500 training samples
Processed 7000 training samples
Processed 4000 training samples
Processed 16500 training samples
Processed 12000 training samples
Processed 14000 training samples
Processed 23500 training samples
Processed 8500 training samples
Processed 9000 training samples
Processing 1000 validation samples
Processed 10200 validation samples
Processed 5500 validation samples
Processed 3100 validation samples
Processed 1900 validation samples
Processed 7200 validation samples
Processed 600 validation samples
Processed 11000 validation samples
Processed 13200 validation samples
Processed 3900 validation samples
Processed 0 validation samples
Process

In [34]:
# Create dataset.yaml file with absolute paths
dataset_dir = os.path.join(os.getcwd(), 'yolo_dataset')
yaml_path = os.path.join(dataset_dir, 'dataset.yaml')

yaml_content = f"""path: {dataset_dir}
train: images/train
val: images/val
nc: 14
names: ['male', 'female', '0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '61-70', '71-80', '81-90', '91-100', '101-110', '111-120']
"""

with open(yaml_path, 'w') as f:
    f.write(yaml_content)
print(f"Created YAML file at: {yaml_path}")

Created YAML file at: /Users/jinleixu/Desktop/UTK/yolo_dataset/dataset.yaml


In [37]:
# Load a pre-trained YOLO model
model = YOLO('yolov8n.pt')

In [54]:
# Configure device for optimal performance
device = '0' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Train the model with the absolute path to the YAML file
results = model.train(
    data=yaml_path,
    epochs=25,  # Increase epochs to allow more learning time
    imgsz=128,  # Increase image size for better feature extraction
    batch=16,  # Smaller batch size to avoid memory issues
    name='age_gender_model_balanced',
    device=device,
    workers=4,
    exist_ok=True,
    verbose=True,
    patience=15,  # Early stopping patience
    cos_lr=True,  # Use cosine learning rate scheduler
    lr0=0.01,  # Initial learning rate
    lrf=0.001,  # Final learning rate
    weight_decay=0.0005,  # Weight decay for regularization
    label_smoothing=0.1 # Label smoothing for better generalization
    
)

Using device: cpu
Ultralytics 8.3.109 🚀 Python-3.10.6 torch-2.6.0 CPU (Apple M1)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/Users/jinleixu/Desktop/UTK/yolo_dataset/dataset.yaml, epochs=25, time=None, patience=15, batch=16, imgsz=128, save=True, save_period=-1, cache=False, device=cpu, workers=4, project=None, name=age_gender_model_balanced, exist_ok=True, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=True, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_lab

[34m[1mtrain: [0mScanning /Users/jinleixu/Desktop/UTK/yolo_dataset/labels/train.cache... 5000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/jinleixu/Desktop/UTK/yolo_dataset/labels/val.cache... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:00<?, ?it/s]

Plotting labels to runs/detect/age_gender_model_balanced/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000556, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 128 train, 128 val
Using 0 dataloader workers
Logging results to [1mruns/detect/age_gender_model_balanced[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25         0G     0.1801      1.067       0.96         48        128: 100%|██████████| 313/313 [04:08<00:00,  1.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:33<00:00,  1.04s/it]


                   all       1000       2000      0.405      0.589      0.361      0.353

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/25         0G     0.1789      1.065     0.9543         42        128: 100%|██████████| 313/313 [03:32<00:00,  1.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:26<00:00,  1.21it/s]


                   all       1000       2000      0.403      0.652      0.356      0.348

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/25         0G     0.1767      1.057     0.9436         44        128: 100%|██████████| 313/313 [04:21<00:00,  1.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:28<00:00,  1.12it/s]

                   all       1000       2000      0.333      0.645       0.38      0.374






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/25         0G     0.1693      1.042     0.9371         56        128: 100%|██████████| 313/313 [03:50<00:00,  1.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:34<00:00,  1.06s/it]

                   all       1000       2000      0.439      0.653      0.378      0.371






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/25         0G     0.1606      1.032     0.9304         54        128: 100%|██████████| 313/313 [03:46<00:00,  1.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [00:27<00:00,  1.15it/s]


                   all       1000       2000       0.41      0.669      0.395      0.388

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/25         0G     0.1581      1.031     0.9281         52        128: 100%|██████████| 313/313 [04:02<00:00,  1.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  12%|█▎        | 4/32 [00:06<00:42,  1.52s/it]


KeyboardInterrupt: 

In [39]:
# Evaluate the model
val_results = model.val()
print(f"Validation results: {val_results}")

Ultralytics 8.3.109 🚀 Python-3.10.6 torch-2.6.0 CPU (Apple M1)
Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /Users/jinleixu/Desktop/UTK/yolo_dataset/labels/val.cache... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:24<00:00,  2.59it/s]


                   all       1000       2000      0.421      0.542      0.412      0.405
                  male        523        523      0.356      0.985      0.913      0.911
                female        477        477      0.387      0.971      0.887      0.884
                  0-10        109        109      0.439      0.908      0.861      0.833
                 11-20         68         68          1          0      0.141      0.138
                 21-30        332        332      0.483      0.901      0.613      0.605
                 31-40        187        187      0.279      0.658      0.314       0.31
                 41-50         96         96      0.271      0.323      0.243      0.235
                 51-60        101        101      0.247      0.743      0.313      0.295
                 61-70         57         57      0.201      0.404      0.223      0.218
                 71-80         29         29      0.104      0.241      0.157      0.155
                 81-9

In [40]:
val_dir = os.path.join(os.getcwd(), 'yolo_dataset', 'images', 'val')

if os.path.exists(val_dir) and len(os.listdir(val_dir)) > 0:
    # randomly choose 100 images from the val_dir
    val_images = os.listdir(val_dir)
    sample_images = random.sample(val_images, min(20, len(val_images)))
    
    for img_name in sample_images:
        img_path = os.path.join(val_dir, img_name)
        
        # run prediction
        results = model.predict(img_path)
        
        
        # extract and display prediction info
        for r in results:
            boxes = r.boxes
            for box in boxes:
                cls = int(box.cls[0])
                conf = float(box.conf[0])
                
                # 获取对应的元数据文件(如果存在)
                meta_path = os.path.join(os.getcwd(), 'yolo_dataset', 'labels', 'val', 
                                        img_name.replace('.jpg', '_meta.txt'))
                meta_info = ""
                if os.path.exists(meta_path):
                    with open(meta_path, 'r') as f:
                        meta_info = f.read().strip()


else:
    print("verify dataset is correct or not, val_dir is empty or not exist")






image 1/1 /Users/jinleixu/Desktop/UTK/yolo_dataset/images/val/40_0_2_20170117001148193.jpg.chip.jpg.jpg: 64x64 1 male, 1 21-30, 12.7ms
Speed: 0.8ms preprocess, 12.7ms inference, 3.2ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 /Users/jinleixu/Desktop/UTK/yolo_dataset/images/val/38_1_4_20170103230647441.jpg.chip.jpg.jpg: 64x64 1 female, 1 21-30, 6.4ms
Speed: 0.2ms preprocess, 6.4ms inference, 0.6ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 /Users/jinleixu/Desktop/UTK/yolo_dataset/images/val/1_0_0_20170110205418587.jpg.chip.jpg.jpg: 64x64 1 male, 1 0-10, 9.1ms
Speed: 0.3ms preprocess, 9.1ms inference, 0.5ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 /Users/jinleixu/Desktop/UTK/yolo_dataset/images/val/25_1_3_20170119195859404.jpg.chip.jpg.jpg: 64x64 1 female, 1 21-30, 8.0ms
Speed: 0.2ms preprocess, 8.0ms inference, 0.6ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 /Users/jinleixu/Desktop/UTK/yolo_dataset/images/val/23_0_0_20170117144

In [41]:
import matplotlib.pyplot as plt
%matplotlib inline
# Display training results summary
print("Training Results Summary:")
if hasattr(results, 'results_dict'):
    for metric, value in results.results_dict.items():
        print(f"{metric}: {value}")
elif isinstance(results, dict):
    for metric, value in results.items():
        print(f"{metric}: {value}")
else:
    print(f"Training results: {results}")

# Display validation results summary
print("\nValidation Results Summary:")
if hasattr(val_results, 'results_dict'):
    for metric, value in val_results.results_dict.items():
        print(f"{metric}: {value}")
elif isinstance(val_results, dict):
    for metric, value in val_results.items():
        print(f"{metric}: {value}")
else:
    print(f"Validation results: {val_results}")

# If you want to visualize the training metrics
if hasattr(results, 'plot'):
    print("\nPlotting training metrics:")
    fig = results.plot()
    plt.show()

# If you want to visualize the confusion matrix (if available)
if hasattr(val_results, 'confusion_matrix'):
    print("\nConfusion Matrix:")
    val_results.confusion_matrix.plot()
    plt.show()

Training Results Summary:
Training results: [ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'male', 1: 'female', 2: '0-10', 3: '11-20', 4: '21-30', 5: '31-40', 6: '41-50', 7: '51-60', 8: '61-70', 9: '71-80', 10: '81-90', 11: '91-100', 12: '101-110', 13: '111-120'}
obb: None
orig_img: array([[[106, 117, 107],
        [106, 117, 107],
        [106, 117, 107],
        ...,
        [106, 125, 132],
        [109, 125, 131],
        [106, 122, 128]],

       [[106, 117, 107],
        [106, 117, 107],
        [107, 118, 108],
        ...,
        [107, 126, 133],
        [110, 126, 132],
        [107, 123, 129]],

       [[106, 117, 107],
        [107, 118, 108],
        [107, 118, 108],
        ...,
        [109, 128, 135],
        [111, 127, 133],
        [109, 125, 131]],

       ...,

       [[125, 136, 126],
        [125, 136, 126],
        [125, 136, 126],
        ...,
        [123, 143, 1