In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import os
import matplotlib.pyplot as plt
import urllib.request
import zipfile
import glob
import shutil
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

# Preparing Data:

- **Step 1**: Download and Unzip Data
- **Step 2**: Preprocessing 
- **Step 3**: Split train set into training and validation set (0.9, 0.1)
- **Step 4**: Make yaml file for training YOLO

- The following cells download the data and save it in the given path.
- It has both images and labels.

## Step 1: Download and Unzip Data

In [None]:


# Root directory under which all KITTI data is stored
path = '/sc/home/masoumeh.javanbakhat/coldstore/KITT'

# Download locations of the KITTI image archive and label archive
kitti_images_url = 'https://s3.eu-central-1.amazonaws.com/avg-kitti/data_object_image_2.zip'
kitti_labels_url = 'https://s3.eu-central-1.amazonaws.com/avg-kitti/data_object_label_2.zip'

# Make sure the target folder <path>/kitti_obj exists before anything is downloaded
kitti_obj_dir = os.path.join(path, 'kitti_obj')
os.makedirs(kitti_obj_dir, exist_ok=True)

In [None]:
# Download function
def download_and_extract(url, dest_folder):
    """Download a zip archive into ``dest_folder`` (unless already present) and unpack it.

    Parameters
    ----------
    url : str
        Location of the zip archive; its basename is used as the local file name.
    dest_folder : str
        Folder that receives both the archive and its extracted contents.
        It is created if it does not exist yet.

    Raises
    ------
    OSError
        If the download fails (``urllib.error.URLError`` is a subclass).
        No partial archive is left behind under the final file name.
    zipfile.BadZipFile
        If the local archive is not a valid zip file.
    """
    os.makedirs(dest_folder, exist_ok=True)
    local_zip = os.path.join(dest_folder, os.path.basename(url))
    if not os.path.exists(local_zip):
        print(f'Downloading {url}...')
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer is never mistaken for a complete archive on re-run.
        partial_zip = local_zip + '.part'
        try:
            urllib.request.urlretrieve(url, partial_zip)
            os.replace(partial_zip, local_zip)
        finally:
            # Only still present when the download or the rename failed.
            if os.path.exists(partial_zip):
                os.remove(partial_zip)
    with zipfile.ZipFile(local_zip, 'r') as zip_ref:
        zip_ref.extractall(dest_folder)
    print(f'Extracted to {dest_folder}')

# Fetch and unpack both archives (images first, then labels) into <path>/kitti_obj
kitti_dest = os.path.join(path, 'kitti_obj')
for archive_url in (kitti_images_url, kitti_labels_url):
    download_and_extract(archive_url, kitti_dest)

In [2]:
# Sanity check of the extracted data: count the entries in the image and label folders
path = '/sc/home/masoumeh.javanbakhat/coldstore/KITT'
path_image_2 = os.path.join(path, 'kitti_obj', 'training')

image_path, label_path = (
    os.path.join(path_image_2, subfolder) for subfolder in ('image_2', 'label_2')
)

len_images_kitti, len_labels_kitti = (
    len(os.listdir(folder)) for folder in (image_path, label_path)
)
print(f'Number of KITTI images: {len_images_kitti}')
print(f'Number of KITTI labels: {len_labels_kitti}')


Number of KITTI images: 7481
Number of KITTI labels: 7481


## Step 2: Preprocessing

- Convert corner based representation of bounding boxes to center based
- We considered only three classes: ['Car', 'Pedestrian', 'Cyclist']
- From labels, we only save index of object class along with bbox coordinates 
- Save the new labels in `labels_yolo`
- Pair each image with its label file in a single list

In [None]:
# Object classes that are kept; the position in this list is the YOLO class index.
classes = ['Car', 'Pedestrian', 'Cyclist']

def kitti_to_yolo(label_file, img_width=1242, img_height=375):
    """Convert one KITTI label file into YOLO-format label lines.

    Each kept object becomes ``"<class_idx> <x_center> <y_center> <width> <height>"``
    with all four box values divided by the image size.

    Parameters
    ----------
    label_file : str or path-like
        KITTI label file; one object per line, class name in field 0 and the
        box corners (x1, y1, x2, y2) in fields 4-7.
    img_width, img_height : float
        Size in pixels of the image the labels belong to. The defaults only fit
        1242x375 frames; pass the real size for any other resolution.

    Returns
    -------
    list of str
        One line per object whose class is listed in ``classes``; objects of any
        other class are dropped. Empty if nothing is kept.

    Raises
    ------
    ValueError
        If a line of a kept class has fewer than 8 fields or non-numeric box values.
    """
    yolo_lines = []
    with open(label_file, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            parts = line.split()
            if not parts:
                # Blank (e.g. trailing) lines carry no object.
                continue
            cls_name = parts[0]
            if cls_name not in classes:
                continue
            if len(parts) < 8:
                raise ValueError(
                    f"{label_file}, line {line_no}: expected at least 8 fields, got {len(parts)}"
                )
            cls_idx = classes.index(cls_name)
            # KITTI bbox: corner coordinates in pixels
            x1, y1, x2, y2 = map(float, parts[4:8])
            # Convert to YOLO format: box center and size, normalised by image size
            x_center = ((x1 + x2) / 2) / img_width
            y_center = ((y1 + y2) / 2) / img_height
            width = (x2 - x1) / img_width
            height = (y2 - y1) / img_height
            yolo_lines.append(f"{cls_idx} {x_center} {y_center} {width} {height}")
    return yolo_lines

# Convert every KITTI label file to YOLO format and store the result in a
# sibling folder `labels_yolo` next to `label_2`.
#
# KITTI frames do not all share one resolution, so the true size of each image is
# read from its PNG header; normalising everything by a single fixed size would
# skew the boxes and can push coordinates above 1.

def _png_size(png_file):
    """Return (width, height) from a PNG file's IHDR header, or None if unreadable."""
    import struct  # local import: only needed by this helper

    try:
        with open(png_file, 'rb') as fh:
            header = fh.read(24)
    except OSError:
        return None
    # 8-byte signature, 4-byte chunk length, 4-byte chunk type, then width and height.
    if len(header) < 24 or header[:8] != b'\x89PNG\r\n\x1a\n' or header[12:16] != b'IHDR':
        return None
    width, height = struct.unpack('>II', header[16:24])
    if width == 0 or height == 0:
        return None
    return width, height


label_files = glob.glob(os.path.join(label_path, '*.txt'))

# Build the output folder from path components instead of a substring replace,
# and create it once rather than once per file.
out_dir = os.path.join(os.path.dirname(label_path), 'labels_yolo')
os.makedirs(out_dir, exist_ok=True)

n_unknown_size = 0
for lbl in label_files:
    stem = os.path.splitext(os.path.basename(lbl))[0]
    size = _png_size(os.path.join(image_path, stem + '.png'))
    if size is None:
        # No readable image for this label: fall back to kitti_to_yolo's default size.
        n_unknown_size += 1
        yolo_lines = kitti_to_yolo(lbl)
    else:
        yolo_lines = kitti_to_yolo(lbl, img_width=size[0], img_height=size[1])
    out_file = os.path.join(out_dir, os.path.basename(lbl))
    with open(out_file, 'w') as f:
        f.write('\n'.join(yolo_lines))

if n_unknown_size:
    print(f'Warning: image size unknown for {n_unknown_size} label files; default size was used')


In [3]:
# Inspect the converted labels: count the entries in the labels_yolo folder
yolo_labels = os.path.join(path, 'kitti_obj', 'training', 'labels_yolo')
yolo_label_entries = os.listdir(yolo_labels)
len_yolo_labels = len(yolo_label_entries)
print(f'Number of YOLO labels: {len_yolo_labels}')

Number of YOLO labels: 7481


In [None]:

# Build (image, label) pairs for the train/validation split.
img_dir = os.path.join(path,'kitti_obj','training','image_2')
label_dir = os.path.join(path,'kitti_obj','training','labels_yolo')

imgs = sorted(Path(img_dir).glob('*'))
labels = sorted(Path(label_dir).glob('*'))

# Match by file stem rather than by position: zipping two sorted listings would
# silently shift every later pair as soon as one file is missing or extra.
labels_by_stem = {lb.stem: lb for lb in labels}
pairs = [(im, labels_by_stem[im.stem]) for im in imgs if im.stem in labels_by_stem]

n_unpaired = len(imgs) - len(pairs)
if n_unpaired:
    print(f'Warning: {n_unpaired} images have no matching label file and were skipped')

## Step 3: Split training set into train and validation 

- Split data into train and validation
- Create directories and save train and validation 

In [None]:
# 90/10 split into training and validation pairs.
# A fixed seed makes the split reproducible: without it every re-run draws a new
# split, and files copied by an earlier run would stay behind in train/ and valid/.
train, test = train_test_split(pairs, test_size=0.1, shuffle=True, random_state=42)
len(train), len(test)

In [None]:
# Resolve the split folders against the current working directory,
# then create whichever of them does not exist yet
train_path, valid_path = (Path(split_name).resolve() for split_name in ('train', 'valid'))
for split_dir in (train_path, valid_path):
    split_dir.mkdir(exist_ok=True)

In [None]:
# Copy each training image and then its label file into the train folder,
# keeping the original file names
for img_src, lbl_src in tqdm(train):
    for src in (img_src, lbl_src):
        shutil.copy(src, train_path / src.name)

In [None]:
# Copy each held-out image and then its label file into the valid folder,
# keeping the original file names
for img_src, lbl_src in tqdm(test):
    for src in (img_src, lbl_src):
        shutil.copy(src, valid_path / src.name)

## Step 4: Making yaml file 
 - Make a yaml file for training YOLO.

In [None]:
# Split folders as they are written into the dataset yaml
train_dir, valid_dir = "./train", "./valid"

# Class names; the list position becomes the class index in the yaml
classes = ['Car', 'Pedestrian', 'Cyclist']

In [None]:
# Header of the dataset config: split locations and number of classes
yaml_file = f"""train: {train_dir}
val: {valid_dir}

nc: {len(classes)}

names:

"""

# One "  <index>: <name>" entry per class under `names:`
yaml_file += "".join(f"  {idx}: {name}\n" for idx, name in enumerate(classes))

with open('kitti.yaml','w') as yaml_out:
    yaml_out.write(yaml_file)

- Now data is ready for training YOLO. 

# Training Model

- We use the following steps to train YOLO:
  - In the project directory, start enroot: `enroot start ultralytics+ultralytics+latest`
  - Then change to the following directory:
    - `cd ~/netstore-old/Baysian/3D/Computer_Vision_New`
  - Then use the command `python src/train.py` for training.
