In [1]:
import os
import glob
import numpy as np
import csv
import random

In [2]:
#Mount google drive to colab
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
def split_data(filenames, subset1_size_perc = 0.8, random_shuffle = True, rseed=512):
    random.seed(rseed)
    n_set1 = int(subset1_size_perc * len(filenames))
    
    shuffled_filenames = filenames.copy()
    if random_shuffle:
        random.shuffle(shuffled_filenames)
    set1 = shuffled_filenames[:n_set1]
    set2 = shuffled_filenames[n_set1:]
    
    return set1,set2

In [4]:
def parse_csv(csv_path):
    image_data = []
    print("parsing: ", csv_path)
    with open(csv_path) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            #filename, xmin, ymin, xmax, ymax, obj_name = row
            image_data.append(row)
    return image_data

In [5]:
csv_path = "./gdrive/MyDrive/data/annotations/processed_anotations.csv"

all_data = parse_csv(csv_path)
print(all_data[0:5])

parsing:  ./gdrive/MyDrive/data/annotations/processed_anotations.csv
[['image_name', 'x1', 'y1', 'x2', 'y2', 'class'], ['D8C08190-E91D-4480-B229-DCA56426342B-3.png', '18', '100', '33', '225', '1'], ['D8C08190-E91D-4480-B229-DCA56426342B-4.png', '201', '100', '223', '225', '1'], ['D8C08190-E91D-4480-B229-DCA56426342B-5.png', '22', '100', '37', '225', '1'], ['D8C08190-E91D-4480-B229-DCA56426342B-7.png', '206', '100', '238', '225', '1']]


In [6]:
TRAIN_PECR = 0.8
VAL_PERC = 0.15

trainval, test = split_data(all_data, subset1_size_perc=TRAIN_PECR, rseed=512)
train, val = split_data(trainval, subset1_size_perc=(1-VAL_PERC), rseed=512)

print("len(all_data):", len(all_data))
print("len(train):", len(train))
print("len(val):", len(val))
print("len(test):", len(test))

len(all_data): 13850
len(train): 9418
len(val): 1662
len(test): 2770


In [7]:
def makedirs(path):
    # Intended behavior: try to create the directory,
    # pass if the directory exists already, fails otherwise.
    # Meant for Python 2.7/3.n compatibility.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise

In [8]:
output_directory = "./gdrive/MyDrive/data/annotations"
makedirs(output_directory)

train_output = os.path.join(output_directory, "train_annotations.csv")
val_output = os.path.join(output_directory, "val_annotations.csv")
test_output = os.path.join(output_directory, "test_annotations.csv")

### for python2 check : https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row

In [9]:
for output_path, annotations in [(train_output, train), (val_output, val), (test_output, test)]:
    with open(output_path, 'w', newline='') as f:
        print("writing ", len(annotations), " annotations to ", output_path)
        writer = csv.writer(f)
        writer.writerows(annotations)

writing  9418  annotations to  ./gdrive/MyDrive/data/annotations/train_annotations.csv
writing  1662  annotations to  ./gdrive/MyDrive/data/annotations/val_annotations.csv
writing  2770  annotations to  ./gdrive/MyDrive/data/annotations/test_annotations.csv
