In [1]:
import ifcb
import csv
from urllib.request import urlopen
import numpy as np
import os
import imageio
from pathlib import Path

In [2]:
def create_folders(output_path,classes):
    """Make sure ``output_path`` holds one sub-directory per class name.

    Missing parent directories are created as well, and directories that
    already exist are left untouched. When a directory cannot be created
    (``OSError``), a message is printed and the remaining names are still
    processed.

    Args:
        output_path: Base directory, as a string.
        classes: Iterable of folder names to create below ``output_path``.
    """
    # Paths are built lazily so each directory is created as soon as its
    # name has been read from ``classes``.
    targets = (output_path + '/' + label for label in classes)
    for target in targets:
        try:
            Path(target).mkdir(parents=True, exist_ok=True)
        except OSError:
            # Best effort: report the failure and carry on with the next one.
            print ("Creation of the directory %s failed" % target)

In [3]:
def classify_images_in_folders(base_url,output_path,pid,threshold):
    """Sort the images of one bin into per-class folders using its autoclass scores.

    Downloads ``base_url + pid + '_class_scores.csv'``. The header row of that
    file lists the class names after a leading identifier column; every other
    non-empty row holds a ROI identifier (whose trailing ``_<number>`` is the
    ROI number) followed by one score per class. A ROI is assigned the first
    class holding its highest score when that score is strictly greater than
    ``threshold``, and ``'Unclassified'`` otherwise.

    Every image of the bin opened with ``ifcb.open_url(base_url + pid)`` is
    then written to ``output_path/<class name>/<pid>_<roi number>.png``.

    Args:
        base_url: URL prefix that ``pid`` is appended to.
        output_path: Base output directory, as a string; one folder per class
            is created below it.
        pid: Identifier of the bin to process.
        threshold: Value the best score must exceed for the image to keep
            its best class.

    Returns:
        A ``(classes, counts)`` tuple. ``classes`` is a 1-D NumPy array of the
        class names (header order, followed by ``'Unclassified'``). ``counts``
        is an integer array of shape ``(1, len(classes))`` with the number of
        images written per class, in the same order.

    Raises:
        ValueError: If an image of the bin has no row in the class scores file.
    """
    # Determine the class list and the best class of every ROI
    with urlopen(base_url + pid + '_class_scores.csv') as f :
        reader = csv.reader(f.read().decode('utf-8').split('\n'),delimiter=',')
        classes = next(reader)[1:]
        classes.append('Unclassified')
        unclassified = classes.index('Unclassified')
        create_folders(output_path,classes)
        # ROI number -> index into `classes`; a dict makes the per-image
        # lookup below constant time instead of a linear list search.
        best_class_by_roi = {}
        for row in reader:
            if not row:
                continue  # blank line, e.g. after the trailing newline
            roi_number = int(row[0].split("_")[-1])
            scores = [float(score) for score in row[1:]]
            # argmax returns the first position of the maximum
            top = int(np.argmax(scores))
            best_class = top if scores[top] > threshold else unclassified
            # Keep the first row seen for a ROI number if it is repeated
            best_class_by_roi.setdefault(roi_number, best_class)

    # Retrieving images
    with ifcb.open_url(base_url + pid) as sample_bin:
        print('{} has {} image(s)'.format(sample_bin.lid, len(sample_bin.images)))
        results = {k:0 for k in classes}  # Initializing distribution results
        for roi_number in sample_bin.images:
            try:
                best_class = best_class_by_roi[roi_number]
            except KeyError:
                raise ValueError(
                    'ROI {} of {} has no entry in the class scores file'.format(roi_number, pid)
                ) from None
            class_name = classes[best_class]
            results[class_name] += 1  # Counting images
            img_path = output_path + '/' + class_name + '/' +  pid + '_' + str(roi_number) + '.png'
            imageio.imwrite(img_path, sample_bin.images[roi_number])

    # Build names and counts directly from the tally
    class_names = np.array(list(results.keys()))
    counts = np.array(list(results.values()), dtype=int).reshape((1, len(results)))
    return class_names, counts

In [6]:
# CSV file listing the bins to process; the first column of each row is used as the bin pid.
pids_file = "bins.csv"
# URL prefix that each pid is appended to when fetching a bin and its class scores file.
base_url = 'https://ifcb-data.whoi.edu/mvco/'
# Directory under which the per-class image folders are created.
output_dir = 'whoi-data'
# An image keeps its best-scoring class only when that score is strictly greater than this value.
threshold = 0.5
# Number of columns of the aggregated count matrix; expected to equal the number of
# classes in the score files plus one for the extra 'Unclassified' class.
n_classes = 146

In [None]:
# Classify the images of every bin listed in `pids_file` and gather the
# per-bin class counts. newline='' is the csv module's recommended way to
# open a file handed to csv.reader.
with open(pids_file, newline='') as f:
    per_bin_counts = []
    for row in csv.reader(f):
        if not row or not row[0].strip():
            continue  # skip blank lines instead of failing on row[0]
        pid = row[0]
        classes, counts = classify_images_in_folders(base_url,output_dir,pid,threshold)
        per_bin_counts.append(counts)

# One row per bin, one column per class. Stacking once at the end avoids
# re-copying the matrix for every bin and does not require the class count
# to match a hard-coded width; n_classes only shapes the empty result.
if per_bin_counts:
    all_counts = np.concatenate(per_bin_counts, axis=0)
else:
    all_counts = np.empty((0,n_classes), int)