In [None]:
from src.utils import *
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [None]:
# Notebook-wide switches.
# When True, the background-removal cell calls remove_backgrounds().
REMOVE_BACKGROUND = True
# When True, the octree example builds its palette with make_optimized_palette()
# instead of make_palette(); the flag is also passed as the second argument of
# create_node_id_set_from_image(). The spelling is kept because other cells use this name.
OPTMIZED = True

## Remove background from images

In [None]:
# Remove the background from the images when the configuration flag is enabled
if REMOVE_BACKGROUND:
    remove_backgrounds()

## Octree algorithm

### Octree image color quantization example

In [None]:
# Quantize one example image with an octree.
# NOTE(review): the second argument (6) is presumably the octree depth -- confirm against src.utils.
octree, img_info = create_octree_from_image('2.png', 6)

# Build a palette of at most 256 colors with the builder selected by OPTMIZED
build_palette = octree.make_optimized_palette if OPTMIZED else octree.make_palette
build_palette(256)

# Save the palette and the quantized output image to files
create_palette_image(octree, 'example_palette.png')
save_quantized_image(octree, 'example_quantized.png', img_info)

# Node id set of the tree: two such sets can be compared with the
# Jaccard similarity coefficient to measure how alike two images are
id_set = octree.node_id_set()

## Comparison Pipeline

In [None]:
# Labels a test image can be assigned; each is matched against the 'Label'
# column of the samples CSV to select that level's reference images.
RIPENESS_LEVELS = ['unripe', 'barely ripe', 'ripe', 'overripe']
# Depth values swept by the depth / palette-size comparison.
DEPTH_LEVELS = [4, 6, 8]
# Palette sizes swept by the depth / palette-size comparison.
PALETTE_SIZES = [64, 256, 512]

### CSV Split

In [None]:
# Load the labelled reference samples and compute one node id set per image
samples_df = pd.read_csv('./dataset/samples.csv')

sample_dict = {}
for filename in samples_df['Filename'].values:
    sample_dict[filename] = create_node_id_set_from_image(
        filename, OPTMIZED, depth=6, palette_size=256, resize=True
    )

In [None]:
# Load the test images and compute one node id set per image
test_df = pd.read_csv('./dataset/test.csv')

test_dict = {}
for filename in test_df['Filename'].values:
    test_dict[filename] = create_node_id_set_from_image(
        filename, OPTMIZED, depth=6, palette_size=256, resize=True
    )

### Make Predictions

In [None]:
# Classify every test image as the ripeness level whose reference samples are,
# on average, most similar to it (mean Jaccard coefficient between node id sets).

# Group the reference filenames by label once instead of re-filtering
# samples_df for every test image. Levels without any reference sample are
# left out: they cannot be scored and would otherwise cause a division by zero.
samples_by_level = {}
for ripeness_level in RIPENESS_LEVELS:
    level_filenames = samples_df.loc[samples_df['Label'] == ripeness_level, 'Filename'].values
    if len(level_filenames) > 0:
        samples_by_level[ripeness_level] = level_filenames

if not samples_by_level:
    raise ValueError('No sample in samples_df is labelled with any of RIPENESS_LEVELS')

prediction_records = []
for test_filename, test_id_set in tqdm(test_dict.items()):
    ripeness_comparisons = {}
    for ripeness_level, level_filenames in samples_by_level.items():
        ripeness_list = [
            jaccard_similarity_coefficient(sample_dict[sample_filename], test_id_set)
            for sample_filename in level_filenames
        ]
        ripeness_comparisons[ripeness_level] = sum(ripeness_list) / len(ripeness_list)

    # On a tie, max() keeps the level that appears first in RIPENESS_LEVELS
    label = max(ripeness_comparisons, key=ripeness_comparisons.get)
    prediction_records.append({'Filename': test_filename, 'Label': label})

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# growing a frame row by row is quadratic.
predictions_df = pd.DataFrame(prediction_records, columns=['Filename', 'Label'])

# Timestamped file name so that successive runs do not overwrite each other
current_datetime = datetime.now().strftime("%d_%m__%H_%M")
predictions_df.to_csv(f'./dataset/predictions/predictions_{current_datetime}.csv', index=False)

### Calculate accuracy

In [None]:
# Accuracy = share of predictions whose label equals the label stored in
# test_df for the same filename (the first matching row is used).
correct_predictions = sum(
    1
    for filename, predicted_label in zip(predictions_df['Filename'], predictions_df['Label'])
    if predicted_label == test_df.loc[test_df['Filename'] == filename, 'Label'].values[0]
)

accuracy = correct_predictions / len(predictions_df)
print(f'Accuracy: {accuracy}')

### Compare palette sizes and depth levels

In [None]:
# Repeat the prediction pipeline for every (depth, palette size) combination and
# store, per test image, the predicted label plus the mean similarity to each
# ripeness level.

# The CSV files do not change between combinations, so read them once.
samples_df = pd.read_csv('./dataset/samples.csv')
test_df = pd.read_csv('./dataset/test.csv')

# Reference filenames per ripeness level. Levels without samples are left out:
# they cannot be scored and would otherwise cause a division by zero.
samples_by_level = {}
for ripeness_level in RIPENESS_LEVELS:
    level_filenames = samples_df.loc[samples_df['Label'] == ripeness_level, 'Filename'].values
    if len(level_filenames) > 0:
        samples_by_level[ripeness_level] = level_filenames

if not samples_by_level:
    raise ValueError('No sample in samples_df is labelled with any of RIPENESS_LEVELS')

# One score column per ripeness level, derived from RIPENESS_LEVELS so that the
# columns always match the labels actually being compared
# ('barely ripe' -> 'Barely Ripe', 'overripe' -> 'Overripe', ...).
score_columns = {ripeness_level: ripeness_level.title() for ripeness_level in RIPENESS_LEVELS}

comparison_records = []
for depth in DEPTH_LEVELS:
    for palette_size in PALETTE_SIZES:
        print(f'DEPTH: {depth} & PALETTE SIZE: {palette_size}')

        # NOTE(review): unlike the single-run cells above, `resize=True` is not
        # passed here -- confirm whether that difference is intentional.
        print('Instantiating sample values...')
        sample_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth, palette_size)
                        for filename in samples_df['Filename'].values }

        print('Instantiating test values...')
        test_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth, palette_size)
                      for filename in test_df['Filename'].values }

        print('Predicting...')
        for test_filename, test_id_set in tqdm(test_dict.items()):
            ripeness_comparisons = {}
            for ripeness_level, level_filenames in samples_by_level.items():
                ripeness_list = [
                    jaccard_similarity_coefficient(sample_dict[sample_filename], test_id_set)
                    for sample_filename in level_filenames
                ]
                ripeness_comparisons[ripeness_level] = sum(ripeness_list) / len(ripeness_list)

            # On a tie, max() keeps the level that appears first in RIPENESS_LEVELS
            label = max(ripeness_comparisons, key=ripeness_comparisons.get)

            record = {
                'Filename': test_filename,
                'Label': label,
                'Depth': depth,
                'PaletteSize': palette_size,
            }
            # Levels that had no reference samples get an empty score
            for ripeness_level, column in score_columns.items():
                record[column] = ripeness_comparisons.get(ripeness_level)
            comparison_records.append(record)

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# growing a frame row by row is quadratic.
predictions_df = pd.DataFrame(
    comparison_records,
    columns=['Filename', 'Label', 'Depth', 'PaletteSize', *score_columns.values()],
)

current_datetime = datetime.now().strftime("%d_%m__%H_%M")
predictions_df.to_csv(f'./dataset/predictions/comparisons/predictions_{current_datetime}.csv', index=False)