In [1]:
from src.utils import *
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [2]:
REMOVE_BACKGROUND = True
OPTMIZED = True

## Remove background from images

In [10]:
# Remove backgroud from image
if (REMOVE_BACKGROUND):
    remove_backgrounds()

## Octree algorithm

### Octree image color quantization example

In [4]:
octree, img_info = create_octree_from_image('2.png', 6)

# make palette for specified number of colors
if (OPTMIZED):
    octree.make_optimized_palette(256)
else:
    octree.make_palette(256)


# create palette for 256 color max and save to file
create_palette_image(octree, 'example_palette.png')

# save output image
save_quantized_image(octree, 'example_quantized.png', img_info)

# get the node id set
# which allows to calculate the Jaccard similarity coefficient between two images
id_set = octree.node_id_set()

### Comparison Pipeline

In [5]:
RIPENESS_LEVELS = ['unripe', 'ripe', 'rotten']

In [11]:
# Instantiate sample values
samples_df = pd.read_csv('./dataset/samples.csv')
sample_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth=6, palette_size=256) 
                for filename in samples_df['Filename'].values }

In [7]:
# Read test values
test_df = pd.read_csv('./dataset/test.csv')
test_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth=6, palette_size=256) 
                for filename in test_df['Filename'].values }

In [12]:
predictions_df = pd.DataFrame(columns = ['Filename', 'Label'])

for test_filename, test_id_set in tqdm(test_dict.items()):
    ripeness_comparisons = {}
    for ripeness_level in RIPENESS_LEVELS:
        ripeness_list = []
        for sample_filename in samples_df[samples_df['Label'] == ripeness_level]['Filename'].values:
            sample_id_set = sample_dict[sample_filename]
            ripeness_list.append(jaccard_similarity_coefficient(sample_id_set, test_id_set))
        ripeness_comparisons[ripeness_level] = (sum(ripeness_list) / len(ripeness_list))
        print(f'{test_filename} vs {ripeness_level}: {ripeness_comparisons[ripeness_level]}')
    
    label = max(ripeness_comparisons, key=ripeness_comparisons.get)
    predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)

current_datetime = datetime.now().strftime("%d_%m__%H_%M")
predictions_df.to_csv(f'./dataset/predictions/predictions_{current_datetime}.csv', index=False)

  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
100%|██████████| 1/1 [00:00<00:00, 314.11it/s]

4.png vs unripe: 0.5432692307692307
4.png vs ripe: 0.4451901565995526
4.png vs rotten: 0.35294117647058826





In [9]:
# Calculate accuracy
correct_predictions = 0
for index, row in predictions_df.iterrows():
    if (row['Label'] == test_df[test_df['Filename'] == row['Filename']]['Label'].values[0]):
        correct_predictions += 1

accuracy = correct_predictions / len(predictions_df)
print(f'Accuracy: {accuracy}')

Accuracy: 0.0
