In [1]:
from src.utils import *
import pandas as pd
from datetime import datetime
from tqdm import tqdm

In [6]:
REMOVE_BACKGROUND = True
OPTMIZED = True

## Remove background from images

In [7]:
# Remove backgroud from image
if (REMOVE_BACKGROUND):
    remove_backgrounds()

## Octree algorithm

### Octree image color quantization example

In [13]:
octree, img_info = create_octree_from_image('2.png', 6)

# make palette for specified number of colors
if (OPTMIZED):
    octree.make_optimized_palette(256)
else:
    octree.make_palette(256)


# create palette for 256 color max and save to file
create_palette_image(octree, 'example_palette.png')

# save output image
save_quantized_image(octree, 'example_quantized.png', img_info)

# get the node id set
# which allows to calculate the Jaccard similarity coefficient between two images
id_set = octree.node_id_set()

## Comparison Pipeline

In [2]:
RIPENESS_LEVELS = ['unripe', 'barely ripe', 'ripe', 'overripe']
DEPTH_LEVELS = [4, 6, 8]
PALETTE_SIZES = [64, 256, 512]

### Train Test split

In [3]:
from sklearn.model_selection import train_test_split

# Extract features and generate balanced train test splits
X, y = create_features_labels(RIPENESS_LEVELS)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [7]:
sample_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth=6, palette_size=256) 
                for filename in X_train }

KeyboardInterrupt: 

### CSV Split

In [10]:
# Instantiate sample values
samples_df = pd.read_csv('./dataset/samples.csv')
sample_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth=6, palette_size=256) 
                for filename in samples_df['Filename'].values }

In [12]:
# Read test values
test_df = pd.read_csv('./dataset/test.csv')
test_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth=6, palette_size=256) 
                for filename in test_df['Filename'].values }

### Make Predictions

In [13]:
predictions_df = pd.DataFrame(columns = ['Filename', 'Label'])

for test_filename, test_id_set in tqdm(test_dict.items()):
    ripeness_comparisons = {}
    for ripeness_level in RIPENESS_LEVELS:
        ripeness_list = []
        for sample_filename in samples_df[samples_df['Label'] == ripeness_level]['Filename'].values:
            sample_id_set = sample_dict[sample_filename]
            ripeness_list.append(jaccard_similarity_coefficient(sample_id_set, test_id_set))
        ripeness_comparisons[ripeness_level] = (sum(ripeness_list) / len(ripeness_list))
        # print(f'{test_filename} vs {ripeness_level}: {ripeness_comparisons[ripeness_level]}')
    
    label = max(ripeness_comparisons, key=ripeness_comparisons.get)
    predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)

current_datetime = datetime.now().strftime("%d_%m__%H_%M")
predictions_df.to_csv(f'./dataset/predictions/predictions_{current_datetime}.csv', index=False)

  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictions_df.append({'Filename' : test_filename, 'Label': label}, ignore_index = True)
  predictions_df = predictio

1.png vs unripe: 0.07325383304940375
1.png vs barely ripe: 0.2707509881422925
1.png vs ripe: 0.3490364025695932
1.png vs very ripe: 0.41309255079006774
1.png vs rotten: 0.3672566371681416
2.png vs unripe: 0.08623548922056384
2.png vs barely ripe: 0.20795660036166366
2.png vs ripe: 0.2335216572504708
2.png vs very ripe: 0.1944954128440367
2.png vs rotten: 0.2294455066921606
3.png vs unripe: 0.16544117647058823
3.png vs barely ripe: 0.403470715835141
3.png vs ripe: 0.3693304535637149
3.png vs very ripe: 0.1864406779661017
3.png vs rotten: 0.21484375
4.png vs unripe: 0.1282952548330404
4.png vs barely ripe: 0.3731656184486373
4.png vs ripe: 0.3572938689217759
4.png vs very ripe: 0.268389662027833
4.png vs rotten: 0.2883435582822086
5.png vs unripe: 0.14575645756457564
5.png vs barely ripe: 0.2938775510204082
5.png vs ripe: 0.35
5.png vs very ripe: 0.26954732510288065
5.png vs rotten: 0.27941176470588236
6.png vs unripe: 0.12186379928315412
6.png vs barely ripe: 0.3094262295081967
6.png vs




### Calculate accuracy

In [14]:
# Calculate accuracy
correct_predictions = 0
for index, row in predictions_df.iterrows():
    if (row['Label'] == test_df[test_df['Filename'] == row['Filename']]['Label'].values[0]):
        correct_predictions += 1

accuracy = correct_predictions / len(predictions_df)
print(f'Accuracy: {accuracy}')

Accuracy: 0.2


### Compare palette sizes and depth levels

In [25]:
predictions_df = pd.DataFrame(columns = ['Filename', 'Label', 'Depth', 'PaletteSize', 'Unripe', 'Barely Ripe', 'Ripe', 'Very Ripe', 'Rotten'])

for depth in DEPTH_LEVELS:
      for palette_size in PALETTE_SIZES:
        print(f'DEPTH: {depth} & PALETTE SIZE: {palette_size}')

        # Instantiate sample values
        print('Instantiating sample values...')
        samples_df = pd.read_csv('./dataset/samples.csv')
        sample_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth, palette_size) 
                        for filename in samples_df['Filename'].values }
        
        # Read test values
        print('Instantiating test values...')
        test_df = pd.read_csv('./dataset/test.csv')
        test_dict = { filename: create_node_id_set_from_image(filename, OPTMIZED, depth, palette_size) 
                        for filename in test_df['Filename'].values }
        
        print('Predicting...')
        for test_filename, test_id_set in tqdm(test_dict.items()):
            ripeness_comparisons = {}
            for ripeness_level in RIPENESS_LEVELS:
                ripeness_list = []
                for sample_filename in samples_df[samples_df['Label'] == ripeness_level]['Filename'].values:
                    sample_id_set = sample_dict[sample_filename]
                    ripeness_list.append(jaccard_similarity_coefficient(sample_id_set, test_id_set))
                ripeness_comparisons[ripeness_level] = (sum(ripeness_list) / len(ripeness_list))
            
            label = max(ripeness_comparisons, key=ripeness_comparisons.get)
            predictions_df = predictions_df.append({
                'Filename' : test_filename, 
                'Label': label, 
                'Depth': depth, 
                'PaletteSize': palette_size, 
                'Unripe': ripeness_comparisons['unripe'],
                'Ripe': ripeness_comparisons['ripe'],
                'Rotten': ripeness_comparisons['rotten']
            }, ignore_index = True)
            
current_datetime = datetime.now().strftime("%d_%m__%H_%M")
predictions_df.to_csv(f'./dataset/predictions/comparisons/predictions_{current_datetime}.csv', index=False)

DEPTH: 4 & PALETTE SIZE: 64
Instantiating sample values...
Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 696.84it/s]

DEPTH: 4 & PALETTE SIZE: 256
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 494.84it/s]

DEPTH: 4 & PALETTE SIZE: 512
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 681.23it/s]

DEPTH: 6 & PALETTE SIZE: 64
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 710.18it/s]

DEPTH: 6 & PALETTE SIZE: 256
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 649.88it/s]

DEPTH: 6 & PALETTE SIZE: 512
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 537.39it/s]

DEPTH: 8 & PALETTE SIZE: 64
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 705.16it/s]

DEPTH: 8 & PALETTE SIZE: 256
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 665.66it/s]

DEPTH: 8 & PALETTE SIZE: 512
Instantiating sample values...





Instantiating test values...
Predicting...


  predictions_df = predictions_df.append({
100%|██████████| 1/1 [00:00<00:00, 657.52it/s]
