In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions

In [3]:
# Instantiate VGG16 with its ImageNet weights, keeping the top layers
# (include_top=True) so that predict() yields per-class scores.
base_model = VGG16(include_top=True, weights='imagenet')

In [None]:
#Original data accuracy calculation

# flow_from_directory numbers classes by the sorted order of the sub-folders
# that are actually present, which only coincides with VGG16's output indices
# when the folders are the complete ImageNet set (or its first k synsets).
# Decoding an identity matrix recovers the synset ID (wnid) behind every
# output unit: row i has its single 1.0 at position i, so its top-1 entry is
# the class of unit i.
# NOTE(review): decode_predictions fetches the ImageNet class index file the
# first time it is called - confirm this is acceptable for the environment.
imagenet_wnids = [top1[0][0] for top1 in decode_predictions(np.eye(1000), top=1)]
wnid_to_output_index = {wnid: index for index, wnid in enumerate(imagenet_wnids)}

# The generator is configured identically for every quality level, so it is
# built once instead of once per iteration.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

for quality in range(2, 101, 2):

    # Prepare test data generator
    test_data_dir = f'./data_j2k_jpeg_{quality}/'

    test_generator = test_datagen.flow_from_directory(
        test_data_dir,
        target_size=(224, 224),
        batch_size=32,
        class_mode=None,  # Yield image batches only; ground truth is read from the iterator below
        shuffle=False  # Important: Keep the order of predictions consistent with ground truth
    )

    # Nothing to evaluate: skip rather than passing an empty iterator to predict().
    if test_generator.samples == 0:
        print("No images found in", test_data_dir, "- skipping quality", quality)
        continue

    predictions = base_model.predict(test_generator)

    # Extract predicted class labels
    predicted_labels_indices = np.argmax(predictions, axis=1)

    # Extract true labels indices (positions of each image's folder in the
    # sorted folder list), then translate them to model output indices.
    folder_labels = np.asarray(test_generator.labels)
    folder_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
    if all(name in wnid_to_output_index for name in folder_names):
        folder_to_output_index = np.array(
            [wnid_to_output_index[name] for name in folder_names], dtype=int
        )
        true_labels_indices = folder_to_output_index[folder_labels]
    else:
        # Folder names are not ImageNet synset IDs: keep the original
        # positional labels (assumes folder order already matches the
        # model's class order - TODO confirm for such datasets).
        true_labels_indices = folder_labels

    # Calculate accuracy
    accuracy = np.mean(true_labels_indices == predicted_labels_indices)
    # Top-5: the true class must be among the five highest-scoring outputs.
    top5_indices = np.argsort(predictions, axis=1)[:, -5:]
    top5_accuracy = np.mean(np.any(top5_indices == true_labels_indices[:, None], axis=1))

    print("Test Accuracy:", accuracy, "at quality", quality)
    print("Top-5 Accuracy:", top5_accuracy, "at quality", quality)

In [2]:

import os
import csv

def get_directory_size(directory):
    """Return the combined size, in bytes, of every file below ``directory``.

    The tree is traversed recursively with ``os.walk`` and each file is
    measured with ``os.path.getsize``. When the walk yields no files the
    result is 0.
    """
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _, names in os.walk(directory)
        for name in names
    )

# Record the on-disk size of one class folder at every quality level.
# The CSV is opened once, in write mode, so re-running this cell replaces the
# file instead of appending a duplicate set of rows to it.
output_file = "directory_size_j2k.csv"
with open(output_file, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    for quality in range(2, 101, 2):
        # Specify the directory path
        directory_path = f'./data_j2k_{quality}/n01440764/'

        # Get the total size of images in the directory (bytes)
        total_size = get_directory_size(directory_path)

        # The CSV stores kilobytes (bytes / 1024, two decimals) ...
        total_size_kb = round(total_size / 1024, 2)
        csv_writer.writerow([quality, total_size_kb])

        # ... while the cell output reports the raw byte count.
        print(quality,",", total_size)


2 , 8197
4 , 8197
6 , 8197
8 , 8197
10 , 8197
12 , 8211
14 , 8321
16 , 9265
18 , 13325
20 , 25546
22 , 54589
24 , 105740
26 , 176025
28 , 260414
30 , 353868
32 , 457665
34 , 573051
36 , 694814
38 , 831923
40 , 977905
42 , 1139718
44 , 1314184
46 , 1514707
48 , 1721519
50 , 1930630
52 , 2146534
54 , 2349897
56 , 2540449
58 , 2707851
60 , 2826873
62 , 2917866
64 , 2987081
66 , 3041399
68 , 3083855
70 , 3110748
72 , 3129804
74 , 3138447
76 , 3151780
78 , 3160534
80 , 3165109
82 , 3167816
84 , 3170239
86 , 3171589
88 , 3171842
90 , 3172041
92 , 3172347
94 , 3172347
96 , 3172347
98 , 3172347
100 , 3175680
