## Group 4 Deep Learning Project Developing Robust Models for Natural Distribution Shift

Please download the dataset from [this link: SDNET2018](https://digitalcommons.usu.edu/all_datasets/48/) before running the code.

In [1]:
import zipfile
import os
import numpy as np
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from joblib import Parallel, delayed  # Import Joblib for parallel processing
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

2024-11-10 00:52:29.616597: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-10 00:52:30.260475: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Function to assign labels based on filenames
def assign_labels(image_paths):
    return np.array([1 if 'c' in img.split('/')[1].lower() else 0 for img in image_paths], dtype=np.int32)

# Function to load and preprocess a single image
def load_image_from_zip(zip_path, img_path, idx, total_images):
    with zipfile.ZipFile(zip_path, 'r') as zip_file:
        with zip_file.open(img_path) as file:
            img = Image.open(file)
            img = img.resize((64, 64))  # Resize to 64x64
            img = img.convert('RGB')    # Ensure image has 3 channels
            img = np.array(img) / 255.0  # Normalize pixel values
            return img

# Function to process images in batches and write to memory-mapped arrays
def process_images_in_batches(zip_path, image_filenames, images_mmap_path, labels_mmap_path, batch_size=5000, n_jobs=4):
    total_images = len(image_filenames)
    image_shape = (64, 64, 3)  # Updated image shape
    dtype = 'float32'

    # Create memory-mapped arrays for images and labels
    images_mmap = np.memmap(images_mmap_path, dtype=dtype, mode='w+', shape=(total_images, *image_shape))
    labels_mmap = np.memmap(labels_mmap_path, dtype='int32', mode='w+', shape=(total_images,))

    for start_idx in range(0, total_images, batch_size):
        end_idx = min(start_idx + batch_size, total_images)
        batch_image_paths = image_filenames[start_idx:end_idx]

        print(f"\nProcessing images {start_idx + 1} to {end_idx} of {total_images}...")

        # Load images in parallel with a progress bar
        images_batch = Parallel(n_jobs=n_jobs)(
            delayed(load_image_from_zip)(zip_path, img_path, idx + start_idx, total_images) 
            for idx, img_path in enumerate(tqdm(batch_image_paths, desc="Loading Images", leave=False))
        )
        images_batch = np.array(images_batch)

        # Assign labels for the batch
        labels_batch = assign_labels(batch_image_paths)

        # Write to memory-mapped arrays
        images_mmap[start_idx:end_idx] = images_batch
        labels_mmap[start_idx:end_idx] = labels_batch

        # Clean up to free memory
        del images_batch
        del labels_batch

        # Flush changes to disk
        images_mmap.flush()
        labels_mmap.flush()

    # Close memory-mapped arrays
    del images_mmap
    del labels_mmap
    print(f"Finished processing {total_images} images.")

# Main code to process 'D' and 'P' folders
def main():
    zip_path = 'SDNET2018.zip'
    batch_size = 5000  # Adjust based on available memory
    n_jobs = 4         # Adjust based on system capabilities

    # Extract all filenames from the zip
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        all_filenames = zip_ref.namelist()

    # Function to filter image filenames from a folder
    def get_image_filenames(folder):
        print(f"Filtering images from folder '{folder}'...")
        return [f for f in all_filenames if f.startswith(folder) and f.lower().endswith(('.jpg', '.jpeg', '.png'))]

    # Process 'D' folder
    folder_d = 'D'
    image_filenames_d = get_image_filenames(folder_d)
    print(f"Found {len(image_filenames_d)} images in folder '{folder_d}'.")

    process_images_in_batches(
        zip_path,
        image_filenames_d,
        images_mmap_path='images_folder_d_mmap.npy',
        labels_mmap_path='labels_folder_d_mmap.npy',
        batch_size=batch_size,
        n_jobs=n_jobs
    )

    # Process 'P' folder
    folder_p = 'P'
    image_filenames_p = get_image_filenames(folder_p)
    print(f"Found {len(image_filenames_p)} images in folder '{folder_p}'.")

    process_images_in_batches(
        zip_path,
        image_filenames_p,
        images_mmap_path='images_folder_p_mmap.npy',
        labels_mmap_path='labels_folder_p_mmap.npy',
        batch_size=batch_size,
        n_jobs=n_jobs
    )

    # Return filenames for further use
    return image_filenames_d, image_filenames_p

# Run the main function and capture the returned filenames
if __name__ == "__main__":
    image_filenames_d, image_filenames_p = main()

Filtering images from folder 'D'...
Found 13620 images in folder 'D'.

Processing images 1 to 5000 of 13620...


                                                                   


Processing images 5001 to 10000 of 13620...


                                                                   


Processing images 10001 to 13620 of 13620...


                                                                   

Finished processing 13620 images.
Filtering images from folder 'P'...
Found 24334 images in folder 'P'.

Processing images 1 to 5000 of 24334...


                                                                   


Processing images 5001 to 10000 of 24334...


                                                                   


Processing images 10001 to 15000 of 24334...


                                                                   


Processing images 15001 to 20000 of 24334...


                                                                   


Processing images 20001 to 24334 of 24334...


                                                                   

Finished processing 24334 images.


In [3]:
# Load the processed data using memory-mapped mode
total_images_d = 13620
images_d = np.memmap('images_folder_d_mmap.npy', dtype='float32', mode='r', shape=(total_images_d, 64, 64, 3))
labels_d = np.memmap('labels_folder_d_mmap.npy', dtype='int32', mode='r', shape=(total_images_d,))

total_images_p = 24334
images_p = np.memmap('images_folder_p_mmap.npy', dtype='float32', mode='r', shape=(total_images_p, 64, 64, 3))
labels_p = np.memmap('labels_folder_p_mmap.npy', dtype='int32', mode='r', shape=(total_images_p,))


In [4]:
from collections import Counter

In [5]:
# Define the train-test split ratio
train_ratio = 0.8
batch_size = 32

# Function to stratify and split indices for large memory-mapped arrays
def stratified_split_indices(labels, train_ratio, random_state=42):
    # Get indices for each class
    class_0_indices = np.where(labels == 0)[0]
    class_1_indices = np.where(labels == 1)[0]

    # Shuffle indices for randomness
    np.random.seed(random_state)
    np.random.shuffle(class_0_indices)
    np.random.shuffle(class_1_indices)

    # Split the indices into training and testing
    train_class_0_idx = int(train_ratio * len(class_0_indices))
    train_class_1_idx = int(train_ratio * len(class_1_indices))

    train_indices = np.concatenate([
        class_0_indices[:train_class_0_idx],
        class_1_indices[:train_class_1_idx]
    ])
    test_indices = np.concatenate([
        class_0_indices[train_class_0_idx:],
        class_1_indices[train_class_1_idx:]
    ])

    return train_indices, test_indices

# Apply the stratified split function to both datasets
print("Performing stratified split for the 'P' dataset...")
train_indices_p, test_indices_p = stratified_split_indices(labels_p, train_ratio)

print("Performing stratified split for the 'D' dataset...")
train_indices_d, test_indices_d = stratified_split_indices(labels_d, train_ratio)

# Function to generate batches dynamically from memory-mapped arrays
def data_generator(images, labels, indices, batch_size=32):
    num_samples = len(indices)
    while True:
        # Shuffle the indices for each epoch
        np.random.shuffle(indices)
        # Yield only full batches
        for start_idx in range(0, num_samples - batch_size + 1, batch_size):
            batch_indices = indices[start_idx:start_idx + batch_size]
            # Read images and labels for this batch
            batch_images = images[batch_indices]
            batch_labels = labels[batch_indices]
            yield batch_images, batch_labels

# Create training and testing generators
# Generators for the 'P' dataset
train_gen_p = data_generator(images_p, labels_p, train_indices_p, batch_size=batch_size)
test_gen_p = data_generator(images_p, labels_p, test_indices_p, batch_size=batch_size)

# Generators for the 'D' dataset
train_gen_d = data_generator(images_d, labels_d, train_indices_d, batch_size=batch_size)
test_gen_d = data_generator(images_d, labels_d, test_indices_d, batch_size=batch_size)

# Create TensorFlow datasets from generators
train_dataset_p = tf.data.Dataset.from_generator(
    lambda: train_gen_p,
    output_signature=(
        tf.TensorSpec(shape=(batch_size, 64, 64, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(batch_size,), dtype=tf.int32)
    )
)

train_dataset_d = tf.data.Dataset.from_generator(
    lambda: train_gen_d,
    output_signature=(
        tf.TensorSpec(shape=(batch_size, 64, 64, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(batch_size,), dtype=tf.int32)
    )
)

# Combine training datasets
train_dataset = train_dataset_p.concatenate(train_dataset_d).prefetch(tf.data.AUTOTUNE)

# Testing datasets
test_dataset_p = tf.data.Dataset.from_generator(
    lambda: test_gen_p,
    output_signature=(
        tf.TensorSpec(shape=(batch_size, 64, 64, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(batch_size,), dtype=tf.int32)
    )
).prefetch(tf.data.AUTOTUNE)

test_dataset_d = tf.data.Dataset.from_generator(
    lambda: test_gen_d,
    output_signature=(
        tf.TensorSpec(shape=(batch_size, 64, 64, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(batch_size,), dtype=tf.int32)
    )
).prefetch(tf.data.AUTOTUNE)

print("Datasets are ready:")
print("Training dataset (combined):", train_dataset)
print("Test dataset (P only):", test_dataset_p)
print("Test dataset (D only):", test_dataset_d)

# Check the number of samples
num_samples_p = len(train_indices_p)
num_samples_d = len(train_indices_d)
total_samples = num_samples_p + num_samples_d

print(f"Number of samples in the combined training dataset: {total_samples}")

# Verify the new class distributions
train_labels_p = labels_p[train_indices_p]
train_labels_d = labels_d[train_indices_d]
test_labels_p = labels_p[test_indices_p]
test_labels_d = labels_d[test_indices_d]

print("Training labels distribution (P):", Counter(train_labels_p))
print("Test labels distribution (P):", Counter(test_labels_p))
print("Training labels distribution (D):", Counter(train_labels_d))
print("Test labels distribution (D):", Counter(test_labels_d))

# Define subset size for Set B (adjust as needed)
subset_size_per_class = 1000

# Extract indices for class 0 and class 1 from both datasets (P and D)
class_0_indices_p = train_indices_p[train_labels_p == 0]
class_1_indices_p = train_indices_p[train_labels_p == 1]

class_0_indices_d = train_indices_d[train_labels_d == 0]
class_1_indices_d = train_indices_d[train_labels_d == 1]

# Shuffle indices for randomness
np.random.seed(42)
np.random.shuffle(class_0_indices_p)
np.random.shuffle(class_1_indices_p)
np.random.shuffle(class_0_indices_d)
np.random.shuffle(class_1_indices_d)

# Select a subset of each class from P and D
subset_class_0_p = class_0_indices_p[:subset_size_per_class // 2]
subset_class_1_p = class_1_indices_p[:subset_size_per_class // 2]

subset_class_0_d = class_0_indices_d[:subset_size_per_class // 2]
subset_class_1_d = class_1_indices_d[:subset_size_per_class // 2]

# Combine subsets to create a balanced training set for Set B
train_indices_b_p = np.concatenate([subset_class_0_p, subset_class_1_p])
train_indices_b_d = np.concatenate([subset_class_0_d, subset_class_1_d])
total_samples_b = len(train_indices_b_p) + len(train_indices_b_d)

# Now calculate steps_per_epoch_b
steps_per_epoch_b = total_samples_b // batch_size

# Calculate the steps per epoch for training and validation
steps_per_epoch_p = len(train_indices_p) // batch_size
steps_per_epoch_d = len(train_indices_d) // batch_size
steps_per_epoch = steps_per_epoch_p + steps_per_epoch_d
validation_steps_p = len(test_indices_p) // batch_size
validation_steps_d = len(test_indices_d) // batch_size

# Function to generate batches for Set B
def data_generator_set_b(images_p, images_d, labels_p, labels_d, indices_p, indices_d, batch_size=32):
    num_samples_p = len(indices_p)
    num_samples_d = len(indices_d)
    total_samples = num_samples_p + num_samples_d

    images_list = [images_p, images_d]
    labels_list = [labels_p, labels_d]
    indices_list = [indices_p, indices_d]

    while True:
        # Combine and shuffle indices
        combined_indices = np.concatenate([
            np.column_stack((np.zeros(len(indices_p), dtype=int), indices_p)),
            np.column_stack((np.ones(len(indices_d), dtype=int), indices_d))
        ])
        np.random.shuffle(combined_indices)

        # Yield only full batches
        for start_idx in range(0, total_samples - batch_size + 1, batch_size):
            batch_info = combined_indices[start_idx:start_idx + batch_size]

            batch_images = []
            batch_labels = []

            for data_source, idx in batch_info:
                data_source = int(data_source)
                idx = int(idx)
                img = images_list[data_source][idx]
                label = labels_list[data_source][idx]
                batch_images.append(img)
                batch_labels.append(label)

            batch_images = np.array(batch_images)
            batch_labels = np.array(batch_labels)
            yield batch_images, batch_labels

# Create generators for Set B
train_gen_b = data_generator_set_b(
    images_p, images_d, labels_p, labels_d,
    train_indices_b_p, train_indices_b_d,
    batch_size=batch_size
)

# Create training dataset for Set B using generator
train_dataset_b = tf.data.Dataset.from_generator(
    lambda: train_gen_b,
    output_signature=(
        tf.TensorSpec(shape=(batch_size, 64, 64, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(batch_size,), dtype=tf.int32)
    )
).prefetch(tf.data.AUTOTUNE)

# Use the entire training set from the D dataset for Set C
train_dataset_c = train_dataset_d.prefetch(tf.data.AUTOTUNE)

print(f"Set B training dataset size: {total_samples_b} samples")
print(f"Set C training dataset size: {len(train_indices_d)} samples")

Performing stratified split for the 'P' dataset...
Performing stratified split for the 'D' dataset...
Datasets are ready:
Training dataset (combined): <_PrefetchDataset element_spec=(TensorSpec(shape=(32, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(32,), dtype=tf.int32, name=None))>
Test dataset (P only): <_PrefetchDataset element_spec=(TensorSpec(shape=(32, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(32,), dtype=tf.int32, name=None))>
Test dataset (D only): <_PrefetchDataset element_spec=(TensorSpec(shape=(32, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(32,), dtype=tf.int32, name=None))>
Number of samples in the combined training dataset: 30362
Training labels distribution (P): Counter({0: 17380, 1: 2086})
Test labels distribution (P): Counter({0: 4346, 1: 522})
Training labels distribution (D): Counter({0: 9276, 1: 1620})
Test labels distribution (D): Counter({0: 2319, 1: 405})
Set B training dataset size: 2000 samples
Set C training da

2024-11-10 01:35:49.115093: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1636] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31013 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:af:00.0, compute capability: 7.0


In [6]:

# Simplified CNN model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification output
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model using the combined training dataset
print("Starting model training...")
history = model.fit(
    train_dataset, 
    epochs=2, 
    steps_per_epoch=steps_per_epoch,
    validation_data=test_dataset_p,
    validation_steps=validation_steps_p
)

# Evaluate the model on the D-only test set
test_loss, test_accuracy = model.evaluate(
    test_dataset_d, 
    steps=validation_steps_d
)
print(f"Test accuracy on D-only dataset: {test_accuracy}")

# Evaluate the model on the shifted test set (P-only)
print("Evaluating on the shifted test set...")
shifted_test_loss, shifted_test_accuracy = model.evaluate(
    test_dataset_p,
    steps=validation_steps_p
)
print(f"Test accuracy on shifted dataset (P): {shifted_test_accuracy}")

# Define subset size for Set B (adjust as needed)
subset_size_per_class = 1000

# Extract indices for class 0 and class 1 from both datasets (P and D)
class_0_indices_p = train_indices_p[train_labels_p == 0]
class_1_indices_p = train_indices_p[train_labels_p == 1]

class_0_indices_d = train_indices_d[train_labels_d == 0]
class_1_indices_d = train_indices_d[train_labels_d == 1]

# Shuffle indices for randomness
np.random.seed(42)
np.random.shuffle(class_0_indices_p)
np.random.shuffle(class_1_indices_p)
np.random.shuffle(class_0_indices_d)
np.random.shuffle(class_1_indices_d)

# Select a subset of each class from P and D
subset_class_0_p = class_0_indices_p[:subset_size_per_class // 2]
subset_class_1_p = class_1_indices_p[:subset_size_per_class // 2]

subset_class_0_d = class_0_indices_d[:subset_size_per_class // 2]
subset_class_1_d = class_1_indices_d[:subset_size_per_class // 2]

# Combine subsets to create a balanced training set for Set B
train_indices_b_p = np.concatenate([subset_class_0_p, subset_class_1_p])
train_indices_b_d = np.concatenate([subset_class_0_d, subset_class_1_d])
total_samples_b = len(train_indices_b_p) + len(train_indices_b_d)

# Function to generate batches for Set B
def data_generator_set_b(images_p, images_d, labels_p, labels_d, indices_p, indices_d, batch_size=32):
    num_samples_p = len(indices_p)
    num_samples_d = len(indices_d)
    total_samples = num_samples_p + num_samples_d

    images_list = [images_p, images_d]
    labels_list = [labels_p, labels_d]
    indices_list = [indices_p, indices_d]

    while True:
        # Combine and shuffle indices
        combined_indices = np.concatenate([
            np.column_stack((np.zeros(len(indices_p), dtype=int), indices_p)),
            np.column_stack((np.ones(len(indices_d), dtype=int), indices_d))
        ])
        np.random.shuffle(combined_indices)

        # Adjusted loop to exclude incomplete batches
        for start_idx in range(0, total_samples - batch_size + 1, batch_size):
            batch_info = combined_indices[start_idx:start_idx + batch_size]

            batch_images = []
            batch_labels = []

            for data_source, idx in batch_info:
                data_source = int(data_source)
                idx = int(idx)
                img = images_list[data_source][idx]
                label = labels_list[data_source][idx]
                batch_images.append(img)
                batch_labels.append(label)

            batch_images = np.array(batch_images)
            batch_labels = np.array(batch_labels)
            yield batch_images, batch_labels

# Create generators for Set B
train_gen_b = data_generator_set_b(
    images_p, images_d, labels_p, labels_d,
    train_indices_b_p, train_indices_b_d,
    batch_size=batch_size
)

# Create training dataset for Set B using generator
train_dataset_b = tf.data.Dataset.from_generator(
    lambda: train_gen_b,
    output_signature=(
        tf.TensorSpec(shape=(batch_size, 64, 64, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(batch_size,), dtype=tf.int32)
    )
).prefetch(tf.data.AUTOTUNE)

# Use the entire training set from the D dataset for Set C
train_dataset_c = train_dataset_d.prefetch(tf.data.AUTOTUNE)

print(f"Set B training dataset size: {total_samples_b} samples")
print(f"Set C training dataset size: {len(train_indices_d)} samples")

# Training with Set B
print("Training model on Set B (Small/Diverse)...")
steps_per_epoch_b = total_samples_b // batch_size
history_b = model.fit(
    train_dataset_b,
    epochs=2,  # Adjust as needed
    steps_per_epoch=steps_per_epoch_b,
    validation_data=test_dataset_p,
    validation_steps=validation_steps_p
)

# Training with Set C
print("Training model on Set C (Small/Standard)...")
history_c = model.fit(
    train_dataset_c,
    epochs=2,  # Adjust as needed
    steps_per_epoch=steps_per_epoch_d,
    validation_data=test_dataset_p,
    validation_steps=validation_steps_p
)

# Validate Set B model on the standard test set (D-only)
print("Evaluating Set B model on D-only test set...")
test_loss_b_d, test_accuracy_b_d = model.evaluate(
    test_dataset_d,
    steps=validation_steps_d
)
print(f"Test accuracy of Set B model on D-only dataset: {test_accuracy_b_d}")

# Validate Set B model on the shifted test set (P-only)
print("Evaluating Set B model on shifted test set...")
test_loss_b_shifted, test_accuracy_b_shifted = model.evaluate(
    test_dataset_p,
    steps=validation_steps_p
)
print(f"Test accuracy of Set B model on shifted dataset (P): {test_accuracy_b_shifted}")

# Validate Set C model on the standard test set (D-only)
print("Evaluating Set C model on D-only test set...")
test_loss_c_d, test_accuracy_c_d = model.evaluate(
    test_dataset_d,
    steps=validation_steps_d
)
print(f"Test accuracy of Set C model on D-only dataset: {test_accuracy_c_d}")

# Validate Set C model on the shifted test set (P-only)
print("Evaluating Set C model on shifted test set...")
test_loss_c_shifted, test_accuracy_c_shifted = model.evaluate(
    test_dataset_p,
    steps=validation_steps_p
)
print(f"Test accuracy of Set C model on shifted dataset (P): {test_accuracy_c_shifted}")

Starting model training...
Epoch 1/2


2024-11-10 01:35:50.455220: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8904
2024-11-10 01:35:51.532738: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fcdd7662980 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-11-10 01:35:51.532764: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla V100-SXM2-32GB, Compute Capability 7.0
2024-11-10 01:35:51.652852: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-11-10 01:35:52.519313: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/2
Test accuracy on D-only dataset: 0.8441176414489746
Evaluating on the shifted test set...
Test accuracy on shifted dataset (P): 0.9103618264198303
Set B training dataset size: 2000 samples
Set C training dataset size: 10896 samples
Training model on Set B (Small/Diverse)...
Epoch 1/2
Epoch 2/2
Training model on Set C (Small/Standard)...
Epoch 1/2
Epoch 2/2
Evaluating Set B model on D-only test set...
Test accuracy of Set B model on D-only dataset: 0.8540441393852234
Evaluating Set B model on shifted test set...
Test accuracy of Set B model on shifted dataset (P): 0.8978207111358643
Evaluating Set C model on D-only test set...
Test accuracy of Set C model on D-only dataset: 0.8511029481887817
Evaluating Set C model on shifted test set...
Test accuracy of Set C model on shifted dataset (P): 0.9006990194320679
