In [None]:
import os
import re
import zipfile

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
def unzip_images(zip_path, extract_to):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_path: Path of the zip archive to read.
        extract_to: Directory the archive members are extracted into.
    """
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(path=extract_to)

In [None]:
def load_and_resize_images(image_paths, target_size):
    """Load images as grayscale, resize them, and flatten each one into a row.

    Images that cannot be loaded are reported with ``print`` and skipped, so
    the returned sequences may be shorter than ``image_paths``.

    Args:
        image_paths: Sequence of image file paths.
        target_size: ``(width, height)`` pair passed to ``Image.resize``.

    Returns:
        A tuple ``(images, original_images, original_indices)``:

        * ``images`` - 2-D array with one flattened, resized image per row.
          When no image could be loaded the shape is ``(0, width * height)``
          so that row-wise operations (``axis=1``) still work.
        * ``original_images`` - the grayscale images at their original size.
        * ``original_indices`` - position in ``image_paths`` of each loaded
          image.
    """
    images = []
    original_images = []
    original_indices = []

    for idx, path in enumerate(image_paths):
        try:
            # The context manager releases the file handle deterministically;
            # convert() returns an independent, fully loaded image that
            # remains usable after the source file is closed.
            with Image.open(path) as source:
                img = source.convert('L')
            img_array = np.array(img.resize(target_size)).flatten()
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"Error in uploading image {path}: {e}")
            continue

        images.append(img_array)
        original_images.append(img)
        original_indices.append(idx)

    if not images:
        # np.array([]) would be 1-D with shape (0,); keep the result 2-D.
        flat_length = int(np.prod(target_size))
        return np.empty((0, flat_length), dtype=np.uint8), original_images, original_indices

    return np.array(images), original_images, original_indices

In [None]:
def save_selected_frames(selected_frames, original_images, original_indices, output_dir, tac_name):
    """Save the selected frames of one scan under ``<output_dir>/<tac_name>``.

    Each frame is written as ``SliceNNNN.jpg`` where ``NNNN`` is the frame's
    original index plus one, zero-padded to four digits.

    Args:
        selected_frames: Positions (into ``original_images``) of the frames
            to save.
        original_images: Image objects exposing ``save(path)``.
        original_indices: Original index of each entry in
            ``original_images``.
        output_dir: Root output directory.
        tac_name: Name of the scan; used as the sub-directory name.
    """
    destination = os.path.join(output_dir, tac_name)
    os.makedirs(destination, exist_ok=True)

    for position in selected_frames:
        frame = original_images[position]
        slice_number = original_indices[position] + 1
        file_name = f"Slice{slice_number:04d}.jpg"
        frame.save(os.path.join(destination, file_name))

In [None]:
def save_frame_features(features, frame_indices, original_indices, output_dir, tac_name):
    """Write the feature rows of the selected frames to ``SliceFeatures.csv``.

    The CSV is stored in ``<output_dir>/<tac_name>`` and holds one row per
    entry of ``frame_indices``: the feature values followed by an
    ``original_index`` column (the frame's original index plus one).

    Args:
        features: Array indexed by ``frame_indices`` to pick the rows to save.
        frame_indices: Positions of the selected frames.
        original_indices: Original index of each frame position.
        output_dir: Root output directory.
        tac_name: Name of the scan; used as the sub-directory name.
    """
    destination = os.path.join(output_dir, tac_name)
    os.makedirs(destination, exist_ok=True)

    slice_numbers = [original_indices[position] + 1 for position in frame_indices]
    table = pd.DataFrame(features[frame_indices]).assign(original_index=slice_numbers)
    table.to_csv(os.path.join(destination, "SliceFeatures.csv"), index=False)

In [None]:
tac_directory = '' # input dir containing scans

output_directory = '' # output dir for PCA-reduced groups of 5 slices per scan

# Fail early with an actionable message: with the empty placeholders above,
# os.makedirs('') raises a FileNotFoundError that does not say what to fix.
if not tac_directory or not output_directory:
    raise ValueError(
        "Set both 'tac_directory' and 'output_directory' before running this notebook."
    )

os.makedirs(output_directory, exist_ok=True)

In [None]:
target_size = (224, 224)

# Per scan, frames whose pixel variance does not exceed this percentile of the
# scan's own frame variances are discarded.
LOW_VARIANCE_PERCENTILE = 20


def _natural_sort_key(name):
    """Sort key ordering embedded digit runs numerically (``s2`` before ``s10``)."""
    # re.split with a capturing group alternates text, digits, text, ...;
    # odd positions are always the digit runs.
    tokens = re.split(r'(\d+)', name)
    parts = [int(token) if i % 2 else token.lower() for i, token in enumerate(tokens)]
    return parts, name


all_images = []
all_original_images = []
all_original_indices = []
tac_names = []

# os.listdir returns entries in arbitrary order. Sorting makes the run
# reproducible and, more importantly, makes the position of an image in
# image_paths (which later becomes its slice number) follow the file names.
for tac in sorted(os.listdir(tac_directory), key=_natural_sort_key):
    tac_path = os.path.join(tac_directory, tac)
    if not os.path.isdir(tac_path):
        continue

    # Use the first archive in sorted order so the choice is deterministic
    # when a scan folder holds more than one zip file.
    zip_names = sorted(
        (name for name in os.listdir(tac_path) if name.endswith('.zip')),
        key=_natural_sort_key,
    )
    if not zip_names:
        continue
    zip_file_path = os.path.join(tac_path, zip_names[0])

    extract_path = os.path.join(tac_path, 'Extracted')
    os.makedirs(extract_path, exist_ok=True)
    unzip_images(zip_file_path, extract_path)

    image_paths = [
        os.path.join(extract_path, name)
        for name in sorted(os.listdir(extract_path), key=_natural_sort_key)
        if name.endswith('.jpg')
    ]
    if not image_paths:
        continue

    tac_images, original_images, original_indices = load_and_resize_images(image_paths, target_size)
    if len(tac_images) == 0:
        # Nothing could be loaded: the variance/percentile computation below
        # is undefined for an empty set, so skip this scan.
        print(f"No readable images found for {tac}; skipping.")
        continue

    # Keep only frames whose pixel variance is strictly above the cut-off.
    variances = np.var(tac_images, axis=1)
    variance_cutoff = np.percentile(variances, LOW_VARIANCE_PERCENTILE)
    high_variance_indices = np.where(variances > variance_cutoff)[0]
    tac_images = tac_images[high_variance_indices]
    original_images = [original_images[i] for i in high_variance_indices]
    original_indices = [original_indices[i] for i in high_variance_indices]

    all_images.append(tac_images)
    all_original_images.append(original_images)
    all_original_indices.append(original_indices)
    tac_names.append(tac)

# Fraction of the total variance the per-scan PCA must retain.
PCA_VARIANCE_RATIO = 0.95
# Number of highest-scoring frames saved for each scan.
NUM_TOP_FRAMES = 5

for tac_images, original_images, original_indices, tac_name in zip(all_images, all_original_images, all_original_indices, tac_names):
    if len(tac_images) == 0:
        # Every frame of this scan was filtered out earlier; nothing to rank.
        continue

    scaler = StandardScaler()
    tac_images_scaled = scaler.fit_transform(tac_images)

    pca = PCA(n_components=PCA_VARIANCE_RATIO)
    tac_images_pca = pca.fit_transform(tac_images_scaled)

    # Score each frame by the variance of its PCA scores across components.
    # (This is a per-frame statistic, not the PCA's explained variance.)
    frame_scores = np.var(tac_images_pca, axis=1)
    # A stable sort keeps tie-breaking reproducible; the slice yields the
    # highest-scoring frames in ascending score order.
    top_frame_indices = np.argsort(frame_scores, kind='stable')[-NUM_TOP_FRAMES:]

    save_selected_frames(top_frame_indices, original_images, original_indices, output_directory, tac_name)
    save_frame_features(tac_images_pca, top_frame_indices, original_indices, output_directory, tac_name)

print("Slices and features selected and stored as new data.")