# File Types

In [1]:
import os

def get_unique_filetypes(directory):
    """Collect the distinct file extensions found anywhere under ``directory``.

    The tree is walked recursively with ``os.walk``. Extensions are
    lower-cased and keep their leading dot (e.g. ``'.png'``); a file without
    an extension contributes the empty string.

    Args:
        directory: Path of the directory tree to scan.

    Returns:
        set[str]: The unique, lower-cased extensions.
    """
    return {
        os.path.splitext(name)[1].lower()
        for _dirpath, _dirnames, names in os.walk(directory)
        for name in names
    }

# Example usage: scan the current working directory and list every extension found
directory_path = './'
unique_filetypes = get_unique_filetypes(directory_path)

print("Unique file types:")
for filetype in unique_filetypes:
    # An empty string means the file had no extension at all
    print(filetype or "No extension")

Unique file types:
No extension
.png
.meta
.txt
.docx
.py
.tif
.ipynb
.gz
.pptx
.html
.pdf


# Image Dimensions

In [2]:
import os
import numpy as np
from PIL import Image
import tifffile as tiff

def get_image_dimensions(directory):
    """Collect the distinct array shapes of all PNG and TIFF files under ``directory``.

    The tree is walked recursively. Files ending in ``.png`` are opened with
    PIL and converted to a NumPy array; files ending in ``.tif``/``.tiff`` are
    read with ``tifffile``. Extension matching is case-insensitive. A file
    that cannot be read is reported on stdout and skipped, so one bad file
    does not abort the scan.

    Args:
        directory: Path of the directory tree to scan.

    Returns:
        set[tuple[int, ...]]: The unique ``.shape`` tuples of the loaded arrays.
    """
    shapes = set()

    for dirpath, _, filenames in os.walk(directory):
        for filename in filenames:
            lowered = filename.lower()
            is_png = lowered.endswith('.png')
            if not (is_png or lowered.endswith(('.tif', '.tiff'))):
                continue  # not an image format handled here

            file_path = os.path.join(dirpath, filename)
            try:
                if is_png:
                    with Image.open(file_path) as handle:
                        shapes.add(np.array(handle).shape)
                else:
                    with tiff.TiffFile(file_path) as handle:
                        shapes.add(handle.asarray().shape)
            except Exception as e:
                # Best effort: report the failure and keep scanning
                print(f"Could not process file {file_path}: {e}")

    return shapes

# Example usage: gather every distinct array shape below the working directory
directory_path = './'
unique_dimensions = get_image_dimensions(directory_path)

print("Unique image dimensions:")
for dim in unique_dimensions:
    print(dim)

print("\n2D image dimensions:")
for dim in unique_dimensions:
    # Keep only the axes whose length is at least 30.
    # NOTE(review): presumably this drops the short frame/channel axes so only
    # height and width remain -- a heuristic; confirm it holds for new data.
    print(tuple(axis_len for axis_len in dim if axis_len >= 30))
    

Unique image dimensions:
(14, 3, 104, 689)
(260, 940, 3)
(162, 1020)
(260, 941, 3)
(14, 3, 91, 563)
(10, 3, 107, 762)
(789, 943, 3)
(14, 3, 109, 731)
(12, 3, 110, 697)
(155, 1025)
(13, 3, 162, 1020)
(14, 3, 93, 634)
(18, 3, 89, 737)
(789, 1339, 3)
(14, 3, 76, 611)
(260, 942, 3)
(14, 3, 172, 848)
(11, 3, 113, 728)

2D image dimensions:
(104, 689)
(260, 940)
(162, 1020)
(260, 941)
(91, 563)
(107, 762)
(789, 943)
(109, 731)
(110, 697)
(155, 1025)
(162, 1020)
(93, 634)
(89, 737)
(789, 1339)
(76, 611)
(260, 942)
(172, 848)
(113, 728)


Most of the 2D images have height <= **172**. The exceptions are sizes (789, 1339) and (789, 943), which are outputs from tubulaton that aren't cropped yet, and the height-260 shapes listed above ((260, 940), (260, 941) and (260, 942)).

So we can make the net take inputs of size **256 x 256** (to allow for higher resolution data later on)

In [16]:
import os
from PIL import Image
import numpy as np

def create_processed_folder(original_folder):
    """Create (if needed) the sibling ``<name>-Processed`` folder and return its path.

    For ``/data/Control`` the result is ``/data/Control-Processed``. Trailing
    path separators on ``original_folder`` are ignored, so ``/data/Control/``
    gives the same result.

    Args:
        original_folder: Path of the folder whose processed counterpart is wanted.

    Returns:
        str: Path of the ``-Processed`` folder, which exists when this returns.
    """
    # Strip trailing separators first: with "dir/" basename() is '' and the
    # new folder would be created *inside* the input folder as "dir/-Processed".
    separators = os.sep + (os.altsep or '')
    folder = os.fspath(original_folder)
    folder = folder.rstrip(separators) or folder  # keep a bare root ("/") intact

    base_dir = os.path.dirname(folder)
    new_folder = os.path.join(base_dir, os.path.basename(folder) + '-Processed')

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(new_folder, exist_ok=True)
    return new_folder

def process_image(image_path, output_path):
    """Contrast-stretch one image to the full 8-bit range and save it.

    The pixel data is rescaled linearly so that the smallest value in the
    whole array maps to 0 and the largest to 255 (one global min/max over all
    axes of the array), then converted to ``uint8`` and written to
    ``output_path``.

    An image in which every value is identical has no intensity range to
    stretch; it is written as all zeros instead of dividing by zero.

    Args:
        image_path: Path of the image to read (opened with ``PIL.Image.open``).
        output_path: Path the rescaled image is saved to.
    """
    # Load the pixel data, then release the input file before writing
    with Image.open(image_path) as img:
        img_array = np.array(img).astype(float)

    img_min = img_array.min()
    img_max = img_array.max()
    intensity_range = img_max - img_min

    if intensity_range > 0:
        # Linearly rescale to make the brightest pixels have intensity 255
        img_rescaled = 255. * (img_array - img_min) / intensity_range
    else:
        # Constant image: avoid 0/0 -> NaN, whose cast to uint8 is undefined
        img_rescaled = np.zeros_like(img_array)

    img_rescaled = img_rescaled.astype(np.uint8)

    # Convert the array back to an image and save it
    Image.fromarray(img_rescaled).save(output_path)

def process_images_in_folder(original_folder, processed_folder):
    """Run ``process_image`` on every PNG under ``original_folder``.

    The directory layout below ``original_folder`` is mirrored below
    ``processed_folder``; each output keeps its input's file name. Only files
    ending in ``.png`` (case-insensitive) are processed, and an output
    directory is created only for input directories that contain a PNG.

    If ``processed_folder`` lies inside ``original_folder`` it is skipped
    during the walk, so earlier outputs are not picked up as inputs.

    Args:
        original_folder: Root of the directory tree to read images from.
        processed_folder: Root of the directory tree to write results to.
    """
    processed_abs = os.path.abspath(processed_folder)

    for root, dirs, files in os.walk(original_folder):
        # Prune the output tree in place so os.walk never descends into it
        dirs[:] = [
            name for name in dirs
            if os.path.abspath(os.path.join(root, name)) != processed_abs
        ]

        png_files = [name for name in files if name.lower().endswith('.png')]
        if not png_files:
            continue  # nothing to write here, so create no output directory

        # Same target directory for every file in this root: compute it once
        relative_path = os.path.relpath(root, original_folder)
        processed_file_dir = os.path.join(processed_folder, relative_path)
        os.makedirs(processed_file_dir, exist_ok=True)

        for name in png_files:
            process_image(
                os.path.join(root, name),
                os.path.join(processed_file_dir, name),
            )

# Define paths
# NOTE: machine-specific absolute path -- point this at your local copy of the data.
original_folder = '/Users/Karan/Microtubules/ExperimentalIndividualImages/Control'

# Fail fast on a wrong path: os.walk() silently yields nothing for a missing
# directory, so without this check an empty output folder would be created
# and the cell would still report success.
if not os.path.isdir(original_folder):
    raise FileNotFoundError(f"Input folder not found: {original_folder}")

processed_folder = create_processed_folder(original_folder)

# Process the images
process_images_in_folder(original_folder, processed_folder)

print(f"Processed images are saved in {processed_folder}")

Processed images are saved in /Users/Karan/Microtubules/ExperimentalIndividualImages/Control-Processed
