In [None]:
from google.colab import drive

# Mount Google Drive; the dataset paths used below live under this mount point.
DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Adapted from an online source: split the combined dataset into per-class train/valid/test folders
import os
import shutil
import random

# Source dataset (one subfolder per class) and the root of the split copy.
dataset_base = "/content/gdrive/MyDrive/APS360/Combined_Dataset"
new_base_dir = "/content/gdrive/MyDrive/APS360/splitup"

# One output folder per split, all directly under new_base_dir.
new_train_dir, new_val_dir, new_test_dir = (
    os.path.join(new_base_dir, split_name)
    for split_name in ("train", "valid", "test")
)

classes = ["glioma", "meningioma", "notumor", "pituitary"]

# Create <split>/ and <split>/<class>/ for every combination; exist_ok makes
# the cell safe to re-run when the folders are already there.
for split_dir in (new_train_dir, new_val_dir, new_test_dir):
    os.makedirs(split_dir, exist_ok=True)
    for cls in classes:
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

def collect_files(directory, class_names=None):
    """Return the paths of all regular files inside each class subfolder.

    Args:
        directory: Folder that contains one subfolder per class.
        class_names: Iterable of subfolder names to scan. When omitted, the
            notebook-level ``classes`` list is used.

    Returns:
        A list of paths of the form ``directory/<class>/<file>``. Classes
        appear in the order given; within a class the files are sorted by
        name so the result does not depend on the order in which the
        filesystem happens to list them.

    Raises:
        FileNotFoundError: If a class subfolder does not exist (raised by
            ``os.listdir``).
    """
    if class_names is None:
        class_names = classes

    files = []
    for cls in class_names:
        cls_dir = os.path.join(directory, cls)
        # os.listdir returns entries in arbitrary order; sort for determinism.
        for name in sorted(os.listdir(cls_dir)):
            path = os.path.join(cls_dir, name)
            # Skip nested directories and anything else that is not a file.
            if os.path.isfile(path):
                files.append(path)
    return files

# Fixed seed for the shuffle. The split folders are reused between runs
# (they are created with exist_ok=True), so an unseeded shuffle would copy a
# different partition on top of the old one and the same image could end up
# in more than one split.
SPLIT_SEED = 42

# Sort before shuffling: the listing order of the source folders is not
# guaranteed, so the seed alone would not pin the shuffled order.
all_files = sorted(collect_files(dataset_base))
# A dedicated Random instance keeps the global random state untouched.
random.Random(SPLIT_SEED).shuffle(all_files)

# Fractions of each class assigned to the train / validation / test splits.
train_split = 0.7
val_split = 0.2
test_split = 0.1

# The three declared ratios must describe a complete partition.
assert abs(train_split + val_split + test_split - 1.0) < 1e-9, "split ratios must sum to 1"

def copy_files(files, destination, count):
    """Copy the first ``count`` files into per-class subfolders of ``destination``.

    The class of each file is taken from the name of its parent directory, and
    the file is copied to ``destination/<class>/``.

    Args:
        files: Sequence of source file paths.
        destination: Root folder of the split being populated.
        count: Number of leading entries of ``files`` to copy.

    Returns:
        The list of source paths that were copied, in order.
    """
    copied_files = []
    for src in files[:count]:
        cls = os.path.basename(os.path.dirname(src))
        dest_dir = os.path.join(destination, cls)
        # If dest_dir were missing, shutil.copy would treat it as the target
        # *file name*, and each copy would overwrite the previous one.
        os.makedirs(dest_dir, exist_ok=True)
        shutil.copy(src, dest_dir)
        copied_files.append(src)
    return copied_files

# Partition every class on its own so each split receives the same share of
# that class.
for cls in classes:
    cls_files = [
        path
        for path in all_files
        if os.path.basename(os.path.dirname(path)) == cls
    ]
    total_files = len(cls_files)
    train_count = int(total_files * train_split)
    val_count = int(total_files * val_split)
    # Whatever the two truncated counts leave over goes to the test split.
    test_count = total_files - train_count - val_count

    # Offsets into cls_files where the validation and test slices begin.
    val_start = train_count
    test_start = train_count + val_count

    train_files = copy_files(cls_files, new_train_dir, train_count)
    val_files = copy_files(cls_files[val_start:], new_val_dir, val_count)
    test_files = copy_files(cls_files[test_start:], new_test_dir, test_count)

    split_summary = f"{cls} - Train: {len(train_files)}, Validation: {len(val_files)}, Test: {len(test_files)}"
    print(split_summary)

def count_files(directory, class_names=None):
    """Count the regular files in each class subfolder of ``directory``.

    Args:
        directory: Folder that contains one subfolder per class.
        class_names: Iterable of subfolder names to count. When omitted, the
            notebook-level ``classes`` list is used.

    Returns:
        A dict mapping each class name to the number of regular files found
        directly inside ``directory/<class>`` (subdirectories are not counted).

    Raises:
        FileNotFoundError: If a class subfolder does not exist (raised by
            ``os.listdir``).
    """
    if class_names is None:
        class_names = classes

    counts = {}
    for cls in class_names:
        cls_dir = os.path.join(directory, cls)
        # Summing booleans counts the entries for which isfile() is True.
        counts[cls] = sum(
            os.path.isfile(os.path.join(cls_dir, name))
            for name in os.listdir(cls_dir)
        )
    return counts

# Re-count what is actually on disk in each split folder.
train_counts = count_files(new_train_dir)
val_counts = count_files(new_val_dir)
test_counts = count_files(new_test_dir)

total_train_files = sum(train_counts.values())
total_val_files = sum(val_counts.values())
total_test_files = sum(test_counts.values())

# One reporting loop for all three splits: per-class count and its share of
# the split total.
for split_label, split_counts, split_total in (
    ("Training", train_counts, total_train_files),
    ("Validation", val_counts, total_val_files),
    ("Test", test_counts, total_test_files),
):
    print(f"\n{split_label} files per class:")
    for cls, count in split_counts.items():
        # Guard against an empty split, which would raise ZeroDivisionError.
        percentage = (count / split_total) * 100 if split_total else 0.0
        print(f"{cls}: {count} ({percentage:.2f}%)")


glioma - Train: 5079, Validation: 1451, Test: 727
meningioma - Train: 4895, Validation: 1398, Test: 701
notumor - Train: 5388, Validation: 1539, Test: 771
pituitary - Train: 5338, Validation: 1525, Test: 763

Training files per class:
glioma: 5079 (24.54%)
meningioma: 4895 (23.65%)
notumor: 5388 (26.03%)
pituitary: 5338 (25.79%)

Validation files per class:
glioma: 1451 (24.54%)
meningioma: 1398 (23.64%)
notumor: 1539 (26.03%)
pituitary: 1525 (25.79%)

Test files per class:
glioma: 727 (24.54%)
meningioma: 701 (23.67%)
notumor: 771 (26.03%)
pituitary: 763 (25.76%)


In [None]:
# Count the regular files in each class folder of the test split.

te_pi = '/content/gdrive/MyDrive/APS360/splitup/test/pituitary'
te_no = '/content/gdrive/MyDrive/APS360/splitup/test/notumor'
te_me = '/content/gdrive/MyDrive/APS360/splitup/test/meningioma'
te_go = '/content/gdrive/MyDrive/APS360/splitup/test/glioma'

# Same counting rule for every folder: directory entries that are files.
count_te_pi, count_te_no, count_te_me, count_te_go = (
    sum(os.path.isfile(os.path.join(folder, name)) for name in os.listdir(folder))
    for folder in (te_pi, te_no, te_me, te_go)
)

print(f'There are {count_te_pi} files in te_pi folder.')
print(f'There are {count_te_no} files in te_no folder.')
print(f'There are {count_te_me} files in te_me folder.')
print(f'There are {count_te_go} files in te_go folder.')
print(f'Total files in test are {count_te_pi+count_te_no+count_te_me+count_te_go}')

There are 701 files in te_pi folder.
There are 701 files in te_no folder.
There are 701 files in te_me folder.
There are 701 files in te_go folder.
Total files in test are 2804


In [None]:
# Count the regular files in each class folder of the train split.
tr_pi = '/content/gdrive/MyDrive/APS360/splitup/train/pituitary'
tr_no = '/content/gdrive/MyDrive/APS360/splitup/train/notumor'
tr_me = '/content/gdrive/MyDrive/APS360/splitup/train/meningioma'
tr_go = '/content/gdrive/MyDrive/APS360/splitup/train/glioma'

# Same counting rule for every folder: directory entries that are files.
count_tr_pi, count_tr_no, count_tr_me, count_tr_go = (
    sum(os.path.isfile(os.path.join(folder, name)) for name in os.listdir(folder))
    for folder in (tr_pi, tr_no, tr_me, tr_go)
)

print(f'There are {count_tr_pi} files in tr_pi folder.')
print(f'There are {count_tr_no} files in tr_no folder.')
print(f'There are {count_tr_me} files in tr_me folder.')
print(f'There are {count_tr_go} files in tr_go folder.')
print(f'Total files in train are {count_tr_pi+count_tr_no+count_tr_me+count_tr_go}')

There are 4895 files in tr_pi folder.
There are 4895 files in tr_no folder.
There are 4895 files in tr_me folder.
There are 4895 files in tr_go folder.
Total files in train are 19580


In [None]:
# Count the regular files in each class folder of the validation split.
va_pi = '/content/gdrive/MyDrive/APS360/splitup/valid/pituitary'
va_no = '/content/gdrive/MyDrive/APS360/splitup/valid/notumor'
va_me = '/content/gdrive/MyDrive/APS360/splitup/valid/meningioma'
va_go = '/content/gdrive/MyDrive/APS360/splitup/valid/glioma'

# Same counting rule for every folder: directory entries that are files.
count_va_pi, count_va_no, count_va_me, count_va_go = (
    sum(os.path.isfile(os.path.join(folder, name)) for name in os.listdir(folder))
    for folder in (va_pi, va_no, va_me, va_go)
)

print(f'There are {count_va_pi} files in va_pi folder.')
print(f'There are {count_va_no} files in va_no folder.')
print(f'There are {count_va_me} files in va_me folder.')
print(f'There are {count_va_go} files in va_go folder.')
print(f'Total files in valid are {count_va_pi+count_va_no+count_va_me+count_va_go}')

There are 1398 files in va_pi folder.
There are 1398 files in va_no folder.
There are 1398 files in va_me folder.
There are 1398 files in va_go folder.
Total files in valid are 5592


In [None]:
# Derive the split totals from the per-class counts measured in the cells
# above rather than hard-coding them, so the ratios stay correct whenever the
# folder contents change.
train_total = count_tr_pi + count_tr_no + count_tr_me + count_tr_go
valid_total = count_va_pi + count_va_no + count_va_me + count_va_go
test_total = count_te_pi + count_te_no + count_te_me + count_te_go
grand_total = train_total + valid_total + test_total

if grand_total == 0:
    raise ValueError('No files found in the train/valid/test folders; cannot compute ratios.')

print(f'The train ratio is {train_total/grand_total}')
print(f'The valid ratio is {valid_total/grand_total}')
print(f'The test ratio is {test_total/grand_total}')

The train ratio is 0.6998856162424936
The valid ratio is 0.19988561624249357
The test ratio is 0.10022876751501286


In [None]:
# Trim the test/glioma folder by deleting surplus images.
# NOTE(review): presumably this balances the test classes - confirm.

import os
import random

folder_path = '/content/gdrive/MyDrive/APS360/splitup/test/glioma'
# Size to trim down to. 701 is the meningioma test count printed by the split
# cell; with the 727 glioma test files reported there, 727 - 701 = 26, which
# matches the fixed number of removals this cell used to perform.
target_file_count = 701

# Consider regular files only (os.remove fails on directories) and sort them
# so the candidate list is the same on every run.
files = sorted(
    name
    for name in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, name))
)

# Remove only the surplus: re-running the cell once the folder is at the
# target size deletes nothing instead of another fixed batch of images.
num_files_to_remove = max(0, len(files) - target_file_count)

# Seeded sample instead of "first N in listing order": reproducible, and it
# does not favour whichever names the filesystem happens to return first.
files_to_remove = random.Random(0).sample(files, num_files_to_remove)

for file_name in files_to_remove:
    file_path = os.path.join(folder_path, file_name)
    os.remove(file_path)
    print(f"Removed: {file_path}")

Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_0688_jpg.rf.e7c71871d88ea6939262c0984ad12c0e.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_0739_jpg.rf.4804526f0c218b8eb6c6f36dfdfe4aff.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Te-glTr_0009_jpg.rf.d73426c7101c393ee766eab4272444bc.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_0172_jpg.rf.7992e59e1a5175d3b2b622a14ab39ffc.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_0348_jpg.rf.5b8ac55a7212b6f9fa55aae884d63fee.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_0627_jpg.rf.30a0510615d57991508cd405c12637ea.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_1081_jpg.rf.ad064d078efd5dc12384aeb8b71e95a7.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_1239_jpg.rf.792e9fc87cd8cb3d00ded1312e806e99.jpg
Removed: /content/gdrive/MyDrive/APS360/splitup/test/glioma/Tr-gl_0948_jpg.rf.