In [None]:
# Install the third-party packages this notebook uses (quiet mode).
# NOTE(review): versions are not pinned, so a re-run may pick up different releases — consider pinning.
%pip install -q ultralytics split-folders[full] gdown

Prepare Image Dataset

In [None]:
!rm -r data output

In [None]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import os
# from concurrent.futures import ThreadPoolExecutor
# from PIL import Image

# def retrieve_adjacency_matrix(csv_file):
#     # Read the CSV file into a DataFrame
#     df = pd.read_csv(csv_file)

#     # Extract the filenames and values from the DataFrame
#     filenames = df['File']
#     labels = df['DX'].tolist()
#     values = df.drop(['File', 'ScanDir ID', 'DX'], axis=1).values


#     adjacency_matrices = []
#     for i in range(len(filenames)):
#         # Retrieve the upper triangle values from the row and drop NaNs
#         upper_triangle_values = values[i][~np.isnan(values[i])]

#         # Calculate the size of the adjacency matrix
#         n = int(np.sqrt(2 * len(upper_triangle_values) + 0.25) - 0.5)

#         # Create a zero-filled matrix
#         adjacency_matrix = np.zeros((n, n))

#         # Fill the upper triangle of the adjacency matrix
#         row = 0
#         for j in range(n):
#             for k in range(j+1, n):
#                 adjacency_matrix[j, k] = upper_triangle_values[row]
#                 row += 1

#         # Transpose the upper triangle to fill the lower triangle
#         adjacency_matrix += adjacency_matrix.T

#         if labels[i] == 0:
#           label = 0
#         else:
#           label = 1

#         # Append the adjacency matrix and its label to the list
#         adjacency_matrices.append((filenames[i], adjacency_matrix, label))

#     return adjacency_matrices
# def save_grayscale_image(filename, matrix, label, output_dir):
#     # Create the label directory if it doesn't exist
#     label_dir = os.path.join(output_dir, str(label))
#     os.makedirs(label_dir, exist_ok=True)

#     matrix[abs(matrix) < 0.5] = 0
#     # Normalize the matrix to the range [0, 255]
#     matrix_normalized = ((matrix - np.min(matrix)) / (np.max(matrix) - np.min(matrix)) * 255).astype(np.uint8)

#     # Convert the matrix to a grayscale image
#     image = Image.fromarray(matrix_normalized, mode='L')

#     # Extract the base filename without extension to use for the image
#     base_filename = os.path.basename(filename).split('.')[0]
#     output_path = os.path.join(label_dir, f"{base_filename}.png")

#     # Save the image
#     image.save(output_path)

# def save_grayscale_images(csv_file, output_dir):
#     adjacency_matrices = retrieve_adjacency_matrix(csv_file)

#     if not os.path.exists(output_dir):
#         os.makedirs(output_dir)

#     with ThreadPoolExecutor() as executor:
#         futures = []
#         for filename, matrix, label in adjacency_matrices:
#             futures.append(executor.submit(save_grayscale_image, filename, matrix, label, output_dir))

#         # Wait for all the futures to complete
#         for future in futures:
#             future.result()

# # Example usage
# csv_file = "/content/drive/MyDrive/DM_project2/full_pearsonr.csv"
# output_dir = "output"
# save_grayscale_images(csv_file, output_dir)

In [None]:
# Download the dataset file with gdown, identified by its file ID.
# NOTE(review): later cells read /kaggle/working/full_pearsonr.csv — presumably this
# is the file fetched here; confirm the downloaded name and working directory.
!gdown 1mhiwD2PTG4mN2h6xDjrCH_DmvP3BWmLG

In [None]:
import pandas as pd
import numpy as np
import os
from concurrent.futures import ThreadPoolExecutor
from PIL import Image

def generate_adjacency_matrix(filename, values, label):
    """Rebuild a symmetric adjacency matrix from flattened upper-triangle values.

    NaN entries in ``values`` are dropped. The remaining entries are written, in
    row-major order, into the upper triangle (diagonal included) of an ``n x n``
    matrix, and the strictly-upper part is then mirrored into the lower triangle.

    Args:
        filename: Identifier of the sample; returned unchanged.
        values: 1-D array-like of numbers, possibly padded with NaN.
        label: Raw label of the sample.

    Returns:
        A tuple ``(filename, adjacency_matrix, binary_label)`` where
        ``binary_label`` is 1 when ``label == 1`` and 0 otherwise.

    Raises:
        ValueError: If the number of non-NaN values is not of the form
            ``n * (n + 1) / 2`` and therefore cannot fill an upper triangle.
    """
    # Coerce to float so plain lists and object-dtype rows can be NaN-filtered.
    values = np.asarray(values, dtype=float)
    upper_triangle_values = values[~np.isnan(values)]
    count = len(upper_triangle_values)

    # Invert count = n * (n + 1) / 2 to recover the matrix size.
    n = int(np.sqrt(2 * count + 0.25) - 0.5)
    if n * (n + 1) // 2 != count:
        raise ValueError(
            f"{count} non-NaN values cannot fill the upper triangle of a square matrix"
        )

    # Fill the upper triangle, diagonal included.
    adjacency_matrix = np.zeros((n, n))
    adjacency_matrix[np.triu_indices(n)] = upper_triangle_values

    # Mirror only the strictly-upper part: the diagonal is already in place and
    # adding the full transpose would double it.
    adjacency_matrix += np.triu(adjacency_matrix, k=1).T

    # Determine the binary label.
    binary_label = 1 if label == 1 else 0

    return filename, adjacency_matrix, binary_label

def save_as_grayscale_image(filename, matrix, label, output_dir):
    """Save a matrix as an 8-bit grayscale PNG under ``output_dir/<label>/``.

    The matrix is min-max scaled to the range [0, 255] before conversion. When
    the matrix has no usable value range (all entries equal, or the range is
    NaN), an all-zero image is written instead of dividing by zero.

    Args:
        filename: Source file name; its base name without extension names the PNG.
        matrix: 2-D numeric array to render.
        label: Class label; used as the sub-directory name.
        output_dir: Root directory for the exported images.
    """
    label_dir = os.path.join(output_dir, str(label))
    os.makedirs(label_dir, exist_ok=True)

    # Normalize the matrix to the range [0, 255]
    lowest = np.min(matrix)
    value_range = np.max(matrix) - lowest
    if value_range > 0:
        matrix_normalized = ((matrix - lowest) / value_range * 255).astype(np.uint8)
    else:
        # Constant input would give 0/0 = NaN, and casting NaN to uint8 is not
        # well defined; fall back to a uniform black image.
        matrix_normalized = np.zeros_like(matrix, dtype=np.uint8)

    # Convert the matrix to a grayscale image
    image = Image.fromarray(matrix_normalized, mode='L')

    # Construct the output file path
    base_filename = os.path.splitext(os.path.basename(filename))[0]
    output_path = os.path.join(label_dir, f"{base_filename}.png")

    # Save the image
    image.save(output_path)

def load_csv_data(csv_file):
    """Read the CSV and split it into file names, feature rows, and labels.

    Returns:
        A tuple ``(filenames, values, labels)``: the 'File' column as a list,
        every column except 'File', 'ScanDir ID' and 'DX' as a 2-D array, and
        the 'DX' column as a list.
    """
    table = pd.read_csv(csv_file, engine="c")

    names = table['File'].tolist()
    diagnoses = table['DX'].tolist()

    # Everything that is not an identifier or label column is a feature.
    metadata_columns = ['File', 'ScanDir ID', 'DX']
    features = table.drop(columns=metadata_columns).values

    return names, features, diagnoses

def process_and_save_image(filename, values, label, output_dir):
    """Turn one CSV row into an adjacency matrix and write it out as an image."""
    # generate_adjacency_matrix returns (filename, matrix, binary_label), which
    # are the leading arguments of save_as_grayscale_image in the same order.
    save_as_grayscale_image(
        *generate_adjacency_matrix(filename, values, label),
        output_dir,
    )

def save_grayscale_images(csv_file, output_dir):
    """Export every row of the CSV file as a grayscale image under ``output_dir``.

    Rows are processed concurrently on a thread pool; an exception raised while
    processing a row is re-raised here.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    filenames, values, labels = load_csv_data(csv_file)

    with ThreadPoolExecutor() as pool:
        pending = [
            pool.submit(process_and_save_image, name, row, row_label, output_dir)
            for name, row, row_label in zip(filenames, values, labels)
        ]
        # Collect results in submission order so worker errors surface.
        for task in pending:
            task.result()

# Example usage
# Convert every row of the CSV into a PNG stored under output/<label>/.
# NOTE(review): csv_file is an absolute Kaggle path — adjust it when running elsewhere.
csv_file = "/kaggle/working/full_pearsonr.csv"
output_dir = "output"
save_grayscale_images(csv_file, output_dir)


In [None]:
# Split the per-label image folders in ./output into ./data using an
# 80% / 10% / 10% ratio with a fixed seed (42) for reproducibility.
!splitfolders --ratio .8 .1 .1 --seed 42 --output ./data output

Fine-tune YOLO

In [None]:
# Import ultralytics and run its checks() helper before training.
import ultralytics
ultralytics.checks()

In [None]:
# Show the NVIDIA GPU(s) visible to this runtime.
!nvidia-smi

In [None]:
#@title Select YOLOv8 🚀 logger {run: 'auto'}
# Choose the experiment logger for training:
#   'Comet'       -> install comet_ml and initialise it
#   'TensorBoard' -> load the tensorboard extension and open it on the current directory
logger = 'TensorBoard' #@param ['Comet', 'TensorBoard']

if logger == 'Comet':
  %pip install -q comet_ml
  import comet_ml; comet_ml.init()
elif logger == 'TensorBoard':
  %load_ext tensorboard
  %tensorboard --logdir .

In [None]:
# Download the yolov8n-cls.pt weights file (ultralytics assets release v8.2.0).
!wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n-cls.pt

In [None]:
# Train the yolov8n-cls.pt classification model on the image folders in data/
# for 15 epochs (image size 352, batch size 128).
!yolo mode=train task=classify model=yolov8n-cls.pt data=data/ epochs=15 imgsz=352 batch=128

In [None]:
# Load the CSV into a DataFrame for the inspection cells that follow.
# NOTE(review): pandas is already imported in an earlier cell; the repeat import is harmless.
import pandas as pd
df = pd.read_csv("/kaggle/working/full_pearsonr.csv")

In [None]:
# Number of rows in the CSV.
len(df)

In [None]:
# Number of rows per distinct DX value.
df["DX"].value_counts()

In [None]:
# Scratch arithmetic.
# NOTE(review): presumably adds up some of the DX counts shown above — confirm which
# classes these are, and consider computing the total from df instead of typing numbers.
69+38+6

In [None]:
# Download the yolov8x-cls.pt weights file (ultralytics assets release v8.2.0).
!wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-cls.pt

In [None]:
# Train the yolov8x-cls.pt classification model on the same data folder
# for 500 epochs (image size 352, batch size 32).
!yolo mode=train task=classify model=yolov8x-cls.pt data=data epochs=500 imgsz=352 batch=32