In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from tqdm import tqdm
from PIL import Image
from torchvision.models import densenet121
import cv2
import torchvision.models as models

In [None]:
# CLAHE preprocessing step that can be dropped into a torchvision transform pipeline
class CLAHETransform:
    """Apply OpenCV CLAHE to an image and hand back a PIL image.

    Args:
        clip_limit: Value forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Value forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Calling the instance accepts either a PIL image (converted to a NumPy
    array first) or an array. A 3-D array is treated as RGB: it is converted
    to LAB, CLAHE is applied to the L channel only, and the result is
    converted back to RGB. Any other dimensionality is passed to CLAHE as is.
    """

    def __init__(self, clip_limit=0.10, tile_grid_size=(8, 8)):
        self.clip_limit = clip_limit
        self.tile_grid_size = tile_grid_size
        # Built once and reused for every image passed through the transform
        self.clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)

    def __call__(self, img):
        pixels = np.array(img) if isinstance(img, Image.Image) else img

        if pixels.ndim != 3:
            # Single-plane input: equalize it directly
            equalized = self.clahe.apply(pixels)
        else:
            # Colour input: equalize lightness only so the chroma channels are untouched
            lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(pixels, cv2.COLOR_RGB2LAB))
            lab_equalized = cv2.merge((self.clahe.apply(lightness), chroma_a, chroma_b))
            equalized = cv2.cvtColor(lab_equalized, cv2.COLOR_LAB2RGB)

        return Image.fromarray(equalized.astype('uint8'))

# Preprocessing applied to every image before it is fed to the network, in order:
# resize -> CLAHE -> 256x256 center crop -> tensor -> per-channel normalization
transform = transforms.Compose([
    transforms.Resize(size=256),
    CLAHETransform(0.35, (8, 8)),
    transforms.CenterCrop(size=256),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

class DenseNetModel(nn.Module):
    """DenseNet-121 backbone with a sigmoid-activated linear head.

    The torchvision DenseNet-121 is instantiated with its ``IMAGENET1K_V1``
    weights and its classifier is replaced by ``Linear -> Sigmoid`` producing
    ``out_size`` values per image.

    Args:
        out_size: Number of outputs of the replacement classifier.
    """

    def __init__(self, out_size=14):
        super().__init__()
        backbone = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        head = nn.Sequential(
            nn.Linear(backbone.classifier.in_features, out_size),
            nn.Sigmoid(),
        )
        backbone.classifier = head
        # The attribute name is the prefix of every state_dict key, so it must stay "densenet121"
        self.densenet121 = backbone

    def forward(self, x):
        """Return the backbone output (after the sigmoid head) for batch ``x``."""
        return self.densenet121(x)

# Load your trained DenseNet model
# NOTE(review): "path_to_model.pth" looks like a placeholder - point it at the real checkpoint.
model_path = "path_to_model.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network on the target device once; load_state_dict copies the
# checkpoint tensors into the existing parameters, so no second .to(device) is needed.
model = DenseNetModel().to(device)

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints from a trusted source; consider passing weights_only=True
# if the installed PyTorch version supports it.
model.load_state_dict(torch.load(model_path, map_location=device))

# Inference mode: disables dropout and makes batch-norm use its running statistics
model.eval()

In [None]:
# Base path for disease-specific images (one sub-folder per disease).
# Set the TSNE_DATASET_DIR environment variable to point at the dataset on
# another machine; the original hard-coded location is kept as the default.
BASE_PATH = os.environ.get('TSNE_DATASET_DIR', r'C:\mimic-cxr-jpg\t-sne-3-dataset')

In [None]:
# Function to extract features
def extract_features(img_path):
    """Run a single image through the preprocessing pipeline and the model.

    Uses the module-level ``transform``, ``model`` and ``device``.

    Args:
        img_path: Path of the image file to read.

    Returns:
        1-D NumPy array with the flattened model output for this image.
        Because the model's forward pass ends in its ``Linear -> Sigmoid``
        classifier, these "features" are the sigmoid outputs of the head,
        not penultimate-layer embeddings.
    """
    # The context manager releases the file handle even when decoding fails
    with Image.open(img_path) as raw_img:
        rgb_img = raw_img.convert('RGB')

    # Apply transformations and add the batch dimension expected by the model
    batch = transform(rgb_img).unsqueeze(0).to(device)

    # No gradients are needed for inference
    with torch.no_grad():
        features = model(batch)

    # Move to host memory and flatten to a 1-D NumPy vector
    return features.cpu().numpy().flatten()

# Accumulators for the extraction loop: per-folder feature arrays and per-image labels
all_features, all_labels = [], []

In [None]:
# List of diseases based on the folder names in the base path.
# Sorted because os.listdir returns entries in arbitrary order and the position
# in this list is the integer label assigned to each image below.
disease_folders = sorted(
    disease for disease in os.listdir(BASE_PATH)
    if os.path.isdir(os.path.join(BASE_PATH, disease))
)

MAX_IMAGES_PER_DISEASE = 1000  # cap on the number of images sampled from one folder
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
sampling_rng = np.random.default_rng(42)  # seeded so the random subsample is reproducible

# Collect into fresh lists owned by this cell, so re-running it neither duplicates
# earlier results nor fails because all_features was already stacked into an ndarray.
feature_batches = []
label_batches = []

# Loop through each disease folder and extract features
for i, disease in enumerate(tqdm(disease_folders, desc="Processing Diseases")):
    disease_folder_path = os.path.join(BASE_PATH, disease)
    # Sorted so the seeded sampling below always draws from the same ordering
    image_files = sorted(
        os.path.join(disease_folder_path, img)
        for img in os.listdir(disease_folder_path)
        if img.lower().endswith(IMAGE_EXTENSIONS)
        and os.path.isfile(os.path.join(disease_folder_path, img))
    )

    # Skip folders with no valid images
    if not image_files:
        print(f"No valid image files found in {disease_folder_path}. Skipping this folder.")
        continue

    # Select up to MAX_IMAGES_PER_DISEASE images for this disease
    if len(image_files) > MAX_IMAGES_PER_DISEASE:
        image_files = sampling_rng.choice(image_files, MAX_IMAGES_PER_DISEASE, replace=False)

    # Failures are handled per image so that a single unreadable file does not
    # discard every image of the disease.
    disease_features = []
    for img_path in tqdm(image_files, desc=f"Extracting features for {disease}", leave=False):
        try:
            disease_features.append(extract_features(img_path))
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    if not disease_features:
        print(f"No features extracted for {disease}. Skipping this folder.")
        continue

    # One (n_images, n_outputs) array per disease, plus one label per extracted image
    feature_batches.append(np.array(disease_features))
    label_batches.extend([i] * len(disease_features))

# Fail with a clear message instead of np.vstack's generic error on an empty list
if not feature_batches:
    raise ValueError(f"No features were extracted from any folder under {BASE_PATH}.")

# Flatten features and labels into single arrays
all_features = np.vstack(feature_batches)
all_labels = np.array(label_batches)

# Perform t-SNE
n_samples = len(all_features)
if n_samples < 2:
    raise ValueError(f"t-SNE needs at least two samples, got {n_samples}.")

# scikit-learn requires perplexity < n_samples; keep the usual 30 but cap it for
# small runs so fit_transform does not raise.
tsne_perplexity = min(30, n_samples - 1)
tsne = TSNE(n_components=2, random_state=42, perplexity=tsne_perplexity)
tsne_results = tsne.fit_transform(all_features)

# Plot t-SNE: one scatter series per label, named after its disease folder
fig, ax = plt.subplots(figsize=(10, 8))
for label in np.unique(all_labels):
    mask = all_labels == label
    ax.scatter(tsne_results[mask, 0], tsne_results[mask, 1], label=disease_folders[label], alpha=0.7)

ax.legend()
ax.set_title("t-SNE Visualization")
ax.set_xlabel("t-SNE Dimension 1")
ax.set_ylabel("t-SNE Dimension 2")

# Save before show(): some backends clear the figure once it has been displayed
fig.savefig('t-sne-our-model.png')

plt.show()