In [1]:
import os
import numpy as np
import pandas as pd
import torch
import json
from PIL import Image  
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import torch.nn.functional as F
import torch.nn as nn
from fastai.learner import Learner
from monai.networks.nets import resnet18
from fastai.data.core import DataLoaders
from fastai.metrics import accuracy
from fastai.losses import CrossEntropyLossFlat
from fastai.callback.all import SaveModelCallback, EarlyStoppingCallback
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
%matplotlib inline

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


In [2]:
# import pandas as pd

# # Load datasets
# df = pd.read_csv('Final_Datasets/long_axis_heart.csv')
# df_test = pd.read_csv('Final_Datasets/test_data_incidence.csv')
# controls = pd.read_csv('Final_Datasets/imbalanced_control_iid.csv')

# # Remove test cases and controls
# test_iids = set(df_test['IID'])

# # Get all cases (CAD = 1) from long_axis_heart.csv, excluding test data
# cases_train = df[(df['CAD'] == 1) & (~df['IID'].isin(test_iids))]

# # Get all controls (IID in controls.csv and CAD = 0), excluding test data
# controls_train = df[(df['IID'].isin(controls['IID'])) & (df['CAD'] == 0) & (~df['IID'].isin(test_iids))]

# # Combine the imbalanced dataset
# df_imbalanced_train = pd.concat([cases_train, controls_train]).reset_index(drop=True)

# # Save the imbalanced dataset
# df_imbalanced_train.to_csv('Final_Datasets/train_resnet_heart_imbalanced.csv', sep=',', index=False)

# # Report the dataset statistics
# cases_count = len(cases_train)
# controls_count = len(controls_train)
# print(f"Number of cases: {cases_count}")
# print(f"Number of controls: {controls_count}")
# print(f"Imbalance ratio (controls to cases): {controls_count / cases_count:.2f}")

In [3]:
# # Load the CSV file
# input_file = 'Final_Datasets/train_resnet_heart_imbalanced.csv'
# df = pd.read_csv(input_file)

# # Define old and new directory paths
# old_path = '/scratch/07880/devansh/CAD/20208_long_axis/Processed/'
# new_path = '/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/'

# # Update the FilePath column
# df['FilePath'] = df['FilePath'].str.replace(old_path, new_path, regex=False)

# # Save the updated DataFrame back to the same file
# output_file = 'Final_Datasets/train_resnet_heart_imbalanced.csv'
# df.to_csv(output_file, index=False)

# print(f"File saved successfully to {output_file}")

In [4]:
# import pandas as pd

# # Load the CSV file
# input_file = 'Final_Datasets/test_data_incidence.csv'
# df = pd.read_csv(input_file)

# # Define old and new directory paths
# old_path = '/scratch/07880/devansh/CAD/20208_long_axis/Processed/'
# new_path = '/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/'

# # Update the FilePath column
# if 'FilePath' in df.columns:  # Ensure the column exists
#     df['FilePath'] = df['FilePath'].str.replace(old_path, new_path, regex=False)
# else:
#     print("FilePath column not found in the DataFrame.")

# # Save the updated DataFrame back to the same file
# output_file = 'Final_Datasets/test_data_incidence.csv'
# df.to_csv(output_file, index=False)

# print(f"File saved successfully to {output_file}")

In [5]:
class NpyDataset(Dataset):
    def __init__(self, dataframe, image_column_name, label_column_name, channel_dim=2):
        self.dataframe = dataframe.reset_index(drop=True)
        self.image_column_name = image_column_name
        self.label_column_name = label_column_name
        self.channel_dim = channel_dim
        self.transform = transforms.Compose([
            transforms.Lambda(lambda x: torch.stack([x, x, x], dim=0) if x.size(0) == 18 else x),  # Stack to create 3 channels
            transforms.Resize((112, 112))  # Resize to 112x112
        ])

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        npy_path = self.dataframe.loc[idx, self.image_column_name]
        label = self.dataframe.loc[idx, self.label_column_name]
        image = np.load(npy_path)[:, :, :, self.channel_dim]  # Accessing the specified channel dimension
        image = image[17:35, :, :]  # Selecting frames 17 to 34
        image = torch.tensor(image, dtype=torch.float32)
        image = self.transform(image)  # Apply transforms
        label = torch.tensor(label, dtype=torch.long)
        return {'image': image, 'label': label, 'path': npy_path}

class HeartDiseaseModel:
    def __init__(self, train_df_path, test_df_path, image_column_name, label_column_name, batch_size=32, model_name='best_model'):
        self.train_df_path = train_df_path
        self.test_df_path = test_df_path
        self.image_column_name = image_column_name
        self.label_column_name = label_column_name
        self.batch_size = batch_size
        self.model_name = model_name
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        self._prepare_data()
        self._prepare_model()

    def _prepare_data(self):
        train_df = pd.read_csv(self.train_df_path)
        test_df = pd.read_csv(self.test_df_path)

        self.train_dataset = NpyDataset(train_df, self.image_column_name, self.label_column_name)
        self.test_dataset = NpyDataset(test_df, self.image_column_name, self.label_column_name)

        self.train_loader = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=8)
        self.test_loader = DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=8)

    def _prepare_model(self):
        self.model = models.video.r3d_18(pretrained=True)
        self.model.fc = nn.Linear(self.model.fc.in_features, 2)
        self.model.to(self.device)

        self.learn = Learner(
            DataLoaders(self.train_loader, self.test_loader),
            self.model,
            loss_func=CrossEntropyLossFlat(),
            metrics=[accuracy],
            wd=1e-4,
            cbs=[
                SaveModelCallback(fname=self.model_name, monitor='train_loss'),
                EarlyStoppingCallback(monitor='train_loss', patience=10)
            ]
        ).to_fp16()

    def train(self, epochs=5, lr=1e-5):
        self.learn.fine_tune(epochs, base_lr=lr)

    def extract_embeddings(self, loader):
        self.model.eval()
        embeddings, labels, paths = [], [], []
        with torch.no_grad():
            for batch in loader:
                images = batch['image']
                label_batch = batch['label']
                path_batch = batch['path']
                images = images.to(self.device)
                
                # Pass through the model until the final pooling layer
                x = self.model.stem(images)
                x = self.model.layer1(x)
                x = self.model.layer2(x)
                x = self.model.layer3(x)
                x = self.model.layer4(x)
                x = self.model.avgpool(x)
                x = torch.flatten(x, 1)
                
                embeddings.append(x.cpu().numpy())
                labels.append(label_batch.cpu().numpy())
                paths.extend(path_batch)  # Collecting the paths
        embeddings = np.concatenate(embeddings)
        labels = np.concatenate(labels)
        return embeddings, labels, paths

    # Updated generate_embeddings_dataframe function
    def generate_embeddings_dataframe(self, embeddings, labels, paths):
        """
        Creates a Pandas DataFrame from embeddings, labels, and image paths.

        Args:
            embeddings (numpy.ndarray): The extracted embeddings.
            labels (numpy.ndarray): The labels corresponding to the embeddings.
            paths (list of str): The image paths.

        Returns:
            pd.DataFrame: A DataFrame with serialized embeddings and metadata.
        """
        # Serialize embeddings as JSON strings for safe CSV storage
        df = pd.DataFrame({
            'image_path': paths,
            'embedding': [json.dumps(emb.tolist()) for emb in embeddings],
            'label': labels
        })
        return df

    # Updated extract_and_save_embeddings function
    def extract_and_save_embeddings(self):
        """
        Extracts embeddings for train and test datasets and saves them as CSV files.

        The embeddings are serialized as JSON strings for robust CSV storage.
        """
        # Extract training embeddings
        train_embeddings, train_labels, train_paths = self.extract_embeddings(self.train_loader)
        train_df = self.generate_embeddings_dataframe(train_embeddings, train_labels, train_paths)

        # Extract test embeddings
        test_embeddings, test_labels, test_paths = self.extract_embeddings(self.test_loader)
        test_df = self.generate_embeddings_dataframe(test_embeddings, test_labels, test_paths)
        # Save DataFrames
        train_df.to_csv('train_embeddings_ch2_nov_imbalanced.csv', index=False)
        test_df.to_csv('test_embeddings_ch2_nov.csv', index=False)

In [6]:
if __name__ == "__main__":
    model = HeartDiseaseModel(
        train_df_path='Final_Datasets/train_resnet_heart_imbalanced.csv',
        test_df_path='Final_Datasets/test_data_incidence.csv',
        image_column_name='FilePath',  # Replace with your image path column name
        label_column_name='CAD',       # Replace with your label column name
        model_name='heart_ch2_3channel_nov'
    )
    # Uncomment the following line if you need to train the model first
    # model.train(epochs=50, lr=1e-5)
    
    # Extract embeddings and save to CSV
    model.extract_and_save_embeddings()



In [7]:
emb = pd.read_csv("train_embeddings_ch2_nov_imbalanced.csv")
emb['embedding'] = emb['embedding'].apply(lambda x: np.array(json.loads(x)))
emb.head()

Unnamed: 0,image_path,embedding,label
0,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/3857100.npy,"[0.0, 15.748132705688477, 0.0, 0.0, 0.0, 0.0, 0.0, 5.8607683181762695, 0.0, 0.0, 0.0, 13.57785701751709, 10.543232917785645, 15.600434303283691, 0.4483417272567749, 0.0, 0.0, 0.0, 0.004697457887232304, 16.928651809692383, 0.017383592203259468, 0.0, 0.0, 14.429327011108398, 8.418927192687988, 0.0, 25.554277420043945, 1.1376416683197021, 0.005670975428074598, 0.021407879889011383, 3.2028160095214844, 0.0, 0.19513292610645294, 13.869726181030273, 1.2919665575027466, 0.0, 0.0, 0.0, 0.0, 0.5822210311889648, 0.0, 0.013596005737781525, 0.0, 3.601943254470825, 0.0, 12.661970138549805, 0.0, 10.5965...",0
1,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/5394550.npy,"[0.0, 16.076406478881836, 0.0, 3.30710768699646, 0.03957848250865936, 0.0, 0.0, 19.850811004638672, 0.03006305731832981, 0.0, 0.0, 9.877923965454102, 17.67885398864746, 11.138339042663574, 1.8109478950500488, 0.0, 0.0, 0.0, 0.0787578672170639, 19.256065368652344, 0.03214229643344879, 0.0, 0.0, 17.644163131713867, 18.100954055786133, 0.0, 22.617874145507812, 1.0368365049362183, 0.2986532151699066, 0.67405766248703, 1.284436821937561, 0.0, 0.6692177653312683, 9.234003067016602, 4.320312023162842, 0.4675767123699188, 0.08472917228937149, 0.9715940356254578, 0.0, 1.0804816484451294, 0.0, 0.0, ...",0
2,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/1159012.npy,"[0.0, 13.309107780456543, 0.0, 0.05142797529697418, 0.006083700340241194, 0.005934476386755705, 0.0, 5.344156265258789, 0.0, 0.023572301492094994, 0.0, 9.272041320800781, 13.098078727722168, 13.780034065246582, 0.2862495183944702, 0.0, 0.0, 0.0, 0.13327279686927795, 12.325188636779785, 0.5125125646591187, 0.0, 0.0, 11.85288143157959, 5.680669784545898, 0.0, 25.42896842956543, 1.7093530893325806, 0.049495819956064224, 0.6460055112838745, 2.1451380252838135, 0.0, 0.14672420918941498, 8.845198631286621, 1.607137680053711, 0.009921059012413025, 0.028345949947834015, 0.08878529071807861, 0.0, 1...",0
3,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/3335084.npy,"[0.0, 11.00273323059082, 0.0, 0.0, 0.0, 0.0, 0.0, 7.314640522003174, 0.0, 0.0, 0.0, 14.983819961547852, 10.165009498596191, 14.025079727172852, 0.5681868195533752, 0.0, 0.0, 0.0, 0.14165207743644714, 9.789031982421875, 0.1998993456363678, 0.0, 0.0, 13.90654468536377, 9.867302894592285, 0.0, 23.250411987304688, 1.1879093647003174, 0.1231626346707344, 0.0280240960419178, 2.1952123641967773, 0.0, 0.019950317218899727, 17.255538940429688, 1.8254714012145996, 0.0, 0.05300655961036682, 0.11566347628831863, 0.0, 0.45653870701789856, 0.0, 0.0, 0.0, 3.606003761291504, 0.0, 12.381714820861816, 0.0, ...",1
4,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/2657680.npy,"[0.0, 11.231810569763184, 0.0, 1.3420112133026123, 0.0, 0.0, 0.00751546211540699, 6.291823387145996, 0.0, 0.0, 0.0, 5.670766353607178, 9.009591102600098, 6.022567272186279, 0.9053731560707092, 0.0, 0.04019899666309357, 0.0, 0.3468594253063202, 10.990494728088379, 0.26200324296951294, 0.0076745059341192245, 0.0, 14.149898529052734, 9.893427848815918, 0.0, 10.682109832763672, 0.8349721431732178, 0.1664576679468155, 0.2947036027908325, 1.2980947494506836, 0.0, 0.7377256155014038, 7.881402492523193, 1.8298877477645874, 0.15632478892803192, 0.0, 0.7917327880859375, 0.0, 0.5207995176315308, 0.0,...",1


In [8]:
len(emb['embedding'][0])

512

**MONAI HEART**

In [54]:
# Logging Configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("heart_long_axis.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


class HeartLongAxisDataset(Dataset):
    """
    Custom PyTorch Dataset for loading 3D heart long-axis images from .npy files.
    """
    def __init__(self, dataframe, image_column_name, label_column_name, custom_transform=None):
        self.dataframe = dataframe.reset_index(drop=True)
        self.image_column_name = image_column_name
        self.label_column_name = label_column_name

        # Default transformation pipeline
        default_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(15),
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
            transforms.Resize((112, 112))
        ])
        self.transform = custom_transform if custom_transform else default_transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        try:
            npy_path = self.dataframe.loc[idx, self.image_column_name]
            label = self.dataframe.loc[idx, self.label_column_name]
            
            # Load the 3D array for heart long-axis
            image = np.load(npy_path)[:, :, :, 0]  # Select the long-axis channel (0)
            image = image[17:33, :, :]  # Extract frames 17 to 32
            
            # Convert to tensor and add channel dimension
            image = torch.tensor(image, dtype=torch.float32).unsqueeze(0)  # Shape: [1, depth, height, width]
            image = self.transform(image)  # Apply transformations
            
            return {'image': image, 'label': torch.tensor(label, dtype=torch.long), 'path': npy_path}
        
        except Exception as e:
            logger.error(f"Error loading image at index {idx}: {e}")
            raise


class HeartDiseaseEmbeddingExtractor:
    """
    Class for extracting embeddings from a fine-tuned MONAI ResNet18 model for heart long-axis images.
    """
    def __init__(self, train_df_path, test_df_path, image_column_name, label_column_name, batch_size=8, model_name='heart_long_axis_resnet18'):
        self.train_df_path = train_df_path
        self.test_df_path = test_df_path
        self.image_column_name = image_column_name
        self.label_column_name = label_column_name
        self.batch_size = batch_size
        self.model_name = model_name
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        self._prepare_data()
        self._load_model()

    def _prepare_data(self):
        train_df = pd.read_csv(self.train_df_path)
        test_df = pd.read_csv(self.test_df_path)

        self.train_dataset = HeartLongAxisDataset(train_df, self.image_column_name, self.label_column_name)
        self.test_dataset = HeartLongAxisDataset(test_df, self.image_column_name, self.label_column_name)

        self.train_loader = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=8)
        self.test_loader = DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=8)

    def _load_model(self):
        # Initialize MONAI ResNet18
        self.model = resnet18(spatial_dims=3, n_input_channels=1, num_classes=2)
        
        # Use FastAI Learner to load the fine-tuned model
        self.learn = Learner(
            DataLoaders(self.train_loader, self.test_loader),
            self.model,
            loss_func=CrossEntropyLossFlat(),
            metrics=[accuracy]
        ).to_fp16()

        self.learn.load(self.model_name)
        self.model = self.learn.model.eval().to(self.device)

    def extract_embeddings(self, loader):
        embeddings, labels, paths = [], [], []
        with torch.no_grad():
            for batch in loader:
                images = batch['image'].to(self.device)
                label_batch = batch['label']
                path_batch = batch['path']

                # Forward pass to extract features
                x = self.model.conv1(images)  # Initial convolution
                x = self.model.bn1(x)        # BatchNorm
                x = self.model.maxpool(x)    # Max pooling
                x = self.model.layer1(x)
                x = self.model.layer2(x)
                x = self.model.layer3(x)
                x = self.model.layer4(x)
                x = self.model.avgpool(x)    # Global average pooling
                x = torch.flatten(x, 1)      # Flatten to get embeddings

                embeddings.append(x.cpu().numpy())
                labels.append(label_batch.cpu().numpy())
                paths.extend(path_batch)  # Collect paths
        
        embeddings = np.concatenate(embeddings)
        labels = np.concatenate(labels)
        return embeddings, labels, paths

     # Updated generate_embeddings_dataframe function
    def generate_embeddings_dataframe(self, embeddings, labels, paths):
        """
        Creates a Pandas DataFrame from embeddings, labels, and image paths.

        Args:
            embeddings (numpy.ndarray): The extracted embeddings.
            labels (numpy.ndarray): The labels corresponding to the embeddings.
            paths (list of str): The image paths.

        Returns:
            pd.DataFrame: A DataFrame with serialized embeddings and metadata.
        """
        # Serialize embeddings as JSON strings for safe CSV storage
        df = pd.DataFrame({
            'image_path': paths,
            'embedding': [json.dumps(emb.tolist()) for emb in embeddings],
            'label': labels
        })
        return df

    # Updated extract_and_save_embeddings function
    def extract_and_save_embeddings(self):
        """
        Extracts embeddings for train and test datasets and saves them as CSV files.

        The embeddings are serialized as JSON strings for robust CSV storage.
        """
        # Extract training embeddings
        train_embeddings, train_labels, train_paths = self.extract_embeddings(self.train_loader)
        train_df = self.generate_embeddings_dataframe(train_embeddings, train_labels, train_paths)

        # Extract test embeddings
        test_embeddings, test_labels, test_paths = self.extract_embeddings(self.test_loader)
        test_df = self.generate_embeddings_dataframe(test_embeddings, test_labels, test_paths)
        train_df.to_csv('train_embeddings_long_axis_ch0_imbalanced_monai.csv', index=False)
        test_df.to_csv('test_embeddings_long_axis_ch0_imbalanced_monai.csv', index=False)


In [56]:
if __name__ == "__main__":
    extractor = HeartDiseaseEmbeddingExtractor(
        train_df_path='Final_Datasets/train_resnet_heart_imbalanced.csv',
        test_df_path='Final_Datasets/test_data_incidence.csv',
        image_column_name='FilePath',
        label_column_name='CAD',
        model_name='heart_ch0_3channel_MONAI_resnet18'
    )  
    
    # Extract and save embeddings
    extractor.extract_and_save_embeddings()

  state = torch.load(file, map_location=device, **torch_load_kwargs)
  elif with_opt: warn("Saved file doesn't contain an optimizer state.")


In [57]:
emb = pd.read_csv("train_embeddings_long_axis_ch0_imbalanced_monai.csv")
emb['embedding'] = emb['embedding'].apply(lambda x: np.array(json.loads(x)))
emb.head()

Unnamed: 0,image_path,embedding,label
0,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/2481857.npy,"[0.7518877387046814, 1.1472522020339966, 0.8155255913734436, 1.033762812614441, 0.9253906011581421, 1.5739915370941162, 0.5733785629272461, 1.157525658607483, 1.0277401208877563, 0.9323064088821411, 1.1983778476715088, 0.9477559328079224, 1.0538650751113892, 0.662196159362793, 0.9855329394340515, 1.0084242820739746, 1.6836415529251099, 0.4729509949684143, 1.69727623462677, 0.7550787329673767, 0.8744810819625854, 1.2068544626235962, 1.1737672090530396, 0.7582306265830994, 0.7180768847465515, 0.9304273128509521, 0.6569504737854004, 1.4690414667129517, 0.8964888453483582, 0.7766852974891663, ...",0
1,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/3267458.npy,"[0.6222007274627686, 0.857754111289978, 0.8616003394126892, 0.883907675743103, 0.8197298645973206, 1.445050835609436, 0.7749342322349548, 1.3935546875, 0.8606221675872803, 0.925336480140686, 1.041946530342102, 0.9229991436004639, 0.763674259185791, 0.48203182220458984, 0.8501312136650085, 1.1560198068618774, 1.5509973764419556, 0.584078848361969, 1.2906135320663452, 0.8675865530967712, 0.851374089717865, 1.0265535116195679, 0.8038004636764526, 0.8011053204536438, 0.998083770275116, 0.4229792356491089, 0.3374080955982208, 0.8993629813194275, 0.7269145846366882, 0.6914081573486328, 0.9002788...",0
2,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/3424201.npy,"[1.0241048336029053, 1.4497884511947632, 1.0378142595291138, 0.8838290572166443, 1.2725428342819214, 2.3465678691864014, 0.9894381165504456, 1.785378098487854, 0.8728373050689697, 1.244332194328308, 1.3307230472564697, 1.145872712135315, 0.8221296072006226, 0.6415755748748779, 0.956454873085022, 1.224797248840332, 1.8196980953216553, 0.46269381046295166, 1.9597762823104858, 0.6793693900108337, 1.2028716802597046, 1.2875028848648071, 1.3738477230072021, 0.9754697680473328, 0.9096880555152893, 0.884101390838623, 0.4094003438949585, 1.6951991319656372, 1.2093385457992554, 1.248578667640686, 0...",0
3,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/4086644.npy,"[0.8524421453475952, 1.1529635190963745, 0.9273219108581543, 1.3484241962432861, 0.9488350749015808, 1.3672786951065063, 0.4505791962146759, 1.2266489267349243, 1.039737343788147, 1.4517909288406372, 1.3201881647109985, 1.1502001285552979, 1.2937198877334595, 0.4957474172115326, 0.6454839110374451, 1.1808773279190063, 1.2069627046585083, 0.6448122262954712, 1.394805908203125, 0.9780712127685547, 1.0762056112289429, 1.5468693971633911, 1.1778465509414673, 0.8933378458023071, 0.6201441287994385, 0.8116099834442139, 0.43900540471076965, 2.078183174133301, 0.6782131195068359, 0.872504234313964...",0
4,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/4236006.npy,"[0.8198446035385132, 1.0692479610443115, 0.9421319365501404, 0.6511673331260681, 0.7163881659507751, 0.9577062129974365, 1.1619913578033447, 1.0509202480316162, 0.531731128692627, 0.7200844883918762, 0.8411928415298462, 0.7896888852119446, 0.8706614971160889, 0.7214251756668091, 1.0195368528366089, 0.8900471925735474, 1.146662950515747, 0.8275796175003052, 1.2668043375015259, 0.6630517244338989, 0.9643077850341797, 0.7024521231651306, 0.9156805276870728, 0.9088653922080994, 0.8984662890434265, 0.8248251676559448, 0.5240901112556458, 0.878409206867218, 0.7233778238296509, 0.8595252633094788...",1


In [58]:
emb['embedding'][0]

array([0.75188774, 1.1472522 , 0.81552559, 1.03376281, 0.9253906 ,
       1.57399154, 0.57337856, 1.15752566, 1.02774012, 0.93230641,
       1.19837785, 0.94775593, 1.05386508, 0.66219616, 0.98553294,
       1.00842428, 1.68364155, 0.47295099, 1.69727623, 0.75507873,
       0.87448108, 1.20685446, 1.17376721, 0.75823063, 0.71807688,
       0.93042731, 0.65695047, 1.46904147, 0.89648885, 0.7766853 ,
       0.78783596, 0.81435603, 0.85756087, 0.64172328, 1.39258552,
       1.20027018, 1.36306942, 1.28262913, 0.85763174, 1.05640841,
       0.71480036, 0.63470227, 0.93134338, 0.95108199, 0.904553  ,
       1.54935312, 0.90008885, 1.10689592, 1.6981771 , 1.18962252,
       0.87021697, 0.8612265 , 1.20179617, 1.23033297, 1.09484124,
       1.20580721, 0.66935712, 1.11969876, 0.58761764, 0.68776101,
       0.92204499, 0.91277421, 1.16067243, 0.87758368, 0.45294654,
       1.12662709, 0.40810791, 1.09621692, 1.67120171, 0.75789934,
       1.04960704, 1.01219654, 0.71697259, 1.02269077, 1.76806

In [30]:
emb['label'].value_counts()

label
0    26358
1     4391
Name: count, dtype: int64

In [31]:
em_te = pd.read_csv('test_embeddings_long_axis_ch2_imbalanced_monai.csv')
em_te.head()

Unnamed: 0,image_path,embedding,label
0,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/4890586.npy,[0.775385 1.3522666 1.0920718 0.70664024 0.7810585 0.77542114\n 0.9246951 0.7518873 0.89678687 0.9051374 0.717285 0.8749739\n 0.72046506 0.9044865 0.8685076 1.018049 0.76703185 0.6930378\n 0.844805 0.6016929 0.8467186 0.80493927 1.0370123 0.9645185\n 0.5209577 0.39125106 1.0601838 1.0096126 0.55926317 0.64178455\n 0.7603833 0.72407544 1.2951285 1.0423611 0.62194943 0.6975328\n 0.6892222 0.8193876 0.92753583 1.0084357 0.84470415 0.6502908\n 0.8517376 0.9632172 0.8308439 1.0197409 1.1180056 1.21494\n 0.99437666 0.7994548 0.73946434 0.909182 0.9900477 0.72...,0
1,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/2201189.npy,[0.71652555 1.0079336 0.99967736 0.8138457 0.61405385 0.78973836\n 1.2331018 0.79418284 0.9701854 0.8634911 0.61321485 1.0809593\n 0.6977347 0.6112499 0.78686965 0.8673014 1.0064123 0.5189477\n 0.94834226 0.22096053 0.9744282 0.91416967 0.89326096 0.8890767\n 0.7274727 0.5514565 0.934832 0.75610197 0.73395133 1.1286633\n 0.774223 0.47248918 1.0088389 1.0508419 0.5286124 0.59018666\n 0.80166996 1.0510082 0.82088405 0.81121093 0.8461439 0.5330051\n 0.9131121 0.8864106 0.8665971 0.91117334 1.3783032 1.1890212\n 0.9416004 0.503209 0.98876876 0.85732955 1.1100987 0....,0
2,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/3646967.npy,[0.90940845 0.7920192 0.73972857 0.86023617 0.77328527 0.95004094\n 0.77942824 1.0404199 1.2526661 1.3110647 0.91357064 1.1931895\n 1.0780315 0.812014 1.014269 0.7406413 0.91905755 0.53876823\n 1.0474985 0.49289215 1.1086296 1.044888 0.75099397 0.8885848\n 0.52208257 1.1598228 1.119299 0.83081615 0.6136243 0.7780417\n 0.8932785 0.9856912 0.9774254 1.1947478 0.4908974 0.87899625\n 0.9698491 0.8669189 1.1813542 0.9793348 1.0182954 0.8186843\n 1.2559637 1.085221 0.7685704 0.6298648 0.9488984 0.9249805\n 0.9507037 0.5355989 0.910834 0.89853656 0.8433439 0...,1
3,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/2032754.npy,[0.9447314 1.5493579 1.253081 0.85414654 1.0370028 0.8173339\n 1.5604081 0.62616223 1.1932764 0.9818474 0.6304101 1.5645891\n 0.7945353 0.6768062 0.9514557 1.0379204 0.74294996 0.78239495\n 0.9612371 0.71720713 0.824731 1.0739565 0.9992365 1.2118741\n 0.5853022 0.36547375 1.3795104 1.4808457 0.21255986 0.76789665\n 0.8294341 0.89542294 1.090856 1.4045956 0.59360737 0.8351802\n 0.3705137 0.76350385 1.0137361 1.2199725 1.1405014 0.499568\n 0.9910436 1.056192 0.59574705 0.96622217 0.87795025 1.3414029\n 1.0929176 0.57319236 1.2594103 0.8487063 1.1430613 0.9...,0
4,/corral/utexas/UKB-Imaging-Genetics/temp_imaging_data/20208_long_axis/Processed/2434644.npy,[1.0993538 1.314854 0.75711364 0.7647052 1.0955659 0.85929084\n 1.0868901 0.9049737 1.1144208 0.98769635 0.75878096 0.9445097\n 0.9349058 0.80992186 0.9758284 1.0958067 1.0553372 0.6109925\n 1.0648053 0.43866542 0.92681557 0.89311635 1.0190144 1.083999\n 0.618482 0.78148335 1.0716757 0.6364063 0.65177846 0.92860544\n 0.86220837 0.76517844 1.0687212 1.2281768 0.6186692 0.9267516\n 0.91079104 0.80148906 0.7834265 0.69560754 0.78350425 0.68084264\n 1.3032821 0.96592814 0.75606346 0.8067589 1.0761501 1.0090616\n 0.92572224 0.7342143 0.6467474 0.8080504 0.9818148 0....,1


In [32]:
em_te['label'].value_counts()

label
0    200
1    200
Name: count, dtype: int64