## Setup
Import the required libraries and select the compute device (GPU 0 if CUDA is available, otherwise the CPU).

In [None]:
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import torch

from napari_dinosim.dinoSim_pipeline import *
from napari_dinosim.utils import *

# Enumerate GPUs in PCI bus order and expose only device 0 to this process.
os.environ.update({"CUDA_DEVICE_ORDER": "PCI_BUS_ID", "CUDA_VISIBLE_DEVICES": "0"})

# Run on the first visible GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Device:", device)

import h5py
from torch.nn import functional as F

In [None]:
#@title Data and Processing Parameters

#@markdown ### Data Settings
# Root of the dataset; the cells below read <dataset_path>/<date>/<neurotransmitter>/*.hdf*.
dataset_path = '/home/tomwelch/Cambridge/Datasets/neurotransmitter_data' #@param {type:"string"}


def _list_subdirs(root):
    """Return the sorted names of the non-hidden sub-directories of `root`."""
    return sorted(
        name for name in os.listdir(root)
        if not name.startswith('.') and os.path.isdir(os.path.join(root, name))
    )


# os.listdir returns entries in arbitrary order and also lists stray files
# (e.g. .DS_Store), so keep directories only and sort them: dates[0] and
# neurotransmitters[0] are then the same on every run and every machine.
dates = _list_subdirs(dataset_path)
if not dates:
    raise FileNotFoundError(f'No date folders found in {dataset_path!r}')
neurotransmitters = _list_subdirs(os.path.join(dataset_path, dates[0]))

#@markdown ### Post-processing Settings
upsample = "bilinear" #@param {type:"string", options:["bilinear", "Nearest Neighbor", "None"], value-map:{bilinear:"bilinear", "Nearest Neighbor": "nearest", None:None}}
crop_shape = (512,512,1) #@param {type:"raw"}

#@markdown ### Model Input Settings
#@markdown Should be a multiple of the model's patch_size
resize_size = 518 #@param {type:"integer"}

## Load Data


In [None]:
def load_image(path):
    """Load the 2D slice of an HDF5 volume at the depth of its first annotated location.

    Parameters
    ----------
    path : str
        Path to an HDF5 file holding 'annotations/locations' and 'volumes/raw'.

    Returns
    -------
    tuple
        ``(slice_volume, x, y)``: the slice of 'volumes/raw' at index ``z`` of its
        third axis, and the first two coordinates of the first annotation row.
    """
    # Open read-only explicitly; the default mode differs between h5py versions.
    with h5py.File(path, 'r') as f:
        # The annotation array is unpacked into exactly two rows; only the first is used.
        # NOTE(review): the /8 presumably converts annotation units to voxel indices -- confirm.
        pre, _ = f['annotations/locations'][:] / 8
        x, y, z = pre[0].astype(int), pre[1].astype(int), pre[2].astype(int)
        # Index the dataset directly so only the requested slice is read from disk,
        # instead of materialising the whole volume and slicing it afterwards.
        slice_volume = f['volumes/raw'][:, :, int(z)]
        return slice_volume, x, y

In [None]:
# Files of the first date / first neurotransmitter, in a stable (sorted) order.
train_x_fnames = sorted(glob(os.path.join(dataset_path, dates[0], neurotransmitters[0], '*.hdf*')))
if not train_x_fnames:
    raise FileNotFoundError(
        f'No HDF files found for {neurotransmitters[0]!r} in {dates[0]!r} under {dataset_path!r}'
    )

# Read every file exactly once; load_image returns (slice, x, y).
loaded_samples = [load_image(p) for p in train_x_fnames]

train_dataset = np.stack([img for img, _, _ in loaded_samples]).astype(np.uint8)
train_dataset = train_dataset[..., np.newaxis]  # add channel dim

# One (x, y) row per file, in the same order as train_dataset.
coordinates = np.stack([(x, y) for _, x, y in loaded_samples])

print(f'N files:  {len(train_dataset)} \t Shape: {train_dataset.shape[1:]}')

# Load Model

In [None]:
# select model size
model_size = 'small' #@param {type:"string", options:["small", "base", "large", "giant"]}

# Feature dimension handed to the pipeline for each supported model size.
model_dims = {'small': 384, 'base': 768, 'large': 1024, 'giant': 1536}
assert model_size in model_dims, f'Invalid model size: ({model_size})'

# The hub entry point is keyed by the first letter of the size name.
model = torch.hub.load(
    'facebookresearch/dinov2',
    f'dinov2_vit{model_size[0]}14_reg',
)
model.to(device)
model.eval()

feat_dim = model_dims[model_size]

few_shot = DinoSim_pipeline(
    model,
    model.patch_size,
    device,
    get_img_processing_f(resize_size),
    feat_dim,
    dino_image_size=resize_size,
)
print("Model loaded")

# Prompting

In [None]:
# select reference points
# Start from the annotated location of the first image, shifted by -5 along its first coordinate.
x, y = coordinates[0] + np.array([-5, 0])
points = [(0, x, y)]
# Regroup the (z, x, y) triples into one tuple per axis for plotting.
z, x, y = (tuple(axis) for axis in zip(*points))

fig, ax = plt.subplots()
ax.imshow(train_dataset[z[0]], cmap='gray')
ax.scatter(x, y, c='r', marker='x')
plt.show()

# DinoSim

In [None]:
def gaussian_kernel(size=3, sigma=1):
    """Build a normalized 2D Gaussian kernel of shape ``(size, size)``.

    Parameters
    ----------
    size : int, optional
        Number of samples along each axis. Must be at least 1. Default is 3.
    sigma : float, optional
        Standard deviation of the Gaussian, in samples. Must be non-zero. Default is 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, size)`` whose entries sum to 1.

    Raises
    ------
    ValueError
        If ``size`` is smaller than 1 or ``sigma`` is 0.
    """
    size = int(size)
    if size < 1:
        raise ValueError(f"size must be >= 1, got {size}")
    if sigma == 0:
        raise ValueError("sigma must be non-zero")

    # Sample exactly `size` integer offsets starting at -(size // 2). The previous
    # upper bound (size - 1) only matched this for size == 3 and produced
    # off-centre kernels of the wrong shape for every other size.
    lower = -(size // 2)
    upper = lower + size

    y, x = np.mgrid[lower:upper, lower:upper]

    kernel = (1 / (2 * np.pi * sigma**2)) * np.exp(
        -(x**2 + y**2) / (2 * sigma**2)
    )
    kernel = kernel / kernel.sum()

    return kernel

In [None]:
def torch_convolve(input, weights, mode="reflect", cval=0.0, origin=0):
    """
    Filter a 2D image (or a batch of them) with a 2D kernel using PyTorch.

    The kernel is applied with ``F.conv2d``, i.e. without flipping it; for
    symmetric kernels this is identical to a convolution. The spatial size of
    the output always equals the spatial size of the input.

    Parameters
    ----------
    input : torch.Tensor
        The input tensor: 2D (H, W), 3D (C, H, W) or 4D (N, C, H, W).
    weights : torch.Tensor
        Filter kernel, either 2D (kH, kW) or already 4D as expected by ``F.conv2d``.
    mode : str, optional
        Padding mode. Options are 'reflect', 'constant', 'replicate', or 'circular'.
        Default is 'reflect'.
    cval : float, optional
        Value to fill past edges of input if `mode` is 'constant'. Ignored for
        every other mode. Default is 0.0.
    origin : int, optional
        Controls the origin of the input signal. Positive values shift the filter
        to the right, and negative values shift the filter to the left. Default is 0.

    Returns
    -------
    result : torch.Tensor
        The filtered tensor with all singleton dimensions squeezed out.
    """
    # Ensure input is 4D (batch, channels, height, width)
    if input.dim() == 2:  # Single channel 2D image
        input = input.unsqueeze(0).unsqueeze(0)
    elif input.dim() == 3:  # Add batch dimension if missing
        input = input.unsqueeze(0)

    # Add output/input channel dimensions for weights if necessary
    if weights.dim() == 2:
        weights = weights.unsqueeze(0).unsqueeze(0)

    # Pad each axis by (kernel - 1) in total so the output keeps the input size.
    # `origin` only moves padding from one side to the other; padding both sides
    # by the same shifted amount would change the output size instead of shifting.
    kernel_h, kernel_w = weights.shape[-2], weights.shape[-1]
    pad_top = kernel_h // 2 - origin
    pad_bottom = kernel_h - 1 - pad_top
    pad_left = kernel_w // 2 - origin
    pad_right = kernel_w - 1 - pad_left
    pad = (pad_left, pad_right, pad_top, pad_bottom)

    # Only 'constant' padding takes a fill value; passing one to the other
    # modes is rejected by F.pad when it is non-zero.
    if mode == "constant":
        input_padded = F.pad(input, pad, mode=mode, value=cval)
    else:
        input_padded = F.pad(input, pad, mode=mode)

    # Perform the filtering
    result = F.conv2d(input_padded, weights)

    return result.squeeze()  # Remove extra dimensions for output

In [None]:
# Post-processing: every distance map is filtered with a 3x3 Gaussian kernel.
kernel = torch.tensor(
    gaussian_kernel(size=3, sigma=1), dtype=torch.float32, device=device
)


def filter_f(x):
    """Filter `x` with the module-level Gaussian `kernel`."""
    return torch_convolve(x, kernel)


# To force a recomputation, call few_shot.delete_precomputed_embeddings()
# and few_shot.delete_references() before this point.
if not few_shot.emb_precomputed:
    few_shot.pre_compute_embeddings(
        train_dataset,
        overlap=(0, 0),
        padding=(0, 0),
        crop_shape=crop_shape,
        verbose=True,
        batch_size=5,
    )
few_shot.set_reference_vector(list_coords=points)
distances = few_shot.get_ds_distances_sameRef(verbose=False)
predictions = few_shot.distance_post_processing(
    distances, filter_f, upsampling_mode=upsample
)
print("Finished")

In [None]:
# Sanity check: show the shape of one prediction map (requires at least 7 predictions).
predictions[6].shape

# Plot results

In [None]:
# select threshold
threshold = 0.5

for i in range(1):
    input_img = train_dataset[i]
    fig, (ax_img, ax_dist, ax_mask) = plt.subplots(1, 3, figsize=(12, 7))

    # Left: the input image (single-channel images are shown in gray).
    if input_img.shape[-1] == 1:
        ax_img.imshow(input_img[..., 0], cmap='gray')
    else:
        ax_img.imshow(input_img)

    # Middle: inverted prediction map, so low values appear bright.
    ax_dist.imshow(1 - predictions[i], cmap='magma')

    # Right: binary mask of the pixels below the threshold.
    ax_mask.imshow(predictions[i] < threshold, cmap='gray')
    plt.show()

In [None]:
def get_bbox(predictions, threshold):
    """Compute a padded bounding box around the below-threshold pixels of each image.

    Parameters
    ----------
    predictions : array-like
        Stack of 2D maps, indexed along its first axis.
    threshold : float
        Pixels strictly below this value are considered detected.

    Returns
    -------
    tuple
        ``(bbox_list, failed_pct)``. ``bbox_list`` holds one
        ``((x_min, x_max), (y_min, y_max))`` entry per image with at least one
        detected pixel, each bound widened by 5 pixels (bounds are not clipped
        to the image). ``failed_pct`` is the percentage of images without any
        detected pixel (0.0 when `predictions` is empty).
    """
    margin = 5
    n_images = len(predictions)
    bbox_list = []
    failed = []
    for i in range(n_images):
        # Guard each image separately so one malformed map is reported without
        # silently dropping every image that comes after it.
        try:
            mask = np.asarray(predictions[i]) < threshold
            if not mask.any():
                failed.append(i)
                continue
            # Single scan of the mask: axis 0 holds rows (y), axis 1 columns (x).
            hits = np.nonzero(mask)
            rows, cols = hits[0], hits[1]
            bbox_list.append((
                (cols.min() - margin, cols.max() + margin),
                (rows.min() - margin, rows.max() + margin),
            ))
        except Exception as e:
            print(f"Error in bbox nb.{i}: {e}")

    # Avoid a ZeroDivisionError when there is nothing to evaluate.
    failed_pct = len(failed) / n_images * 100 if n_images else 0.0
    print(f"{failed_pct}% of images did not pass the threshold")
    return bbox_list, failed_pct

In [None]:
# Bounding boxes at a threshold of 0.35; the second return value is discarded.
bboxes, _ = get_bbox(predictions, 0.35)

In [None]:
def f(threshold):
    """Return element 1 of ``get_bbox(predictions, threshold)``.

    `predictions` is read from the notebook's global namespace.
    """
    result = get_bbox(predictions, threshold)
    return result[1]

# Sweep the threshold from 0 to 1 in steps of 0.05 and plot f at every step.
thresholds = np.arange(0, 1.05, 0.05)
plt.figure(figsize=(12, 7), dpi=300)
plt.plot(thresholds, list(map(f, thresholds)))

___
# Implementation 

# Setup

In [1]:
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import torch

from napari_dinosim.dinoSim_pipeline import *
from napari_dinosim.utils import *

# Enumerate GPUs in PCI bus order and expose only device 0 to this process.
os.environ.update({"CUDA_DEVICE_ORDER": "PCI_BUS_ID", "CUDA_VISIBLE_DEVICES": "0"})

# Run on the first visible GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Device:", device)

import h5py
from torch.nn import functional as F
import torchvision.transforms.v2.functional as T
from torchvision import transforms
from tqdm import tqdm
import matplotlib.patches as patches

Device: cpu


  return torch._C._cuda_getDeviceCount() > 0


In [2]:
def load_image(path):
    """Load the 2D slice of an HDF5 volume at the depth of its first annotated location.

    Parameters
    ----------
    path : str
        Path to an HDF5 file holding 'annotations/locations' and 'volumes/raw'.

    Returns
    -------
    tuple
        ``(slice_volume, x, y)``: the slice of 'volumes/raw' at index ``z`` of its
        third axis with a new leading axis of length 1, and the first two
        coordinates of the first annotation row.
    """
    # Open read-only explicitly; the default mode differs between h5py versions.
    with h5py.File(path, 'r') as f:
        # The annotation array is unpacked into exactly two rows; only the first is used.
        # NOTE(review): the /8 presumably converts annotation units to voxel indices -- confirm.
        pre, _ = f['annotations/locations'][:] / 8
        x, y, z = pre[0].astype(int), pre[1].astype(int), pre[2].astype(int)
        # Read only the requested slice from disk (instead of the whole volume),
        # then add the leading axis on the in-memory array.
        slice_volume = f['volumes/raw'][:, :, int(z)][np.newaxis]
        return slice_volume, x, y

In [3]:
def get_bbox(predictions, threshold):
    """Return a padded bounding box around the below-threshold pixels of ``predictions[0]``.

    Parameters
    ----------
    predictions : array-like
        Array whose first entry along axis 0 is a 2D map.
    threshold : float
        Pixels strictly below this value are considered detected.

    Returns
    -------
    tuple
        ``(x_min, x_max, y_min, y_max)``, each bound widened by 5 pixels
        (bounds are not clipped to the image).

    Raises
    ------
    ValueError
        If no pixel is below `threshold`.
    """
    margin = 5
    mask = np.asarray(predictions[0, ...] < threshold)
    # Single scan of the mask: axis 0 holds rows (y), axis 1 columns (x).
    hits = np.nonzero(mask)
    rows, cols = hits[0], hits[1]
    if rows.size == 0:
        raise ValueError(
            f"No pixel is below threshold {threshold}; cannot compute a bounding box"
        )
    return (
        cols.min() - margin,
        cols.max() + margin,
        rows.min() - margin,
        rows.max() + margin,
    )

In [4]:
def f(predictions, threshold):
    """Return element 1 of ``get_bbox(predictions, threshold)``.

    NOTE(review): with a ``get_bbox`` that returns
    ``(bbox_1x, bbox_2x, bbox_1y, bbox_2y)``, element 1 is ``bbox_2x`` -- confirm
    that this is the value callers want.
    """
    result = get_bbox(predictions, threshold)
    return result[1]

In [6]:
# Dataset root -- TODO: pick the line matching the current machine.
# Linux:
dataset_path = '/home/tomwelch/Cambridge/Datasets/neurotransmitter_data'
# macOS:
#dataset_path = '/Users/tomw/Documents/MVA/Internship/Cambridge/Datasets/neurotransmitter_data'

# Full paths of every entry directly under the dataset root.
# NOTE(review): glob does not guarantee an order, and index-based selections
# further down depend on it -- confirm before sorting.
dates = glob(os.path.join(dataset_path, '*'))
# Names (last path component) of the entries inside the first date folder.
neurotransmitters = [
    os.path.basename(os.path.normpath(entry))
    for entry in glob(os.path.join(dates[0], '*'))
]

upsample = "bilinear" #@param {type:"string", options:["bilinear", "Nearest Neighbor", "None"], value-map:{bilinear:"bilinear", "Nearest Neighbor": "nearest", None:None}}
crop_shape = (512,512,1) #@param {type:"raw"}

#@markdown ### Model Input Settings
#@markdown Should be a multiple of the model's patch_size
resize_size = 518 #@param {type:"integer"}

In [7]:
# select model size
model_size = 'small' #@param {type:"string", options:["small", "base", "large", "giant"]}

# Feature dimension handed to the pipeline for each supported model size.
model_dims = {'small': 384, 'base': 768, 'large': 1024, 'giant': 1536}
assert model_size in model_dims, f'Invalid model size: ({model_size})'

# The hub entry point is keyed by the first letter of the size name.
model = torch.hub.load(
    'facebookresearch/dinov2',
    f'dinov2_vit{model_size[0]}14_reg',
)
model.to(device)
model.eval()

feat_dim = model_dims[model_size]

few_shot = DinoSim_pipeline(
    model,
    model.patch_size,
    device,
    get_img_processing_f(resize_size),
    feat_dim,
    dino_image_size=resize_size,
)
print("Model loaded")

Using cache found in /home/tomwelch/.cache/torch/hub/facebookresearch_dinov2_main


Model loaded


# Plot

In [8]:
# Disabled draft of a per-neurotransmitter threshold sweep, kept as a bare string so it never runs.
# NOTE(review): the draft is not runnable as written -- it appends to `all_failure_rates`, which is
# never defined (only `all_success_rates` is), and `dates[date]` indexes the list with one of its own items.
'''
def DINOSim():
    thresholds = np.arange(0, 1.05, 0.05)

    fig, axs = plt.subplots(2, 3, figsize=(15, 10))
    axs = axs.flatten()

    for i, neuro in enumerate(neurotransmitters):
        all_success_rates = []

        for date in dates:
            # Load files
            train_x_fnames = glob(os.path.join(dataset_path, date, neuro, '*.hdf*'))
            train_x_fnames.sort()

            # Skip if no data
            if len(train_x_fnames) == 0:
                print(f"Skipping {neuro} on {dates[date]} (no data)")
                continue

            train_dataset = np.stack([load_image(p)[0] for p in train_x_fnames])
            train_dataset = train_dataset[..., np.newaxis]
            coordinates = np.stack([load_image(p)[1:] for p in train_x_fnames])
            points = [(0, coord[0], coord[1]) for coord in coordinates]

            # Convolution kernel
            kernel = gaussian_kernel(size=3, sigma=1)
            kernel = torch.tensor(kernel, dtype=torch.float32, device=device)
            filter_f = lambda x: torch_convolve(x, kernel)

            # Few-shot setup
            few_shot.delete_precomputed_embeddings()
            few_shot.delete_references()
            if not few_shot.emb_precomputed:
                few_shot.pre_compute_embeddings(train_dataset[:50], 
                                                overlap=(0, 0),
                                                padding=(0, 0),
                                                crop_shape=crop_shape, 
                                                verbose=False,
                                                batch_size=5)
            print('Done')
            predictions = []

            for point in tqdm(points, desc=f'{neuro} on {date}'):
                few_shot.set_reference_vector(list_coords=[(0, point[0], point[1])])
                distances = few_shot.get_ds_distances_sameRef(verbose=False)
                pred = few_shot.distance_post_processing(distances, filter_f, upsampling_mode=upsample)
                predictions.append(pred)

            predictions = np.array(predictions)
            failure_rate = [f(predictions, t) for t in thresholds]
            all_failure_rates.append(failure_rate)

        # Average across dates
        if all_success_rates:
            mean_rates = np.mean(all_success_rates, axis=0)
            axs[i].plot(thresholds, mean_rates, label=neuro)
            axs[i].set_title(f'{neuro}')
            axs[i].set_xlabel('Threshold')
            axs[i].set_ylabel('Success Rate')
            axs[i].grid(True)
            axs[i].legend()

    plt.suptitle('Success Rate vs Threshold for Neurotransmitters', fontsize=16)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.show()
'''

'\ndef DINOSim():\n    thresholds = np.arange(0, 1.05, 0.05)\n\n    fig, axs = plt.subplots(2, 3, figsize=(15, 10))\n    axs = axs.flatten()\n\n    for i, neuro in enumerate(neurotransmitters):\n        all_success_rates = []\n\n        for date in dates:\n            # Load files\n            train_x_fnames = glob(os.path.join(dataset_path, date, neuro, \'*.hdf*\'))\n            train_x_fnames.sort()\n\n            # Skip if no data\n            if len(train_x_fnames) == 0:\n                print(f"Skipping {neuro} on {dates[date]} (no data)")\n                continue\n\n            train_dataset = np.stack([load_image(p)[0] for p in train_x_fnames])\n            train_dataset = train_dataset[..., np.newaxis]\n            coordinates = np.stack([load_image(p)[1:] for p in train_x_fnames])\n            points = [(0, coord[0], coord[1]) for coord in coordinates]\n\n            # Convolution kernel\n            kernel = gaussian_kernel(size=3, sigma=1)\n            kernel = torch.tensor

In [9]:
def Images_DINOSim(threshold, n_process=10, n_plot=5):
    '''
    Runs few-shot DINO-based detection over all dates and neurotransmitters,
    and plots the first processed samples of each (date, neurotransmitter) pair.

    Each image is embedded on its own and prompted with its own annotated
    location, so every prediction is the distance map of an image to its own
    reference point.

    Parameters
    ----------
    threshold : float
        Distance threshold used for the bounding box and the binary mask.
    n_process : int, optional
        Number of files processed per (date, neurotransmitter) pair. Default is 10.
    n_plot : int, optional
        Maximum number of processed samples that are plotted. Default is 5.
    '''
    # Convolution kernel
    kernel = torch.tensor(gaussian_kernel(size=3, sigma=1), dtype=torch.float32, device=device)

    def filter_f(x):
        return torch_convolve(x, kernel)

    for date in dates:
        date_name = os.path.basename(os.path.normpath(date))

        for neuro in neurotransmitters:
            print('Loading data')
            # Load files
            train_x_fnames = sorted(glob(os.path.join(date, neuro, '*.hdf*')))

            # Skip if no data
            if len(train_x_fnames) == 0:
                print(f"Skipping {neuro} in {date_name} (no data)")
                continue

            # Only the first n_process files are used, so read just those,
            # and read each file once for both its image and its coordinates.
            samples = [load_image(p) for p in train_x_fnames[:n_process]]
            dataset = np.stack([img for img, _, _ in samples])[..., np.newaxis]
            # One (image index, x, y) reference per sample; the image index is
            # always 0 because each image is embedded on its own.
            coordinates = [(0, x, y) for _, x, y in samples]

            predictions = []
            bboxes_list = []
            print('Done loading data')
            for k in tqdm(range(len(dataset)), desc=f'Processing {neuro} from {date_name}', leave=True):
                few_shot.delete_precomputed_embeddings()
                few_shot.delete_references()
                if not few_shot.emb_precomputed:
                    few_shot.pre_compute_embeddings(dataset[k],
                                                    overlap=(0, 0),
                                                    padding=(0, 0),
                                                    crop_shape=crop_shape,
                                                    verbose=False,
                                                    batch_size=1)

                # Pass the full (0, x, y) triple: indexing it with [0] and [1]
                # would prompt at (0, x) instead of (x, y).
                few_shot.set_reference_vector(list_coords=[coordinates[k]])
                distances = few_shot.get_ds_distances_sameRef(verbose=False)
                pred = few_shot.distance_post_processing(distances, filter_f, upsampling_mode=upsample)
                try:
                    bboxes_list.append(get_bbox(pred, threshold))
                except ValueError:
                    # Nothing is below the threshold: keep the sample, draw no box.
                    bboxes_list.append(None)
                predictions.append(pred)

            # Never plot more samples than were actually processed.
            for i in range(min(n_plot, len(predictions))):
                input_img = dataset[i]
                _, x_ref, y_ref = coordinates[i]

                fig, (ax_img, ax_dist, ax_mask) = plt.subplots(1, 3, figsize=(15, 5), dpi=300)
                fig.suptitle(f"{neuro.capitalize()} - Sample {i+1}", fontsize=14)

                # Original image with ground truth and predicted box
                if input_img.shape[-1] == 1:
                    ax_img.imshow(input_img[0, ..., 0], cmap='gray')
                else:
                    ax_img.imshow(input_img)

                # Plot ground truth point
                ax_img.scatter(x_ref, y_ref, color='red', marker='x', s=100,
                               label='Ground Truth')

                # Plot bounding box
                if bboxes_list[i] is not None:
                    x1, x2, y1, y2 = bboxes_list[i]
                    ax_img.add_patch(patches.Rectangle(
                        (x1, y1), x2 - x1, y2 - y1,
                        linewidth=2, edgecolor='blue', facecolor='none',
                        label='Detection'
                    ))
                ax_img.set_title("Original Image")
                ax_img.legend()

                # Distance map visualization
                dist_im = ax_dist.imshow(1 - predictions[i][0, ...], cmap='magma')
                fig.colorbar(dist_im, ax=ax_dist, label='Normalized Distance')
                ax_dist.set_title("DINOv2 Distance Map")

                # Binary segmentation
                ax_mask.imshow(predictions[i][0, ...] < threshold, cmap='gray')
                ax_mask.set_title(f"Binary Mask (threshold={threshold})")

                fig.tight_layout()
                plt.show()
                # Release the 300-dpi figure so repeated plotting does not pile up memory.
                plt.close(fig)

In [10]:
#Images_DINOSim(0.25)

In [11]:
def get_fnames():
    """List every HDF file of every (date, neurotransmitter) folder with its label.

    Reads the module-level `dates` and `neurotransmitters`. Files are ordered
    by date, then neurotransmitter, then sorted file name.

    Returns
    -------
    list of tuple
        ``(file_path, label)`` pairs, where ``label`` is the capitalized
        neurotransmitter name. Empty when no file matches.
    """
    pairs = []
    for date in dates:
        for neuro in neurotransmitters:
            label = neuro.capitalize()
            # Plain list building keeps paths as ordinary strings and, unlike
            # np.concatenate, copes with empty folders and with no folders at all.
            pairs.extend(
                (fname, label)
                for fname in sorted(glob(os.path.join(date, neuro, '*.hdf*')))
            )
    return pairs

In [12]:
def get_embeddings():
    """Debug variant: pre-compute embeddings for hand-picked files and print the pipeline state.

    The selected files are processed in consecutive groups of five. After each
    group the attributes of `few_shot` are printed. Nothing is returned.
    """
    good_idx = [5, 7, 8, 10, 13, 300, 310, 316, 337, 343, 611, 614, 622, 623, 631, 901, 903, 905, 907, 912, 1210, 1211, 1213, 1220, 1221, 1500, 1507, 1509, 1510, 1514]
    group_size = 5

    files = get_fnames()
    selected_paths = [files[idx][0] for idx in good_idx]
    groups = [
        selected_paths[start:start + group_size]
        for start in range(0, len(good_idx), group_size)
    ]

    for group in groups:
        loaded = [load_image(path) for path in group]
        # Move axis 1 of the stacked images to the end.
        images = np.stack([sample[0] for sample in loaded]).transpose(0, 2, 3, 1)

        # Start from a clean pipeline for every group.
        few_shot.delete_precomputed_embeddings()
        few_shot.delete_references()
        if not few_shot.emb_precomputed:
            few_shot.pre_compute_embeddings(
                images,
                overlap=(0, 0),
                padding=(0, 0),
                #crop_shape=crop_shape,
                verbose=False,
                batch_size=5,
            )
        print(vars(few_shot))
#get_embeddings()

Precomputing embeddings
{'model': DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop

In [13]:
def get_embeddings(good_idx=None, group_size=5, max_distance=0.5):
    """Compute thresholded distance maps for groups of hand-picked files.

    The selected files are processed in consecutive groups of `group_size`.
    Each group is embedded together and prompted with the annotated locations
    returned by `load_image` for its files.

    Parameters
    ----------
    good_idx : sequence of int, optional
        Indices into the list returned by `get_fnames()`. Defaults to the
        hand-picked selection used in this notebook.
    group_size : int, optional
        Number of files embedded and prompted together. Default is 5.
    max_distance : float, optional
        Distances greater than or equal to this value are set to 0. Default is 0.5.

    Returns
    -------
    list
        One thresholded distance array per group.

    Raises
    ------
    IndexError
        If an index in `good_idx` is beyond the available files.
    """
    if good_idx is None:
        good_idx = [5, 7, 8, 10, 13, 300, 310, 316, 337, 343, 611, 614, 622, 623, 631, 901, 903, 905, 907, 912, 1210, 1211, 1213, 1220, 1221, 1500, 1507, 1509, 1510, 1514]

    files = get_fnames()
    # Fail early with a readable message instead of midway through the selection.
    out_of_range = [idx for idx in good_idx if idx >= len(files)]
    if out_of_range:
        raise IndexError(
            f'good_idx entries {out_of_range} are out of range for {len(files)} files'
        )

    datas = [files[idx][0] for idx in good_idx]
    dataset = [datas[i:i + group_size] for i in range(0, len(datas), group_size)]
    distance_maps = []

    for group in dataset:
        X = [load_image(file) for file in group]
        # Move axis 1 of the stacked images to the end.
        images = np.transpose(np.stack([sample[0] for sample in X]), (0, 2, 3, 1))
        # One (0, x, y) reference per file of the group.
        coordinates = [(0, sample[1], sample[2]) for sample in X]

        # Start from a clean pipeline for every group.
        few_shot.delete_precomputed_embeddings()
        few_shot.delete_references()
        if not few_shot.emb_precomputed:
            few_shot.pre_compute_embeddings(images,
                                            overlap=(0, 0),
                                            padding=(0, 0),
                                            #crop_shape=crop_shape,
                                            verbose=False,
                                            batch_size=group_size)
        few_shot.set_reference_vector(list_coords=coordinates)
        distance = few_shot.get_ds_distances_sameRef(verbose=False)
        # Keep only the distances below the cut-off; everything else becomes 0.
        distance_maps.append(np.where(distance < max_distance, distance, 0))
    return distance_maps

In [None]:
import torch

# Scratch check: a max over dim=1 yields one (value, index) pair per row.
x = torch.tensor([[1, 2, 3, 4], [4, 3, 2, 1]])
x.max(dim=1)

torch.return_types.max(
values=tensor([4, 4]),
indices=tensor([3, 0]))

In [None]:
# Scratch check: a min over dim=1 of a 3D tensor returns 2D values and indices.
z = torch.randn(2, 2, 2)
print(z, z.min(dim=1))

tensor([[[ 0.5081,  0.8226],
         [ 0.4566,  2.2191]],

        [[-1.0008,  1.0852],
         [ 0.0750,  0.1285]]]) torch.return_types.min(
values=tensor([[ 0.4566,  0.8226],
        [-1.0008,  0.1285]]),
indices=tensor([[1, 0],
        [0, 1]]))


In [None]:
from glob import glob
import os
import numpy as np
import DINOSim
import h5py
def get_fnames():
    """List every HDF file of every (date, neurotransmitter) folder with its label.

    Reads the module-level `dates` and `neurotransmitters`. Files are ordered
    by date, then neurotransmitter, then sorted file name.

    Returns
    -------
    list of tuple
        ``(file_path, label)`` pairs, where ``label`` is the capitalized
        neurotransmitter name. Empty when no file matches.
    """
    pairs = []
    for date in dates:
        for neuro in neurotransmitters:
            label = neuro.capitalize()
            # Plain list building keeps paths as ordinary strings and, unlike
            # np.concatenate, copes with empty folders and with no folders at all.
            pairs.extend(
                (fname, label)
                for fname in sorted(glob(os.path.join(date, neuro, '*.hdf*')))
            )
    return pairs

#----------------------------------------------------------------------------------------------------------------------------------------------

def load_image(path):
    """Load the 2D slice of an HDF5 volume at the depth of its first annotated location.

    Parameters
    ----------
    path : str
        Path to an HDF5 file holding 'annotations/locations' and 'volumes/raw'.

    Returns
    -------
    tuple
        ``(slice_volume, x, y)``: the slice of 'volumes/raw' at index ``z`` of its
        third axis with a new leading axis of length 1, and the first two
        coordinates of the first annotation row.
    """
    # Open read-only explicitly; the default mode differs between h5py versions.
    with h5py.File(path, 'r') as f:
        # The annotation array is unpacked into exactly two rows; only the first is used.
        # NOTE(review): the /8 presumably converts annotation units to voxel indices -- confirm.
        pre, _ = f['annotations/locations'][:] / 8
        x, y, z = pre[0].astype(int), pre[1].astype(int), pre[2].astype(int)
        # Read only the requested slice from disk (instead of the whole volume),
        # then add the leading axis on the in-memory array.
        slice_volume = f['volumes/raw'][:, :, int(z)][np.newaxis]
        return slice_volume, x, y
    
# Dataset root on this machine.
dataset_path = '/Users/tomw/Documents/MVA/Internship/Cambridge/Datasets/neurotransmitter_data'

# Full paths of every entry directly under the dataset root.
dates = glob(os.path.join(dataset_path, '*'))
# Names (last path component) of the entries inside the first date folder.
neurotransmitters = [
    os.path.basename(os.path.normpath(entry))
    for entry in glob(os.path.join(dates[0], '*'))
]

Device: mps


Using cache found in /Users/tomw/.cache/torch/hub/facebookresearch_dinov2_main


Model loaded


In [2]:
import umap
from sklearn.cluster import KMeans
import torch
# Synthetic stand-in for a feature matrix (100 samples x 500 dimensions), drawn
# from a dedicated seeded generator so the cell gives the same numbers on every
# run without touching torch's global RNG state.
features = torch.randn(100, 500, generator=torch.Generator().manual_seed(0))
reducer = umap.UMAP(
    n_neighbors=5,
    min_dist=0.05,
    n_components=2,
    metric='cosine',
    random_state=0,
    )
#umap_embedding = reducer.fit_transform(features)

# Fixed random_state: the k-means initialisation is random, so the distances
# and inertia shown below would otherwise change between runs.
kmeans = KMeans(n_clusters=5, random_state=0)
kmeans.fit_transform(features.numpy())

array([[31.44900799, 20.68695976, 21.75466736, 23.54900088, 22.08817143],
       [32.72367459, 21.61417463, 22.58425609, 23.56488406, 23.33573509],
       [33.17471773, 23.22079605, 22.28389328, 24.32930476, 22.92696658],
       [31.63865942, 22.41327987, 21.41305415, 23.597216  , 22.42280089],
       [32.2151819 , 22.249454  , 23.0327111 , 24.52196459, 23.41859529],
       [32.78029346, 21.70934818, 22.38294921, 24.48022365, 23.31536753],
       [33.69259097, 23.38612547, 23.07941929, 25.06341228, 22.12429661],
       [32.37838898, 22.26516929, 21.26860444, 23.78209522, 22.62658032],
       [32.75509554, 21.88302604, 22.98339458, 24.31074202, 23.44044795],
       [32.09684076, 23.08175658, 21.91220886, 23.55538814, 23.32228357],
       [33.77631639, 23.82941801, 22.99924835, 24.81144018, 24.06240372],
       [32.88315776, 22.77255392, 22.08845556, 24.40518744, 23.04487095],
       [33.9231781 , 22.59427241, 22.0659266 , 23.74388109, 22.86848424],
       [31.46657719, 22.29360776, 21.3

In [4]:
# Inspect the inertia_ attribute of the KMeans model fitted in the previous cell.
kmeans.inertia_

47233.2788382432