In [3]:
%load_ext autoreload
%autoreload 2

import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(os.path.join('../'))))

import tqdm
import numpy
import torch
import wandb
import pandas
import joblib
import itertools
import torchvision
import gtda.images
import gtda.diagrams
import gtda.homology
import sklearn.pipeline
import sklearn.ensemble
import sklearn.metrics
import tqdm.contrib.itertools
import sklearn.decomposition

import lib.topology

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
BINARIZATION_THESHOLD = 0.4

height_filtration_directions = [
    [ -1, -1 ], [ 1, 1 ], [ 1, -1 ], [ -1, 1 ],
    [ 0, -1 ], [ 0, 1 ], [ -1, 0 ], [ 1, 0 ]
]


radial_filtration_centers = list(itertools.product([ 7, 14, 21 ], [ 7, 14, 21 ]))
radial_filtration_metrics = [ "euclidean", "manhattan", "cosine" ]

density_filtration_metrics = [ "euclidean" , "manhattan", "cosine" ]
density_filtration_radiuses = [ 1, 5, 15 ]

FILTRATIONS = [
    *[ [ gtda.images.HeightFiltration, { 'direction': numpy.array(direction), 'n_jobs': -1 } ] for direction in height_filtration_directions ],
    *[
        [ gtda.images.RadialFiltration, { 'center': numpy.array(center), 'metric': metric, 'n_jobs': -1 } ]
        for center in radial_filtration_centers
        for metric in radial_filtration_metrics
    ],
    [ gtda.images.DilationFiltration, { 'n_jobs': -1 } ],
    [ gtda.images.ErosionFiltration, { 'n_jobs': -1 } ],
    [ gtda.images.SignedDistanceFiltration, { 'n_jobs': -1 } ],
    *[
        [ gtda.images.DensityFiltration, { 'radius': radius, 'metric': metric, 'n_jobs': -1 } ]
        for metric in density_filtration_metrics
        for radius in density_filtration_radiuses
    ]
]

In [5]:
train = torchvision.datasets.MNIST('mnist', train = True, download = True)
test = torchvision.datasets.MNIST('mnist', train = False, download = True)

train_images = numpy.array([ item[0] for item in train ])
train_labels = numpy.array([ item[1] for item in train ])

test_images = numpy.array([ item[0] for item in test ])
test_labels = numpy.array([ item[1] for item in test ])

In [6]:
def make_filtrations(images: numpy.ndarray):
    images_bin = gtda.images.Binarizer(threshold = BINARIZATION_THESHOLD).fit_transform(images)
    filtrations = [
        filtration[0](**filtration[1]).fit_transform(images_bin)
        for filtration in tqdm.tqdm(FILTRATIONS, desc = 'filtrations')
    ]
    return [ images, images_bin ] + filtrations


def make_point_clouds(images: numpy.ndarray):
    def make_point_cloud(image, threshold):
        point_cloud = [ ]
        for i in range(image.shape[0]):
            for j in range(image.shape[1]):
                if image[i][j] < threshold:
                    continue
                point_cloud.append([ i, j, image[i][j] ])
        return numpy.array(point_cloud)

    def _make_point_clouds(imgs, threshold):
        imgs = numpy.swapaxes(numpy.flip(imgs, axis = 1), 1, 2)
        return [ make_point_cloud(image, threshold) for image in tqdm.tqdm(imgs, desc = 'point_clouds') ]

    images_bin = gtda.images.Binarizer(threshold = BINARIZATION_THESHOLD).fit_transform(images)
    point_cloud = gtda.images.ImageToPointCloud().fit_transform(images_bin)
    return [ point_cloud, _make_point_clouds(images, 50) ]

In [7]:
def make_filtration_diagrams(images: numpy.ndarray):
    filtrations = make_filtrations(images)
    all_filtrations = [ ]
    for diagrams in zip(*filtrations):
        all_filtrations.extend(diagrams)

    print('Making filtration diagrams')
    persistence = gtda.homology.CubicalPersistence(homology_dimensions = [ 0, 1 ], n_jobs = -1)
    return persistence.fit_transform(all_filtrations)

def make_point_cloud_diagrams(images: numpy.ndarray):
    point_clouds = make_point_clouds(images)
    all_point_clouds = [ ]
    for diagrams in zip(*point_clouds):
        all_point_clouds.extend(diagrams)
        
    print('Making point cloud diagrams')
    persistence = gtda.homology.VietorisRipsPersistence(homology_dimensions = [ 0, 1, 2 ], n_jobs = -1)
    return persistence.fit_transform(all_point_clouds)

In [8]:
def make_filtration_features(images: numpy.ndarray):
    feature_calculator = lib.topology.FeatureCalculator(n_jobs = -1, verbose = True)
    return feature_calculator.calc_features(make_filtration_diagrams(images), prefix = 'filtration')

def make_point_cloud_features(images: numpy.ndarray):
    feature_calculator = lib.topology.FeatureCalculator(n_jobs = -1, verbose = True)
    return feature_calculator.calc_features(make_point_cloud_diagrams(images), prefix = 'point_cloud')

def make_features(images: numpy.ndarray):
    filtration_features = make_filtration_features(images)
    point_cloud_features = make_point_cloud_features(images)

    filtrations_per_image = len(filtration_features) // len(images)
    point_clouds_per_image = len(point_cloud_features) // len(images)
    
    features = [ ]
    for img_idx in tqdm.trange(len(images), desc = 'combine features'):
        idx1 = filtrations_per_image * img_idx
        idx2 = filtrations_per_image * (img_idx + 1)
        filtration_features_flat = filtration_features[idx1:idx2].to_numpy().flatten()
        
        idx1 = point_clouds_per_image * img_idx
        idx2 = point_clouds_per_image * (img_idx + 1)
        point_cloud_features_flat = point_cloud_features[idx1:idx2].to_numpy().flatten()
        
        features_flat = numpy.concatenate((filtration_features_flat, point_cloud_features_flat), axis = 0)
        features.append(features_flat)

    return numpy.array(features)

In [7]:
train_features = None
for i in range(0, 60000, 4000):
    print(f'Calculating {i} - {i + 4000}')
    train_features_batch = make_features(train_images[i:i + 4000])
    if train_features is None:
        train_features = train_features_batch
    else:
        train_features = numpy.concatenate((train_features, train_features_batch), axis = 0)
    numpy.save("train_features.npy", train_features)

Calculating 0 - 4000


filtrations: 100%|██████████| 47/47 [00:15<00:00,  3.00it/s]


Making filtration diagrams
Filtered diagrams: (196000, 71, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:04<00:00, 3059.93it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:03<00:00, 3109.61it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:01<00:00, 3162.92it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [00:58<00:00, 3332.56it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:06<00:00,  7.46s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:48<00:00, 1162.77it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1317.18it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 316, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2067.62it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2073.75it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2028.92it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2166.53it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.01s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:10<00:00, 745.92it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 3152.93it/s]


Calculating 4000 - 8000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.51it/s]


Making filtration diagrams
Filtered diagrams: (196000, 73, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:02<00:00, 3123.69it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:03<00:00, 3102.87it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:02<00:00, 3127.25it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:02<00:00, 3147.68it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:04<00:00,  7.34s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:51<00:00, 1145.40it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1302.45it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 333, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2105.01it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2032.45it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2082.97it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2052.73it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.03s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 705.98it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2910.76it/s]


Calculating 8000 - 12000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.51it/s]


Making filtration diagrams
Filtered diagrams: (196000, 76, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:03<00:00, 3104.97it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:01<00:00, 3184.46it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:01<00:00, 3177.51it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:01<00:00, 3169.73it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:09<00:00,  7.61s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:50<00:00, 1151.10it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1273.91it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 339, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2118.33it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2136.77it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2111.03it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2150.90it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.04s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:10<00:00, 754.30it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 3009.05it/s]


Calculating 12000 - 16000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.48it/s]


Making filtration diagrams
Filtered diagrams: (196000, 62, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:03<00:00, 3094.91it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:01<00:00, 3170.27it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:00<00:00, 3219.82it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:02<00:00, 3154.26it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [01:53<00:00,  6.68s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:57<00:00, 1103.74it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1270.88it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 308, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2078.63it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2025.54it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2079.40it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2089.44it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.03s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 721.79it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2847.36it/s]


Calculating 16000 - 20000


filtrations: 100%|██████████| 47/47 [00:14<00:00,  3.28it/s]


Making filtration diagrams
Filtered diagrams: (196000, 58, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:05<00:00, 3007.91it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:03<00:00, 3083.48it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:03<00:00, 3078.99it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:03<00:00, 3093.23it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:00<00:00,  7.07s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:59<00:00, 1088.99it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1272.66it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 299, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2168.93it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2141.28it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2121.67it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2112.35it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.02s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:10<00:00, 755.35it/s] 
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2374.34it/s]


Calculating 20000 - 24000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.41it/s]


Making filtration diagrams
Filtered diagrams: (196000, 62, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:02<00:00, 3159.61it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:01<00:00, 3197.91it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:00<00:00, 3260.22it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [00:59<00:00, 3314.06it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [01:56<00:00,  6.83s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:52<00:00, 1139.13it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1242.03it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 317, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2107.13it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2130.01it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2112.83it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2134.40it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.02s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:10<00:00, 738.33it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2740.59it/s]


Calculating 24000 - 28000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.49it/s]


Making filtration diagrams
Filtered diagrams: (196000, 53, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:03<00:00, 3083.24it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:01<00:00, 3166.67it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:02<00:00, 3160.01it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:01<00:00, 3164.38it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [01:52<00:00,  6.60s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:51<00:00, 1146.08it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1277.36it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 349, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2110.46it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2137.13it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2081.48it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2124.30it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.05s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:10<00:00, 728.06it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2847.74it/s]


Calculating 28000 - 32000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.45it/s]


Making filtration diagrams
Filtered diagrams: (196000, 68, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:04<00:00, 3029.77it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:03<00:00, 3098.44it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:01<00:00, 3208.89it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [00:59<00:00, 3310.62it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:05<00:00,  7.35s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:52<00:00, 1137.01it/s]
point_clouds: 100%|██████████| 4000/4000 [00:02<00:00, 1348.05it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 326, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:04<00:00, 1997.54it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:04<00:00, 1798.61it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2033.42it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:04<00:00, 1996.86it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:18<00:00,  1.11s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 705.23it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2741.32it/s]


Calculating 32000 - 36000


filtrations: 100%|██████████| 47/47 [00:14<00:00,  3.16it/s]


Making filtration diagrams
Filtered diagrams: (196000, 59, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:06<00:00, 2955.95it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:06<00:00, 2966.10it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:05<00:00, 2978.73it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:07<00:00, 2887.47it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:07<00:00,  7.53s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:53<00:00, 1126.60it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1210.64it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 290, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2235.06it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2292.05it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2094.05it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2291.60it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:15<00:00,  1.09it/s]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:10<00:00, 771.61it/s] 
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2991.34it/s]


Calculating 36000 - 40000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.42it/s]


Making filtration diagrams
Filtered diagrams: (196000, 73, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:06<00:00, 2966.10it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:04<00:00, 3032.61it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:07<00:00, 2885.54it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:06<00:00, 2954.96it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:20<00:00,  8.25s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:49<00:00, 1158.94it/s]
point_clouds: 100%|██████████| 4000/4000 [00:02<00:00, 1340.45it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 322, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2111.00it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2118.50it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2167.95it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2090.21it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.03s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 688.17it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2615.10it/s]


Calculating 40000 - 44000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.44it/s]


Making filtration diagrams
Filtered diagrams: (196000, 60, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:05<00:00, 3000.42it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:01<00:00, 3168.99it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:00<00:00, 3236.40it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:01<00:00, 3166.87it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:00<00:00,  7.11s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:54<00:00, 1121.78it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1278.67it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 383, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2100.78it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:04<00:00, 1983.40it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2108.30it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2131.85it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:19<00:00,  1.13s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:10<00:00, 742.39it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2826.91it/s]


Calculating 44000 - 48000


filtrations: 100%|██████████| 47/47 [00:13<00:00,  3.50it/s]


Making filtration diagrams
Filtered diagrams: (196000, 77, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:03<00:00, 3092.48it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:02<00:00, 3135.84it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:00<00:00, 3238.69it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:01<00:00, 3182.43it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:17<00:00,  8.06s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:57<00:00, 1106.62it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1253.75it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 320, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2020.34it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2055.41it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2058.76it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2050.93it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.06s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 702.80it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2808.57it/s]


Calculating 48000 - 52000


filtrations: 100%|██████████| 47/47 [00:14<00:00,  3.34it/s]


Making filtration diagrams
Filtered diagrams: (196000, 63, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:05<00:00, 3005.12it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:05<00:00, 3013.09it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:03<00:00, 3089.28it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:05<00:00, 2998.38it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:12<00:00,  7.81s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [03:13<00:00, 1014.94it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1231.54it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 314, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2009.51it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2217.54it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2186.29it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2106.27it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.06s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 699.41it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2314.64it/s]


Calculating 52000 - 56000


filtrations: 100%|██████████| 47/47 [00:14<00:00,  3.25it/s]


Making filtration diagrams
Filtered diagrams: (196000, 74, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [01:06<00:00, 2951.26it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [01:05<00:00, 2975.92it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:05<00:00, 3012.63it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:06<00:00, 2954.91it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:29<00:00,  8.77s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [03:09<00:00, 1035.18it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1249.12it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 299, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:03<00:00, 2020.56it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2020.39it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:04<00:00, 1970.05it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2031.23it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:17<00:00,  1.02s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 705.53it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2759.69it/s]


Calculating 56000 - 60000


filtrations: 100%|██████████| 47/47 [00:14<00:00,  3.22it/s]


Making filtration diagrams
Filtered diagrams: (196000, 61, 3)
Calculating Betti features


filtration betti: 100%|██████████| 196000/196000 [00:59<00:00, 3276.31it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 196000/196000 [00:59<00:00, 3300.16it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 196000/196000 [01:04<00:00, 3035.71it/s]
filtration silhouette-2: 100%|██████████| 196000/196000 [01:06<00:00, 2950.53it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [02:12<00:00,  7.77s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 196000/196000 [02:59<00:00, 1093.26it/s]
point_clouds: 100%|██████████| 4000/4000 [00:03<00:00, 1271.95it/s]


Making point cloud diagrams
Filtered diagrams: (8000, 334, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 8000/8000 [00:04<00:00, 1988.57it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 8000/8000 [00:03<00:00, 2055.54it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 8000/8000 [00:03<00:00, 2075.32it/s]
point_cloud silhouette-2: 100%|██████████| 8000/8000 [00:03<00:00, 2100.88it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:18<00:00,  1.08s/it]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 8000/8000 [00:11<00:00, 715.43it/s]
combine features: 100%|██████████| 4000/4000 [00:01<00:00, 2660.91it/s]


In [11]:
test_features = None
for i in range(0, 10000, 2500):
    print(f'Calculating {i} - {i + 2500}')
    test_features_batch = make_features(test_images[i:i + 2500])
    if test_features is None:
        test_features = test_features_batch
    else:
        test_features = numpy.concatenate((test_features, test_features_batch), axis = 0)
    numpy.save("test_features.npy", test_features)

Calculating 0 - 2500


filtrations: 100%|██████████| 47/47 [00:08<00:00,  5.73it/s]


Making filtration diagrams
Filtered diagrams: (122500, 56, 3)
Calculating Betti features


filtration betti: 100%|██████████| 122500/122500 [00:42<00:00, 2869.31it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 122500/122500 [00:41<00:00, 2976.50it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 122500/122500 [00:40<00:00, 3011.48it/s]
filtration silhouette-2: 100%|██████████| 122500/122500 [00:43<00:00, 2843.11it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [01:16<00:00,  4.52s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 122500/122500 [01:56<00:00, 1050.70it/s]
point_clouds: 100%|██████████| 2500/2500 [00:03<00:00, 637.29it/s]


Making point cloud diagrams
Filtered diagrams: (5000, 314, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 5000/5000 [00:02<00:00, 1835.59it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 5000/5000 [00:02<00:00, 1905.79it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 5000/5000 [00:02<00:00, 1917.73it/s]
point_cloud silhouette-2: 100%|██████████| 5000/5000 [00:02<00:00, 1914.69it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:13<00:00,  1.23it/s]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 5000/5000 [00:08<00:00, 608.05it/s]
combine features: 100%|██████████| 2500/2500 [00:01<00:00, 1584.74it/s]


Calculating 2500 - 5000


filtrations: 100%|██████████| 47/47 [00:08<00:00,  5.34it/s]


Making filtration diagrams
Filtered diagrams: (122500, 72, 3)
Calculating Betti features


filtration betti: 100%|██████████| 122500/122500 [00:42<00:00, 2896.55it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 122500/122500 [00:42<00:00, 2866.93it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 122500/122500 [00:42<00:00, 2908.36it/s]
filtration silhouette-2: 100%|██████████| 122500/122500 [00:41<00:00, 2927.36it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [01:24<00:00,  4.98s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 122500/122500 [01:52<00:00, 1087.58it/s]
point_clouds: 100%|██████████| 2500/2500 [00:01<00:00, 1264.62it/s]


Making point cloud diagrams
Filtered diagrams: (5000, 331, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 5000/5000 [00:02<00:00, 1931.68it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 5000/5000 [00:02<00:00, 1937.96it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 5000/5000 [00:02<00:00, 1713.00it/s]
point_cloud silhouette-2: 100%|██████████| 5000/5000 [00:02<00:00, 1866.97it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:12<00:00,  1.32it/s]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 5000/5000 [00:07<00:00, 640.39it/s]
combine features: 100%|██████████| 2500/2500 [00:01<00:00, 2491.56it/s]


Calculating 5000 - 7500


filtrations: 100%|██████████| 47/47 [00:08<00:00,  5.67it/s]


Making filtration diagrams
Filtered diagrams: (122500, 60, 3)
Calculating Betti features


filtration betti: 100%|██████████| 122500/122500 [00:41<00:00, 2977.84it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 122500/122500 [00:41<00:00, 2923.66it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 122500/122500 [00:40<00:00, 3027.95it/s]
filtration silhouette-2: 100%|██████████| 122500/122500 [00:40<00:00, 3038.26it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [01:13<00:00,  4.32s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 122500/122500 [01:51<00:00, 1097.65it/s]
point_clouds: 100%|██████████| 2500/2500 [00:02<00:00, 1242.71it/s]


Making point cloud diagrams
Filtered diagrams: (5000, 307, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 5000/5000 [00:02<00:00, 1925.89it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 5000/5000 [00:02<00:00, 1849.64it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 5000/5000 [00:02<00:00, 1828.87it/s]
point_cloud silhouette-2: 100%|██████████| 5000/5000 [00:02<00:00, 1878.84it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:14<00:00,  1.19it/s]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 5000/5000 [00:07<00:00, 639.21it/s] 
combine features: 100%|██████████| 2500/2500 [00:00<00:00, 2575.55it/s]


Calculating 7500 - 10000


filtrations: 100%|██████████| 47/47 [00:08<00:00,  5.70it/s]


Making filtration diagrams
Filtered diagrams: (122500, 62, 3)
Calculating Betti features


filtration betti: 100%|██████████| 122500/122500 [00:40<00:00, 2988.06it/s]


Calculating landscape features


filtration landscape: 100%|██████████| 122500/122500 [00:40<00:00, 3015.45it/s]


Calculating silhouette features


filtration silhouette-1: 100%|██████████| 122500/122500 [00:40<00:00, 3016.72it/s]
filtration silhouette-2: 100%|██████████| 122500/122500 [00:40<00:00, 3056.33it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


filtration amplitudes: 100%|██████████| 17/17 [01:14<00:00,  4.39s/it]


Calculating lifetime features


filtration lifetime: 100%|██████████| 122500/122500 [01:49<00:00, 1113.67it/s]
point_clouds: 100%|██████████| 2500/2500 [00:02<00:00, 1233.01it/s]


Making point cloud diagrams
Filtered diagrams: (5000, 325, 3)
Calculating Betti features


point_cloud betti: 100%|██████████| 5000/5000 [00:02<00:00, 1892.61it/s]


Calculating landscape features


point_cloud landscape: 100%|██████████| 5000/5000 [00:02<00:00, 1958.14it/s]


Calculating silhouette features


point_cloud silhouette-1: 100%|██████████| 5000/5000 [00:02<00:00, 1895.34it/s]
point_cloud silhouette-2: 100%|██████████| 5000/5000 [00:02<00:00, 1857.95it/s]


Calculating entropy features
Calculating number of points features
Calculating amplitude features


point_cloud amplitudes: 100%|██████████| 17/17 [00:12<00:00,  1.32it/s]


Calculating lifetime features


point_cloud lifetime: 100%|██████████| 5000/5000 [00:07<00:00, 658.14it/s]
combine features: 100%|██████████| 2500/2500 [00:00<00:00, 2540.88it/s]


In [9]:
train_features = numpy.load("train_features.npy")
test_features = numpy.load("test_features.npy")

In [4]:
train_features = numpy.minimum(train_features, 1e9)
test_features = numpy.minimum(test_features, 1e9)

In [5]:
rf = sklearn.ensemble.RandomForestClassifier(n_jobs = -1, n_estimators = 1000)
rf.fit(train_features, train_labels)
print(rf.score(test_features, test_labels))

0.9736


In [8]:
pca = sklearn.decomposition.PCA(n_components = 8, random_state = 42)
train_features_reduced = pca.fit_transform(train_features)
print('Explained variance', round(pca.explained_variance_ratio_.sum(), 2))
print([ round(x, 3) for x in pca.explained_variance_ratio_ ])
test_features_reduced = pca.transform(test_features)

Explained variance 1.0
[0.476, 0.381, 0.143, 0.0, 0.0, 0.0, 0.0, 0.0]


In [None]:
rf_reduced = sklearn.ensemble.RandomForestClassifier(n_jobs = -1, n_estimators = 10000)
rf_reduced.fit(train_features_reduced, train_labels)
print(rf_reduced.score(test_features_reduced, test_labels))