In [1]:
import pandas as pd

#scale features
from sklearn.preprocessing import StandardScaler

from pyod.models.ecod import ECOD
from pyod.models.copod import COPOD
from pyod.models.iforest import IForest


In [2]:
from sklearn.neighbors import KDTree

def assign_labels(features1, features2, radius=2.0):
    """
    Assign labels to features1 from features2 where
    1) frame_id matches and
    2) distance between (x, y) <= radius.

    For every frame_id present in both frames, a KDTree is built on the
    (x, y) coordinates of features1 and queried with each (x, y) point of
    features2; every features1 row within `radius` of such a point receives
    that point's 'label'. If several features2 points reach the same row,
    the one that appears last in features2 wins.

    Parameters
    ----------
    features1 : pandas.DataFrame
        Must contain 'frame_id', 'x' and 'y'. If it has no 'label' column,
        one is created with default 0. The caller's frame is not modified.
    features2 : pandas.DataFrame
        Must contain 'frame_id', 'x', 'y' and 'label'.
    radius : float, default 2.0
        Maximum distance for a features2 point to label a features1 row.

    Returns
    -------
    pandas.DataFrame
        A copy of features1 with a 'label' column. Rows are grouped by
        frame_id (in groupby's sorted key order) and keep their original
        index labels and column dtypes. Rows whose frame_id is missing are
        not part of any group and are therefore absent from the result
        (pandas groupby default).
    """
    # Copy BEFORE adding the default column so the caller's frame is never mutated
    features1 = features1.copy()
    if 'label' not in features1.columns:
        features1['label'] = 0

    # Group the annotations once, keyed by frame_id
    grouped_f2 = dict(tuple(features2.groupby('frame_id')))

    updated_subsets = []

    # Iterate only over frame_ids that are in features1
    for fid, subset_f1 in features1.groupby('frame_id'):
        subset_f2 = grouped_f2.get(fid)
        if subset_f2 is None:
            # No annotations for this frame: nothing to assign, no tree needed
            updated_subsets.append(subset_f1)
            continue

        # KDTree over the features1 coordinates of this frame
        kdtree = KDTree(subset_f1[['x', 'y']].values)

        # One array of features1 row positions per features2 point
        neighbor_indices_array = kdtree.query_radius(
            subset_f2[['x', 'y']].values, r=radius
        )

        # Work on the label column only, so the other columns keep their dtypes.
        # An object buffer accepts labels of any type (int, float, str, ...).
        new_labels = subset_f1['label'].to_numpy(dtype=object, copy=True)
        # Read labels straight from the column; a row-wise .iloc[i] lookup would
        # build a Series per row and may upcast the label value.
        labels_f2 = subset_f2['label'].to_numpy()

        for lbl, indices in zip(labels_f2, neighbor_indices_array):
            if len(indices) > 0:
                new_labels[indices] = lbl

        subset_f1_updated = subset_f1.copy()
        # infer_objects() restores a concrete dtype (e.g. int64) where possible;
        # assigning the raw array avoids any index alignment.
        subset_f1_updated['label'] = pd.Series(new_labels).infer_objects().to_numpy()
        updated_subsets.append(subset_f1_updated)

    if not updated_subsets:
        # pd.concat raises on an empty list; return an empty frame with the same columns
        return features1.iloc[0:0]

    # Concatenate all updated subsets back together
    return pd.concat(updated_subsets, axis=0)

In [6]:
import os

# Per-slide pipeline output, and the annotated slides used as the label source
data_path = '/mnt/deepstore/Final_DeepPhenotyping/pipeline/output/spike_0428'
annotated_path = '/mnt/deepstore/Final_DeepPhenotyping/figures/figure5_spikein/annotated_slides'

# Every sub-directory of data_path is treated as one slide
slides = [s for s in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, s))]

# Dictionary reserved for per-slide results (this cell does not populate it)
results = {}


for slide in slides:

    print(f'Processing {slide}')

    slide_dir = os.path.join(data_path, slide)

    features1 = pd.read_parquet(os.path.join(slide_dir, f'{slide}.parquet.gz'))
    features2 = pd.read_hdf(os.path.join(annotated_path, slide, f'{slide}.hdf5'), key='features')

    # Transfer labels from the annotated events onto the pipeline events
    features1 = assign_labels(features1, features2, radius=3.0)

    # Only features1 (features + 'label') is needed from here on; free memory
    del features2

    # Drop incomplete rows and renumber so the score arrays line up positionally
    features1 = features1.dropna().reset_index(drop=True)

    # The detectors run on the columns 'z0' through 'z127'
    features = features1.loc[:, 'z0':'z127'].values

    print("Scaling Features")

    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    contamination = 1e-3

    # Same procedure for each detector: announce, build, fit, store its scores.
    # Each entry: (name, constructor, start message, done message, score column)
    for clf_name, make_clf, start_msg, done_msg, score_col in (
        ('COPOD', lambda: COPOD(n_jobs=-1, contamination=contamination),
         "Running COPOD on features", "COPOD Completed, Storing results", 'copod_scores'),
        ('ECOD', lambda: ECOD(contamination=contamination,n_jobs=-1),
         "Running ECOD on features", "ECOD Completed, Storing results", 'ecod_scores'),
        ('IForest', lambda: IForest(contamination=contamination, n_jobs=-1, n_estimators=100),
         "Running IForest on features", "IForest Completed, Storing results", 'iforest_scores'),
    ):
        print(start_msg)
        clf = make_clf()
        clf.fit(features)
        print(done_msg)
        features1[score_col] = clf.decision_scores_

    # Persist the scored events next to the slide's input file
    features1 = features1.reset_index(drop=True)
    features1.to_parquet(os.path.join(slide_dir, f'{slide}_OD.parquet.gz'), index=False)




Processing 0B68620
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:   11.8s remaining:  6.1min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   20.5s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.2s remaining:  1.1min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:    9.2s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B60414
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.0s remaining:  1.6min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   17.3s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.5s remaining:  1.3min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   17.2s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B87816
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.0s remaining:  1.6min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   17.2s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.9s remaining:  1.5min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   17.2s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B87911
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.7s remaining:  1.4min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   16.4s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.3s remaining:  1.7min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   16.4s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B68720
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.6s remaining:  1.3min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   15.3s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.7s remaining:  1.9min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   15.9s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B8B306
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.6s remaining:  1.3min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   18.2s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.0s remaining:  1.5min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   18.9s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B68520
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.2s remaining:  1.7min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   15.9s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.1s remaining:  1.6min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   15.6s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B60214
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.0s remaining:  1.5min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   17.8s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.5s remaining:  1.3min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   17.7s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B60314
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.8s remaining:  1.5min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   16.0s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.8s remaining:  1.4min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   16.0s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B87711
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.9s remaining:  1.5min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   20.4s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.2s remaining:  1.7min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   21.1s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B60114
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.6s remaining:  1.4min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   11.7s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    2.6s remaining:  1.4min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   11.5s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B68820
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.0s remaining:  1.6min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   16.2s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.0s remaining:  1.6min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   15.9s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results


In [5]:
import os

# Per-slide output directory, and the directory of annotated traditional-feature tables
data_path = '/mnt/deepstore/Final_DeepPhenotyping/pipeline/output/spike_0428'
annotated_path = '/mnt/deepstore/Final_DeepPhenotyping/figures/figure5_outlierdetection/trad_features'

# Every sub-directory of data_path is treated as one slide
slides = [s for s in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, s))]

# Dictionary reserved for per-slide results (this cell does not populate it)
results = {}


for slide in slides:

    print(f'Processing {slide}')

    # Excluded slides. The original notes say 0B60114 and 0B68520 are not in the
    # annotated path; no reason is recorded for 0B8B306.
    if slide in ('0B8B306', '0B60114', '0B68520'):
        continue

    # An existing output file means this slide was already processed
    out_file = os.path.join(data_path, slide, f'{slide}_trad_OD.parquet.gz')
    if os.path.exists(out_file):
        print(f"Results for {slide} already exist, skipping")
        continue

    features1 = pd.read_parquet(os.path.join(annotated_path, f'{slide}_trad_ann.parquet.gz'))

    # Drop incomplete rows and renumber so the score arrays line up positionally
    features1 = features1.dropna().reset_index(drop=True)

    # Model input: every column except the last three
    features = features1.iloc[:, :-3]

    print("Scaling Features")

    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    contamination = 1e-3

    # Same procedure for each detector: announce, build, fit, store its scores.
    # Each entry: (name, constructor, start message, done message, score column)
    for clf_name, make_clf, start_msg, done_msg, score_col in (
        ('COPOD', lambda: COPOD(n_jobs=-1, contamination=contamination),
         "Running COPOD on features", "COPOD Completed, Storing results", 'copod_scores'),
        ('ECOD', lambda: ECOD(contamination=contamination,n_jobs=-1),
         "Running ECOD on features", "ECOD Completed, Storing results", 'ecod_scores'),
        ('IForest', lambda: IForest(contamination=contamination, n_jobs=-1, n_estimators=100),
         "Running IForest on features", "IForest Completed, Storing results", 'iforest_scores'),
    ):
        print(start_msg)
        clf = make_clf()
        clf.fit(features)
        print(done_msg)
        features1[score_col] = clf.decision_scores_

    # Persist the scored table in the slide's output directory
    features1 = features1.reset_index(drop=True)
    features1.to_parquet(out_file, index=False)




Processing 0B68620
Results for 0B68620 already exist, skipping
Processing 0B60414
Results for 0B60414 already exist, skipping
Processing 0B87816
Results for 0B87816 already exist, skipping
Processing 0B87911
Results for 0B87911 already exist, skipping
Processing 0B68720
Results for 0B68720 already exist, skipping
Processing 0B8B306
Processing 0B68520
Processing 0B60214
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:   18.1s remaining:  9.4min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:  1.1min finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    6.0s remaining:  3.1min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   53.5s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B60314
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    5.6s remaining:  2.9min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   50.5s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    5.6s remaining:  2.9min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   50.8s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B87711
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    7.0s remaining:  3.6min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:  1.1min finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    7.9s remaining:  4.1min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:  1.1min finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B60114
Processing 0B68820
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:   14.6s remaining:  7.5min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   59.0s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    5.8s remaining:  3.0min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   49.7s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results


In [6]:
import os

# Outlier detection on a PCA projection of the scaled traditional features

from sklearn.decomposition import PCA

# Per-slide output directory, and the directory of annotated traditional-feature tables
data_path = '/mnt/deepstore/Final_DeepPhenotyping/pipeline/output/spike_0428'
annotated_path = '/mnt/deepstore/Final_DeepPhenotyping/figures/figure5_outlierdetection/trad_features'

# Every sub-directory of data_path is treated as one slide
slides = [s for s in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, s))]

# Dictionary reserved for per-slide results (this cell does not populate it)
results = {}


for slide in slides:

    print(f'Processing {slide}')

    # Excluded slides. The original notes say 0B60114 and 0B68520 are not in the
    # annotated path; no reason is recorded for 0B8B306.
    if slide in ('0B8B306', '0B60114', '0B68520'):
        continue

    # An existing output file means this slide was already processed
    out_file = os.path.join(data_path, slide, f'{slide}_trad_PCA_OD.parquet.gz')
    if os.path.exists(out_file):
        print(f"Results for {slide} already exist, skipping")
        continue

    features1 = pd.read_parquet(os.path.join(annotated_path, f'{slide}_trad_ann.parquet.gz'))

    # Drop incomplete rows and renumber so the score arrays line up positionally
    features1 = features1.dropna().reset_index(drop=True)

    # Model input: every column except the last three
    features = features1.iloc[:, :-3]

    print("Scaling Features")

    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    # Project the scaled features onto 128 principal components
    pca = PCA(n_components=128)
    features = pca.fit_transform(features)

    contamination = 1e-3

    # Same procedure for each detector: announce, build, fit, store its scores.
    # Each entry: (name, constructor, start message, done message, score column)
    for clf_name, make_clf, start_msg, done_msg, score_col in (
        ('COPOD', lambda: COPOD(n_jobs=-1, contamination=contamination),
         "Running COPOD on features", "COPOD Completed, Storing results", 'copod_scores'),
        ('ECOD', lambda: ECOD(contamination=contamination,n_jobs=-1),
         "Running ECOD on features", "ECOD Completed, Storing results", 'ecod_scores'),
        ('IForest', lambda: IForest(contamination=contamination, n_jobs=-1, n_estimators=100),
         "Running IForest on features", "IForest Completed, Storing results", 'iforest_scores'),
    ):
        print(start_msg)
        clf = make_clf()
        clf.fit(features)
        print(done_msg)
        features1[score_col] = clf.decision_scores_

    # Persist the scored table in the slide's output directory
    features1 = features1.reset_index(drop=True)
    features1.to_parquet(out_file, index=False)




Processing 0B68620
Results for 0B68620 already exist, skipping
Processing 0B60414
Results for 0B60414 already exist, skipping
Processing 0B87816
Results for 0B87816 already exist, skipping
Processing 0B87911
Results for 0B87911 already exist, skipping
Processing 0B68720
Results for 0B68720 already exist, skipping
Processing 0B8B306
Processing 0B68520
Processing 0B60214
Results for 0B60214 already exist, skipping
Processing 0B60314
Scaling Features
Running COPOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:   21.1s remaining: 10.9min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   36.4s finished


COPOD Completed, Storing results
Running ECOD on features


[Parallel(n_jobs=64)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=64)]: Done   2 out of  64 | elapsed:    3.2s remaining:  1.6min
[Parallel(n_jobs=64)]: Done  64 out of  64 | elapsed:   18.7s finished


ECOD Completed, Storing results
Running IForest on features
IForest Completed, Storing results
Processing 0B87711
Results for 0B87711 already exist, skipping
Processing 0B60114
Processing 0B68820
Results for 0B68820 already exist, skipping


In [None]:
import os

# Outlier detection on a Gaussian random projection (128 components) of the
# scaled traditional features; results are written to '<slide>_trad_JL_OD.parquet.gz'.

# Fixed: the module is `sklearn.random_projection` (`sklearn.random_project`
# does not exist and made this cell fail on import).
from sklearn.random_projection import GaussianRandomProjection

data_path = '/mnt/deepstore/Final_DeepPhenotyping/pipeline/output/spike_0428'
annotated_path = '/mnt/deepstore/Final_DeepPhenotyping/figures/figure5_outlierdetection/trad_features'
slides = os.listdir(data_path)
# every sub-directory of data_path is treated as one slide
slides = [s for s in slides if os.path.isdir(os.path.join(data_path, s))]

#make a dictionary to store the results for each slide (not populated in this cell)
results = {}


for slide in slides: 

    print(f'Processing {slide}')

    if slide=='0B8B306':
        continue
    elif slide=='0B60114':
        #skip this slide, it is not in the annotated path
        continue
    elif slide=='0B68520':
        #skip this slide, it is not in the annotated path
        continue

    #if result already exists, skip
    if os.path.exists(os.path.join(data_path, slide, f'{slide}_trad_JL_OD.parquet.gz')):
        print(f"Results for {slide} already exist, skipping")
        continue

    features1 = pd.read_parquet(os.path.join(annotated_path, f'{slide}_trad_ann.parquet.gz'))


    #drop incomplete rows and renumber so the score arrays line up positionally
    features1 = features1.dropna()
    features1 = features1.reset_index(drop=True)

    #keep all but the last three columns
    features = features1.iloc[:, :-3]

    print("Scaling Features")

    scaler = StandardScaler()

    features = scaler.fit_transform(features)

    #run a Gaussian random projection on the scaled features
    #NOTE: no random_state is passed, so the projection differs between runs
    rp = GaussianRandomProjection(n_components=128)
    features = rp.fit_transform(features)
    
    contamination = 1e-3

    print("Running COPOD on features")

    clf_name = 'COPOD'
    clf = COPOD(n_jobs=-1, contamination=contamination)
    clf.fit(features)

    print("COPOD Completed, Storing results")

    features1['copod_scores'] = clf.decision_scores_

    print("Running ECOD on features")
    clf_name = 'ECOD'
    clf = ECOD(contamination=contamination,n_jobs=-1)
    clf.fit(features)
    print("ECOD Completed, Storing results")
    features1['ecod_scores'] = clf.decision_scores_

    print("Running IForest on features")
    clf_name = 'IForest'
    clf = IForest(contamination=contamination, n_jobs=-1, n_estimators=100)
    clf.fit(features)
    print("IForest Completed, Storing results")
    features1['iforest_scores'] = clf.decision_scores_

    #write the features1 to a parquet file
    features1 = features1.reset_index(drop=True)
    features1.to_parquet(os.path.join(data_path, slide, f'{slide}_trad_JL_OD.parquet.gz'), index=False)


