This notebook performs the following steps:

- Imports necessary libraries for data manipulation, visualization, and machine learning.
- Defines utility functions to load change events from a JSON file, extract a variety of features into a DataFrame, and plot clusters.
- Loads change events, extracts features, and removes incomplete data.
- Standardizes the features and uses UMAP to reduce dimensionality for clustering and visualization.
- Applies Gaussian Mixture Model clustering to assign cluster labels.
- Updates the original events with the new cluster labels and saves the results to a new JSON file.

In [None]:
# Install missing packages if required: UMAP with pip install umap-learn in notebook

!python -m pip install umap-learn scikit-learn pandas matplotlib
from aimon import ProjectChange
import os
from sklearn.mixture import GaussianMixture
from aimon import ChangeEventCollection

# Resolve the directory this code lives in and make it the working directory.
# `__file__` only exists when the code runs as a script/module; when it is
# missing (e.g. in a notebook kernel) the current working directory is kept.
if "__file__" in globals():
    current_dir = os.path.dirname(os.path.abspath(globals()["__file__"]))
else:
    current_dir = os.getcwd()

os.chdir(current_dir)

### Relevant parameters

In [None]:
# --- Input / output locations ---------------------------------------------
# Change-events file used as input. The workflow treats its events as if they
# were not labelled yet and assigns labels with UMAP + GMM.
change_events_file = "../test_data/change_events_labelled.json"
outfolder = "../test_data/out"
os.makedirs(outfolder, exist_ok=True)

# Path reserved for the change events labelled via UMAP + GMM.
change_events_file_labelled = os.path.join(
    outfolder, "change_events_labelled_using_umap_and_gmm.json"
)
# Path the UMAP model is saved to.
umap_model_file = os.path.join(outfolder, "umap_model.joblib")

# --- Reproducibility --------------------------------------------------------
random_state = 3

# --- UMAP parameters --------------------------------------------------------
n_neighbors = 100
min_dist = 0.1
n_components_umap = 2
metric = "manhattan"

# --- Clustering parameters --------------------------------------------------
n_components_GMM = 4

### Load unlabelled change events and reduce the dimensionality of the feature set using UMAP

In [None]:
# Read the change events from disk.
ces = ChangeEventCollection.load_from_file(change_events_file)

# Keep only the events returned by the rule-based filter on `event_type`
# (rule: not_in "undefined").
ces.events = ces.filter_events_rule_based({"event_type": {"not_in": "undefined"}})

# Prepare the UMAP input: the listed attributes are passed as features to
# ignore, and `event_type` is passed as the supervised label.
ces.prep_data_for_umap(
    ignore_features=[
        "object_id",
        "event_type",
        "delta_t_hours",
        "hull_surface_area",
        "hull_volume",
        "number_of_points",
    ],
    supervised_label="event_type",
)

# Fit UMAP with the notebook parameters, transform the data, and save the model.
ces.fit_UMAP(
    n_neighbors=n_neighbors,
    min_dist=min_dist,
    n_components=n_components_umap,
    metric=metric,
    random_state=random_state,
)
ces.transform_UMAP()
ces.save_UMAP_model(umap_model_file)

In [None]:
# Visual check of the collection: first the UMAP plot, then the
# feature-expression heatmap (requested with the "Reds" colormap).
ces.plot_UMAP()
ces.plot_feature_expression_heatmap(cmap="Reds")

In [None]:
# Keep a handle on the reducer held by `ces` so it can be reused below.
fitted_umap_model = ces.umap_reducer

# Second collection built from the same file. The rule-based restriction to
# "undefined" events is currently disabled, so no filter is applied here.
ces_undef = ChangeEventCollection.load_from_file(change_events_file)
# ces_undef.events = ces_undef.filter_events_rule_based({"event_type":{"in":"undefined"}})

# Same feature selection and supervised label as used for `ces`.
ces_undef.prep_data_for_umap(
    ignore_features=[
        "object_id",
        "event_type",
        "delta_t_hours",
        "hull_surface_area",
        "hull_volume",
        "number_of_points",
    ],
    supervised_label="event_type",
)

# Assign the existing reducer instead of fitting a new one, then transform and plot.
ces_undef.umap_reducer = fitted_umap_model
ces_undef.transform_UMAP()
ces_undef.plot_UMAP()
ces_undef.plot_feature_expression_heatmap()

### Gaussian Mixture Model Clustering

In [None]:
import numpy as np

# Cluster the UMAP embedding with a Gaussian Mixture Model (GMM).
# The number of mixture components is taken from the `n_components_GMM`
# parameter instead of a hard-coded literal, so the configured value is the
# one that is actually used.
gmm = GaussianMixture(n_components=n_components_GMM, random_state=random_state)
ces_undef.y_umap = gmm.fit_predict(ces_undef.X_umap_transformed)

# One display name per cluster id that was actually assigned.
ces_undef.y_label_map = {
    cluster_id: f"Cluster {cluster_id}" for cluster_id in np.unique(ces_undef.y_umap)
}

# Plot UMAP clustered by GMM
ces_undef.plot_UMAP()

### Save to .geojson for visual inspection

In [None]:
# NOTE(review): no cell visible in this notebook writes `change_events_file_labelled`;
# confirm the labelled events are saved to that path before running this cell.
change_prj = ProjectChange(
    change_event_file=change_events_file_labelled,
    # Derive the project name from the file name without its extension.
    # splitext is used because slicing off 4 characters left the "." of
    # ".json" at the end of the name.
    project_name=os.path.splitext(os.path.basename(change_events_file_labelled))[0],
    projected_image_folder="",
    projected_events_folder=outfolder,
    epsg=31254,
)
change_prj.project_change()