# Information
This notebook provides some **examples** of how to use the notebooks and the associated functionality in the **beak** package.

# Examples

## 1. Create a list of raster files based on a pre-defined model definition.
Case:
- You have a list of raster files in a folder or some subfolders.
- You have a model definition for a specific model, stored in the respective model module.
- You need a list of files corresponding to the evidence layers defined in the model definition.

Load model definition

In [1]:
from beak.models import mvt_nat

# Key of the model definition to look up in the mvt_nat module
MODEL = "MVT_PREFERRED"
model = mvt_nat.models[MODEL]

print("Model definition:")
print("-----------------")
# Only layers whose flag is exactly True are listed
for active_layer in (name for name, flag in model.items() if flag is True):
    print(active_layer)
  

Model definition:
-----------------
Geology_Lithology_Majority
Geology_Lithology_Minority
Geology_Period_Maximum_Majority
Geology_Period_Minimum_Majority
Geology_Dictionary_Calcareous
Geology_Dictionary_Carbonaceous
Geology_Dictionary_FineClastic
Geology_Dictionary_Felsic
Geology_Dictionary_Intermediate
Geology_Dictionary_UltramaficMafic
Geology_Dictionary_Anatectic
Geology_Dictionary_Gneissose
Geology_Dictionary_Schistose
Terrane_Proximity
Geology_PassiveMargin_Proximity
Geology_BlackShale_Proximity
Geology_Fault_Proximity
Geology_Paleolatitude_Period_Maximum
Geology_Paleolatitude_Period_Minimum
Gravity_GOCE_ShapeIndex
Gravity_Bouguer
Gravity_Bouguer_HGM
Gravity_Bouguer_UpCont30km_HGM
Gravity_Bouguer_HGM_Worms_Proximity
Gravity_Bouguer_UpCont30km_HGM_Worms_Proximity
Magnetic_HGM
Magnetic_LongWavelength_HGM
Magnetic_HGM_Worms_Proximity
Magnetic_LongWavelength_HGM_Worms_Proximity
Seismic_LAB_Hoggard
Seismic_Moho


Load file paths: if the data are stored in the beak data folder, you can use the package's resource handling to build the paths.<br>
You can also provide multiple folders to load from.

In [2]:
from importlib_resources import files

# Root of the exported dataset inside the beak.data package
BASE_PATH = (
    files("beak.data")
    / "LAWLEY22-EXPORT"
    / "EPSG_4326_RES_0_05"
    / "COMPLETE_DATASET"
)

# Sub-folders that are searched for raster files further below
PATH_NUMERICAL, PATH_CATEGORICAL = (
    BASE_PATH / "NUMERICAL_MINMAX",
    BASE_PATH / "CATEGORICAL",
)


In [3]:
from pathlib import Path
from typing import Sequence, Union
from collections import Counter

def load_model(
    model: dict,
    folders: Union[Path, str, Sequence[Union[Path, str]]],
    file_extensions: Sequence[str] = (".tif", ".tiff"),
    exclude_files: Sequence[Union[Path, str]] = (),
    verbose: int = 1,
):
    """Collect the files that correspond to the active layers of a model definition.

    A layer is active when its value in ``model`` compares equal to ``True``.
    For every active layer the candidate file names ``<layer><extension>`` are
    built and compared case-insensitively against the names of all files found
    recursively below ``folders``.

    Args:
        model: Mapping of layer name to a flag; layers with a truthy ``True``
            flag are selected.
        folders: One folder or a sequence of folders that are searched
            recursively. Strings are converted to ``Path``.
        file_extensions: File suffixes (including the leading dot) to look
            for. Compared case-insensitively.
        exclude_files: Full paths (``str`` or ``Path``) of matched files that
            should be dropped from the result.
        verbose: ``1`` prints the selected layers, matched files and duplicate
            names in detail; other values print summaries only.

    Returns:
        A tuple ``(evidence_layers, layers_files, filename_counts)`` where
        ``evidence_layers`` is the list of active layer names, ``layers_files``
        is a one-shot ``zip`` iterator of ``(layer, file)`` pairs, and
        ``filename_counts`` is a ``Counter`` of matched file names computed
        before exclusions are applied.

    Raises:
        ValueError: If no layer is selected, no file with a requested
            extension is found, no file matches a layer, or at least one
            active layer has no matching file.
    """
    # Accept single values where a sequence is expected
    if isinstance(folders, (str, Path)):
        folders = [folders]
    if isinstance(file_extensions, str):
        file_extensions = [file_extensions]
    if isinstance(exclude_files, (str, Path)):
        exclude_files = [exclude_files]

    show_details = verbose == 1

    # Load evidence layers from model dictionary
    print("Loading model definition...")
    # Equality (not identity) is kept on purpose so that flags such as 1 still count
    evidence_layers = [layer for layer, value in model.items() if value == True]  # noqa: E712

    if not evidence_layers:
        raise ValueError("No valid selection.")
    print(f"Selected {len(evidence_layers)} evidence layers.")
    if show_details:
        for layer in evidence_layers:
            print(f"- {layer}")

    # Map every lower-cased candidate file name to the layer(s) it belongs to
    layers_by_filename = {}
    for extension in file_extensions:
        for layer in evidence_layers:
            candidate = (layer + extension).lower()
            layers_by_filename.setdefault(candidate, []).append(layer)

    # Create file list from provided folders
    print("\nCreate file list...")
    wanted_suffixes = {extension.lower() for extension in file_extensions}
    file_list = []

    for folder in folders:
        if isinstance(folder, str):
            folder = Path(folder)
        for file in folder.rglob("*"):
            file = Path(file)
            # Skip directories that happen to carry one of the suffixes
            if file.is_file() and file.suffix.lower() in wanted_suffixes:
                file_list.append(file)

    if not file_list:
        raise ValueError("No files found.")
    print(f"Found {len(file_list)} files.")

    # Keep only files whose name equals a candidate name (case-insensitive)
    print("\nSearching for corresponding files...")
    matching_list = []
    layers_list = []
    for file in file_list:
        for layer in layers_by_filename.get(file.name.lower(), ()):
            matching_list.append(file)
            layers_list.append(layer)

    file_list = matching_list

    if not file_list:
        raise ValueError("No matching files found.")
    print(f"Found {len(file_list)} matching files.")
    if show_details:
        for file in file_list:
            print(f"- {file}")

    # Check if all layers have files. The layers recorded during matching are
    # used instead of a substring test on the file stem, which was
    # case-sensitive and let e.g. "X_HGM" hide a missing layer "X".
    print("\nEnsuring that all layers have matching files...")
    matched_layers = set(layers_list)
    missing_layers = [layer for layer in evidence_layers if layer not in matched_layers]

    if missing_layers:
        for layer in missing_layers:
            print(f"ERROR: No file found for evidence layer '{layer}'.")
        raise ValueError("\nMissing files. Exit.")
    print("All layers have matching files.")

    # Count the occurrences of each filename
    print("\nChecking files for multiple occurences...")
    filename_counts = Counter(file.name for file in file_list)

    # Print the filenames that occur multiple times and their counts
    if max(filename_counts.values()) == 1:
        print("No duplicates found. All filenames occur only once.")
    elif show_details:
        for filename, count in filename_counts.items():
            if count > 1:
                print(f"- '{filename}' occurs {count} times")
    else:
        print(f"Some filenames occur multiple times. Please check with option verbose=1 to see which files are affected.")

    # Exclude files. Layers and files are filtered as pairs into new lists so
    # nothing is skipped and the two lists cannot get out of sync.
    if exclude_files:
        print("\nExcluding files from provided list...")
        excluded = {str(entry) for entry in exclude_files}
        kept_layers = []
        kept_files = []
        for layer, file in zip(layers_list, file_list):
            if str(file) in excluded:
                print(f"- {file}")
            else:
                kept_layers.append(layer)
                kept_files.append(file)
        layers_list, file_list = kept_layers, kept_files

    if len(evidence_layers) != len(file_list):
        print(f"\nWARNING: Number of evidence layers ({len(evidence_layers)}) does not match number of files found ({len(file_list)}).")
        print(f"Can be ignored if the model contains multiple files per layer, e.g. binary encoded categoricals.")
    # Defensive invariant: both lists are built and filtered in lockstep
    if len(layers_list) != len(file_list):
        raise ValueError("Number of layers and does not match number of files found. Please check manually excluded files and file extensions.")

    layers_files = zip(layers_list, file_list)

    return evidence_layers, layers_files, filename_counts

In [4]:
# from beak.utilities.io import load_model

# Search both data folders for the rasters of the selected model; summaries only
layers, matches, counts = load_model(
    model=model,
    folders=[PATH_NUMERICAL, PATH_CATEGORICAL],
    file_extensions=[".tif", ".tiff"],
    exclude_files=[],
    verbose=0,
)

Loading model definition...
Selected 31 evidence layers.

Create file list...
Found 812 files.

Searching for corresponding files...
Found 18 matching files.

Ensuring that all layers have matching files...
ERROR: No file found for evidence layer 'Geology_Lithology_Majority'.
ERROR: No file found for evidence layer 'Geology_Lithology_Minority'.
ERROR: No file found for evidence layer 'Geology_Period_Maximum_Majority'.
ERROR: No file found for evidence layer 'Geology_Period_Minimum_Majority'.
ERROR: No file found for evidence layer 'Geology_Dictionary_Calcareous'.
ERROR: No file found for evidence layer 'Geology_Dictionary_Carbonaceous'.
ERROR: No file found for evidence layer 'Geology_Dictionary_FineClastic'.
ERROR: No file found for evidence layer 'Geology_Dictionary_Felsic'.
ERROR: No file found for evidence layer 'Geology_Dictionary_Intermediate'.
ERROR: No file found for evidence layer 'Geology_Dictionary_UltramaficMafic'.
ERROR: No file found for evidence layer 'Geology_Dictionary

ValueError: 
Missing files. Exit.

In [None]:
def test_load_models():
    """Manually exercise ``load_model`` against a user-supplied folder.

    The placeholder strings for the folder and the excluded files have to be
    replaced before running. Prints every matched ``(layer, file)`` pair.
    """
    folder = Path(
        "paste_path_here"
    )

    model = { "dummy": False, "utils": True, "eda": True, "nat": True, "rolling_stone": False }
    file_extensions = [".py", ".txt"]
    exclude_files = ["paste_excluded_files_here"]

    # load_model iterates over `folders`, so the single folder is wrapped in a list
    layers, matches, counts = load_model(
        model,
        [folder],
        file_extensions,
        exclude_files,
    )

    # `matches` is a one-shot zip iterator; materialise it once. Iterating the
    # pairs directly also works when nothing is left after exclusion.
    matched_pairs = list(matches)

    print(f"\nLayers: {layers}:")
    for layer, file in matched_pairs:
        print(f"- {layer}: {file}")
            
# endregion: Test code