# 02-Generate-Trainingset.ipynb

## Generate a training set from the example data downloaded in the previous notebook


In [1]:
import sys, os

# Resolve the parent of the current working directory; later cells build all
# sample-data paths relative to this directory.
root_dir = os.path.abspath('..')
# Make modules located in that directory importable
# (presumably needed for the `voodoonet` import below -- TODO confirm).
sys.path.append(root_dir)

import glob
import torch
import xarray as xr
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime

import voodoonet


  from .autonotebook import tqdm as notebook_tqdm


# Fetch the LV0 files and the classification file from the sample data directory


In [2]:
# Glob pattern matching every LV0 file below <root_dir>/sample_data/rpg-fmcw-94
rpg_lv0_files_dir = os.path.join(
    root_dir,
    'sample_data',
    'rpg-fmcw-94',
    '*.LV0',
)

# glob returns matches in arbitrary order; sort them so the file list is deterministic
all_lv0_files = glob.glob(rpg_lv0_files_dir)
all_lv0_files.sort()

print(f'Found {len(all_lv0_files)} LV0 files')

Found 24 LV0 files


In [3]:
# Glob pattern matching every netCDF file below <root_dir>/sample_data/classification
class_files_dir = os.path.join(
    root_dir,
    'sample_data',
    'classification',
    '*.nc',
)

# glob returns matches in arbitrary order; sort them so the file list is deterministic
class_files = glob.glob(class_files_dir)
class_files.sort()

print(f'Found {len(class_files)} classification files')

Found 1 classification files


# Split the LV0 files into 10 folds

In [4]:
# Distribute the LV0 files round-robin over 10 folds (numbered 0 to 9):
# fold k receives files k, k + 10, k + 20, ... of the sorted file list.
fn_X = [all_lv0_files[fold::10] for fold in range(10)]

# Report the size and the content of every fold
for i, fold_files in enumerate(fn_X):
    print(f'Fold number {i} has {len(fold_files)} files :: {fold_files}')


Fold number 0 has 3 files :: ['/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/230217_000004_P03_ZEN.LV0', '/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/230217_100004_P03_ZEN.LV0', '/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/230217_200001_P03_ZEN.LV0']
Fold number 1 has 3 files :: ['/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/230217_010000_P03_ZEN.LV0', '/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/230217_110000_P03_ZEN.LV0', '/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/230217_210002_P03_ZEN.LV0']
Fold number 2 has 3 files :: ['/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/230217_020002_P03_ZEN.LV0', '/Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/rpg-fmcw-94/23

In [5]:
# Spectrum normalization limits (minimum, maximum) in dBZ.
# One training data set is generated for each pair.
z_limits_list = [(-60, 5), (-55, 10), (-50, 20)]

# Select the torch device once instead of hard-coding `mps`, so the notebook
# also runs on machines without Apple GPU support:
# `mps` on MacOS, `cuda:X` for NVIDIA GPUs, else `cpu`.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# The output directory is identical for every run, so build it outside the loop
# and create it up front in case the writer does not create missing directories.
training_dataset_dir = os.path.join(root_dir, 'sample_data', 'training_datasets')
os.makedirs(training_dataset_dir, exist_ok=True)

for z_limits in z_limits_list:
    # generate training data set
    model_options = voodoonet.utils.VoodooOptions(
        device=device,
        z_limits=z_limits,   # spectrum normalization limits in dBZ, controls sensitivity of the model, usually minimum and maximum values of the data set
    )

    # define output file name; the normalization limits are encoded in the name
    # so that the data sets of the individual runs do not overwrite each other
    training_dataset_file = f'training-data-set-eriswil-{model_options.z_limits[0]}-{model_options.z_limits[1]}dBZ.pt'
    training_dataset_file = os.path.join(training_dataset_dir, training_dataset_file)

    # All LV0 files are used here (not the folds built above)
    voodoonet.generate_training_data(all_lv0_files, class_files, training_dataset_file, options=model_options)

    print(f'Training data set saved to {os.path.abspath(training_dataset_file)}')

Training data set saved to /Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/training_datasets/training-data-set-eriswil--60-5dBZ.pt
Training data set saved to /Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/training_datasets/training-data-set-eriswil--55-10dBZ.pt
Training data set saved to /Users/schimmel/code/voodoo_tutorial/Cloudnet-VOODOO-Processing/sample_data/training_datasets/training-data-set-eriswil--50-20dBZ.pt
