In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = '-1'

import sys

from importlib import reload

import logging
# Re-import the logging module before configuring it.
# NOTE(review): presumably this discards handlers installed earlier in the
# kernel so that basicConfig() below takes effect on re-runs — confirm.
reload(logging)
# Root logger: INFO and above, written to stdout (not stderr) in the format
# "time | logger name | level : message".
logging.basicConfig(
    format='%(asctime)s | %(name)s | %(levelname)s : %(message)s',
    level=logging.INFO,
    stream=sys.stdout
)

# Notebook-specific logger; it is more verbose (DEBUG) than the root logger.
logger = logging.getLogger('hysped')
logger.setLevel(logging.DEBUG)

import contextily as ctx
import geocube
import geopandas as gpd
import json
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import re
import seaborn as sns
import shapely
import tensorflow as tf
import zipfile

import wandb
wandb.login()

from functools import partial
from geocube.api.core import make_geocube
from geocube.rasterize import rasterize_points_griddata, rasterize_points_radial
from IPython.display import JSON, HTML
from keras.utils.np_utils import to_categorical
from matplotlib.colors import to_rgba
from shapely.geometry import box, mapping
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers
from tqdm.notebook import trange, tqdm
from wandb.keras import WandbMetricsLogger, WandbEvalCallback


# Configure every visible GPU: allocate memory on demand and cap the logical
# device created on each physical GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # configure utilization of GPUs
    try:
        for gpu in gpus:
            # memory growth setting
            tf.config.experimental.set_memory_growth(gpu, True)
            # NOTE(review): memory_limit is expressed in MB according to the
            # TensorFlow docs; 256 is a very small budget — confirm it is intended.
            tf.config.set_logical_device_configuration(
                gpu,
                [tf.config.LogicalDeviceConfiguration(memory_limit=256)]
            )
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized.
        # Use the notebook's `logger`; the previously referenced
        # `module_logger` is not defined anywhere and raised NameError here,
        # hiding the original RuntimeError.
        logger.error(e)


# Make numpy values easier to read: print floats with 6 digits of precision
# and suppress scientific notation for small numbers.
np.set_printoptions(precision=6, suppress=True)

%matplotlib inline

[34m[1mwandb[0m: Currently logged in as: [33mstevo[0m ([33mhysped[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# Notebook-wide settings, gathered in one place so they are easy to tune.
# Only 'data_dir' and 'data' are read by the cells visible below.
cfg = {
    'crs': 'EPSG:5514',
    'figsize': (12, 12),
    # Directory holding the input data, relative to the notebook.
    'data_dir': os.path.join('..', 'data'),
    # File name of the dataset inside 'data_dir'.
    'data': 'Sverepec_spolu.zip',
}
display(JSON(cfg))

<IPython.core.display.JSON object>

In [3]:
def split_df(df:pd.DataFrame, split:float=0.7) -> tuple:
    """Split a frame into a leading and a trailing part by row position.

    Args:
        df: Frame to split; rows keep their existing order (no shuffling).
        split: Fraction of the rows that goes into the first part. The cut
            index is ``int(split * len(df))``, i.e. rounded towards zero.

    Returns:
        A 2-tuple ``(head, tail)`` of DataFrames: the first ``int(split * rows)``
        rows and the remaining rows.
    """
    rows = df.shape[0]
    at = int(split * rows)
    # .iloc makes the slice explicitly positional, whatever the index type is.
    return df.iloc[:at], df.iloc[at:]

def nsplit_df(df:pd.DataFrame, splits:list=[0.7]) -> list:
    """Split a frame into consecutive row ranges at several cut points.

    Args:
        df: Frame to split; rows keep their existing order (no shuffling).
        splits: Cumulative fractions of the row count at which to cut, e.g.
            ``[0.5, 0.8]`` yields the first 50 %, the next 30 % and the last
            20 % of the rows. Values are expected in ascending order; each cut
            index is ``int(fraction * len(df))``. The default list is only
            read, never modified.

    Returns:
        A list with ``len(splits) + 1`` DataFrames, in row order.
    """
    rows = df.shape[0]
    df_splits = []
    beg = 0
    for split in splits:
        at = int(split * rows)
        # .iloc makes the slice explicitly positional, whatever the index type is.
        df_splits.append(df.iloc[beg:at])
        beg = at
    # Everything after the last cut point forms the final part.
    df_splits.append(df.iloc[beg:])
    return df_splits

def predict_model(m, X, Y, label_encoder):
    """Predict class labels for ``X`` and return them as a one-column frame.

    Args:
        m: Model exposing ``predict(X)`` that returns one score per class for
            every sample (2-D, classes along axis 1).
        X: Input samples forwarded unchanged to ``m.predict``.
        Y: Unused; kept so existing callers do not break.
        label_encoder: Object whose ``inverse_transform`` maps class indices
            back to the original labels.

    Returns:
        DataFrame with a single column ``'Y_pred'`` holding the decoded label
        of the highest-scoring class for each sample.
    """
    class_scores = m.predict(X)
    # Index of the highest score in each row = predicted class index.
    class_indices = np.argmax(class_scores, axis=1)
    decoded_labels = label_encoder.inverse_transform(class_indices)
    return pd.DataFrame(decoded_labels, columns=['Y_pred'])

def print_cm(ax, cm, labels, title='Confusion Matrix'):
    """Draw a confusion matrix as an annotated heatmap on the given axes.

    Args:
        ax: Matplotlib axes to draw on.
        cm: Confusion matrix values (rows are labelled "True label", columns
            "Predicted label").
        labels: Tick labels used for both axes.
        title: Title placed above the heatmap.

    All tick and layout adjustments are applied to ``ax`` and its own figure,
    so the function also works when ``ax`` is not pyplot's current axes
    (e.g. one panel of a multi-panel figure).
    """
    sns.heatmap(
        cm,
        annot=True,
        fmt='g',
        ax=ax,
        cmap='Blues',
        # square=True,
        xticklabels=labels,
        yticklabels=labels,
        linewidths=1,
        # annot_kws={'size': 12}
    )

    ax.set_title(title)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')

    # Previously plt.xticks / plt.yticks / plt.tight_layout were used; those
    # act on whatever axes/figure is *current*, not necessarily on `ax`.
    ax.tick_params(axis='x', labelrotation=90)
    ax.tick_params(axis='y', labelrotation=0)
    ax.figure.tight_layout()

# Preprocess data

## Load

In [4]:
# Read the dataset named in the configuration into a DataFrame.
filename = os.path.join(cfg['data_dir'], cfg['data'])
df = pd.read_csv(filename)

# Show the table itself followed by its per-column summary statistics,
# each under its own heading.
display(
    HTML('<h2>Data preview:</h2>'),
    df,
    HTML('<hr>'),
    HTML('<h2>Data statistics:</h2>'),
    df.describe(),
)

Unnamed: 0,b1_12_10_2,b2_12_10_2,b3_12_10_2,b4_12_10_2,b5_12_10_2,b6_12_10_2,b7_12_10_2,b8_12_10_2,b9_12_10_2,b10_12_10_,...,b178_12_10,b179_12_10,b180_12_10,b181_12_10,b182_12_10,b183_12_10,b184_12_10,b185_12_10,b186_12_10,DRUH_DR
0,0.002312,0.002194,0.002344,0.002613,0.002497,0.002717,0.002706,0.002898,0.002586,0.003052,...,0.046049,0.043585,0.043107,0.045929,0.051554,0.052172,0.054623,0.054570,0.054656,CR
1,0.002277,0.002337,0.002361,0.002488,0.002334,0.002713,0.002796,0.002779,0.002748,0.003034,...,0.042515,0.040856,0.039885,0.043333,0.047781,0.048725,0.047153,0.050167,0.052182,CR
2,0.002243,0.002479,0.002402,0.002322,0.002627,0.002943,0.002809,0.002854,0.002725,0.003096,...,0.044127,0.042798,0.042160,0.042548,0.048506,0.049747,0.050620,0.051325,0.054525,CR
3,0.001883,0.002045,0.002057,0.001970,0.002191,0.002052,0.002102,0.001991,0.001920,0.002308,...,0.035453,0.032728,0.033034,0.034035,0.037514,0.040421,0.040852,0.040839,0.042109,CR
4,0.002193,0.001915,0.002164,0.002090,0.002031,0.002108,0.002084,0.002123,0.001950,0.002291,...,0.026016,0.025324,0.024879,0.026462,0.029662,0.030414,0.031456,0.031279,0.032800,CR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33715,0.002469,0.002863,0.003135,0.003027,0.003056,0.003230,0.003328,0.003523,0.003396,0.003746,...,0.029615,0.028368,0.026700,0.027315,0.031556,0.033276,0.032708,0.033844,0.033287,BR
33716,0.001235,0.000966,0.000953,0.001050,0.000891,0.001143,0.000930,0.001075,0.000944,0.001074,...,0.006762,0.007508,0.006300,0.007368,0.007559,0.008111,0.009299,0.008149,0.008633,BR
33717,0.001709,0.001565,0.001225,0.001534,0.001722,0.002060,0.001738,0.001615,0.001751,0.002011,...,0.026855,0.025384,0.025439,0.025496,0.027390,0.029442,0.033317,0.032171,0.032443,BR
33718,0.001823,0.001716,0.001534,0.001772,0.001997,0.002358,0.002048,0.001988,0.001989,0.002377,...,0.029279,0.027654,0.027549,0.027910,0.030475,0.032133,0.035963,0.035193,0.035305,BR


Unnamed: 0,b1_12_10_2,b2_12_10_2,b3_12_10_2,b4_12_10_2,b5_12_10_2,b6_12_10_2,b7_12_10_2,b8_12_10_2,b9_12_10_2,b10_12_10_,...,b177_12_10,b178_12_10,b179_12_10,b180_12_10,b181_12_10,b182_12_10,b183_12_10,b184_12_10,b185_12_10,b186_12_10
count,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,...,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0,33720.0
mean,0.002247,0.002304,0.00243,0.002565,0.002615,0.002691,0.002687,0.002638,0.002487,0.002943,...,0.024226,0.029662,0.028207,0.027531,0.029403,0.031902,0.033641,0.03456,0.035118,0.035639
std,0.000882,0.000927,0.000999,0.001066,0.001103,0.001149,0.001161,0.001158,0.001098,0.001331,...,0.01498,0.01845,0.017572,0.017085,0.018232,0.019761,0.020832,0.021402,0.021732,0.022028
min,0.000369,0.000332,0.000345,0.000441,0.000444,0.00041,0.000456,0.000441,0.000372,0.000461,...,0.000646,0.00058,0.000697,0.000635,0.000652,0.000638,0.000869,0.000687,0.001013,0.000771
25%,0.001588,0.001611,0.001675,0.001761,0.001775,0.001806,0.001789,0.001735,0.001632,0.001898,...,0.011574,0.014084,0.013299,0.01306,0.013952,0.015179,0.016041,0.016421,0.01677,0.016946
50%,0.002207,0.002261,0.002381,0.00252,0.002568,0.00265,0.002657,0.002603,0.002462,0.002909,...,0.022414,0.027441,0.026038,0.025482,0.027207,0.029539,0.031093,0.03197,0.032546,0.033004
75%,0.002824,0.002898,0.003079,0.003261,0.003335,0.003446,0.003457,0.003407,0.00323,0.003837,...,0.03604,0.044239,0.042118,0.04104,0.043859,0.047575,0.050114,0.051479,0.052331,0.053096
max,0.005978,0.00626,0.006729,0.007038,0.007524,0.007449,0.00736,0.007368,0.006909,0.008184,...,0.066931,0.082097,0.078523,0.076033,0.080653,0.085611,0.091917,0.093709,0.09418,0.095604


In [5]:
# List how many columns the frame has and what they are called.
print('columns (%d): %s' % (len(df.columns), ', '.join(df.columns)))

columns (187): b1_12_10_2, b2_12_10_2, b3_12_10_2, b4_12_10_2, b5_12_10_2, b6_12_10_2, b7_12_10_2, b8_12_10_2, b9_12_10_2, b10_12_10_, b11_12_10_, b12_12_10_, b13_12_10_, b14_12_10_, b15_12_10_, b16_12_10_, b17_12_10_, b18_12_10_, b19_12_10_, b20_12_10_, b21_12_10_, b22_12_10_, b23_12_10_, b24_12_10_, b25_12_10_, b26_12_10_, b27_12_10_, b28_12_10_, b29_12_10_, b30_12_10_, b31_12_10_, b32_12_10_, b33_12_10_, b34_12_10_, b35_12_10_, b36_12_10_, b37_12_10_, b38_12_10_, b39_12_10_, b40_12_10_, b41_12_10_, b42_12_10_, b43_12_10_, b44_12_10_, b45_12_10_, b46_12_10_, b47_12_10_, b48_12_10_, b49_12_10_, b50_12_10_, b51_12_10_, b52_12_10_, b53_12_10_, b54_12_10_, b55_12_10_, b56_12_10_, b57_12_10_, b58_12_10_, b59_12_10_, b60_12_10_, b61_12_10_, b62_12_10_, b63_12_10_, b64_12_10_, b65_12_10_, b66_12_10_, b67_12_10_, b68_12_10_, b69_12_10_, b70_12_10_, b71_12_10_, b72_12_10_, b73_12_10_, b74_12_10_, b75_12_10_, b76_12_10_, b77_12_10_, b78_12_10_, b79_12_10_, b80_12_10_, b81_12_10_, b82_12_10_, b

## Prepare data (assuming the last column is the label)

In [6]:
# Every column except the last is a feature; the last column is the label.
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

# Number of distinct label values found in the data.
num_classes = len(Y.unique())

# Map the labels to integer class indices, then to one-hot rows.
label_encoder = LabelEncoder()
Y = label_encoder.fit_transform(Y)
Y_1hot = to_categorical(Y, num_classes=num_classes)

In [7]:
def make_conv1d_model(
    input_shape: tuple,
    filters: int,
    kernel_size: int,
    num_classes: int,
    num_conv_layers: int = 2,
    dropout_rate: float = 0.5,
    pool_size: int = 2
):
    """Build and compile a small 1-D convolutional classifier.

    Layer stack, in order: Input -> BatchNormalization ->
    ``num_conv_layers`` x Conv1D (sigmoid, causal padding) -> Dropout ->
    MaxPooling1D -> Flatten -> Dense(2 * num_classes) -> Dense(num_classes)
    -> Softmax.

    Args:
        input_shape: Shape of one sample, without the batch dimension.
        filters: Number of filters in every Conv1D layer.
        kernel_size: Kernel width of every Conv1D layer.
        num_classes: Number of output classes (width of the softmax output).
        num_conv_layers: How many identical Conv1D layers to stack
            (default 2, the previously hard-coded value).
        dropout_rate: Dropout rate applied after the convolutions
            (default 0.5, the previously hard-coded value).
        pool_size: Window of the max-pooling layer
            (default 2, the previously hard-coded value).

    Returns:
        A ``tf.keras.Sequential`` model compiled with the Adam optimizer,
        categorical cross-entropy loss and the ``categorical_accuracy`` metric,
        so targets are expected to be one-hot encoded.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=input_shape))
    model.add(tf.keras.layers.BatchNormalization())
    for _ in range(num_conv_layers):
        model.add(
            tf.keras.layers.Conv1D(
                filters=filters,
                kernel_size=kernel_size,
                activation=tf.nn.sigmoid,
                padding='causal'
            )
        )
    model.add(tf.keras.layers.Dropout(dropout_rate))
    model.add(tf.keras.layers.MaxPooling1D(pool_size=pool_size))
    model.add(tf.keras.layers.Flatten())
    # NOTE(review): both Dense layers use the default (linear) activation, so
    # together they amount to a single linear map — confirm this is intended.
    model.add(tf.keras.layers.Dense(num_classes * 2))
    model.add(tf.keras.layers.Dense(num_classes))
    model.add(tf.keras.layers.Softmax())
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy']
    )
    return model

In [None]:
# W&B project under which the sweep and all of its runs are recorded.
project_name = 'hysped-conv1d-sweep'

# Keras History of the most recent training run; main() rebinds it via `global`.
history = None

def main():
    """Run one sweep trial: build, train and log a Conv1D classifier.

    The hyper-parameters ``filters``, ``kernel_size`` and ``batch_size`` are
    read from ``wandb.config``. The features ``X``, the one-hot targets
    ``Y_1hot`` and ``num_classes`` are taken from the notebook namespace.
    The resulting Keras ``History`` is stored in the global ``history``.
    """
    global history
    wandb.init(project=project_name)
    model = make_conv1d_model(
        input_shape=(X.shape[1], 1),
        filters=wandb.config.filters,
        kernel_size=wandb.config.kernel_size,
        num_classes=num_classes
    )

    # Keras takes the validation samples from the END of the arrays, before
    # any shuffling, and the data preview suggests the rows are grouped by
    # label. Permute once up front so both subsets contain every class; the
    # fixed seed gives every sweep trial the same train/validation split.
    order = np.random.default_rng(0).permutation(X.shape[0])
    # Add the trailing channel axis explicitly to match input_shape above.
    features = np.expand_dims(np.asarray(X), axis=-1)[order]
    targets = np.asarray(Y_1hot)[order]

    history = model.fit(
        features,
        targets,
        epochs=50,
        # validation_split is the fraction held OUT for validation. It was 0.7,
        # which trained on only 30 % of the data; 0.3 gives a 70/30 split.
        validation_split=0.3,
        shuffle=True,
        batch_size=wandb.config.batch_size,
        callbacks=[
            WandbMetricsLogger(),
        ],
        verbose=0
    )
    # Log the final-epoch value of every tracked metric as a summary record.
    wandb.log({name: values[-1] for name, values in history.history.items()})

# Search space: random search that maximizes the validation accuracy.
sweep_configuration = dict(
    method='random',
    metric=dict(
        goal='maximize',
        name='val_categorical_accuracy',
    ),
    parameters=dict(
        # Kernel widths 3 .. 16.
        kernel_size=dict(values=list(range(3, 17))),
        # 16, 32, 64, 128 filters.
        filters=dict(values=[1 << exponent for exponent in range(4, 8)]),
        # Batch sizes 1, 2, 4, ..., 128.
        batch_size=dict(values=[1 << exponent for exponent in range(8)]),
    ),
)

# Register the sweep, then let a local agent execute 25 trials of main().
sweep_id = wandb.sweep(sweep=sweep_configuration, project=project_name)
wandb.agent(sweep_id, function=main, count=25)

Create sweep with ID: obdw3rnc
Sweep URL: https://wandb.ai/hysped/hysped-conv1d-sweep/sweeps/obdw3rnc
2023-04-03 20:34:30,536 | wandb.agents.pyagent | INFO : Starting sweep agent: entity=None, project=None, count=25


[34m[1mwandb[0m: Agent Starting Run: 5k6rbrc7 with config:
[34m[1mwandb[0m: 	batch_size: 1
[34m[1mwandb[0m: 	filters: 16
[34m[1mwandb[0m: 	kernel_size: 10
