In [2]:
import os
import pickle
import time
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import seaborn as sns

import tensorflow as tf
# NOTE: `tf` is only a local alias created by the line above; the import
# system resolves package names, not aliases, so `from tf.keras... import`
# raises ModuleNotFoundError on a fresh kernel. Import through the real
# package name instead.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
# Take the optimizers from the same tf.keras implementation as the model and
# layer classes, rather than from the standalone `keras` package, so that all
# Keras objects used together come from one implementation.
from tensorflow.keras.optimizers import SGD, Adam

from avaml.aggregatedata import ForecastDataset, LabeledData, REG_ENG, CsvMissingError
from utils.process import *

In [3]:
# Display the version string of the TensorFlow build loaded in this kernel.
tf.__version__

'2.2.0'

# Download, read in, preprocess data

In [4]:
# Settings used when building the labeled dataset.
model_prefix = ''
days = 2
regobs_types = [*REG_ENG.keys()]

# Prefer the CSV route; only when LabeledData reports the CSV as missing do we
# rebuild the dataset via ForecastDataset and write it out with to_csv().
labeled_data = None
try:
    print("Reading csv")
    labeled_data = LabeledData.from_csv(
        days=days,
        regobs_types=regobs_types,
        with_varsom=False,
    )
except CsvMissingError:
    print("Csv missing. Fetching online data. (This takes a long time.)")
    labeled_data = ForecastDataset(
        regobs_types=regobs_types,
    ).label(
        days=days,
        with_varsom=False,
    )
    labeled_data.to_csv()

Reading csv


In [5]:
# Run the preprocessing helper on the loaded data, then pull out the feature
# frame (`.data`) and the label frame (`.label`) with their two index levels
# swapped, so the frames display with (region, date) as the index.
labeled_data = preprocess(labeled_data)
regions, labels = (
    frame.reorder_levels([1, 0])
    for frame in (labeled_data.data, labeled_data.label)
)

In [6]:
# Encode the cause columns first, then the aspect columns, of the label frame.
labels = encode_aspects(encode_causes(labels))

In [7]:
# Display the feature frame (indexed by region and date) for a visual check.
regions

Unnamed: 0_level_0,Unnamed: 1_level_0,precip_most_exposed_0,precip_0,wind_speed_0,wind_change_speed_0,temp_min_0,temp_max_0,temp_lev_0,temp_freeze_lev_0,wind_dir_N_0,wind_dir_NE_0,...,regobs_snowprofile_h_f_4_2,regobs_snowprofile_h_4_f_4_2,regobs_snowprofile_w_m_4_2,regobs_snowprofile_w_w_4_2,regobs_snowprofile_w_v_4_2,regobs_snowprofile_w_s_4_2,regobs_snowprofile_t_max_4_2,regobs_snowprofile_t_mean_4_2,regobs_snowprofile_t_min_4_2,accuracy_2
region,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
3031,2017-11-22,35.75,25.7,18.5,0.0,-10.9,-3.5,1050.0,564.45,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3034,2017-11-22,56.40,47.0,18.5,0.0,-11.2,-2.0,1400.0,795.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3031,2017-11-23,79.70,55.4,15.5,0.0,-5.5,3.0,1050.0,685.78,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3034,2017-11-23,65.90,53.1,18.5,0.0,-5.6,3.0,1400.0,1117.89,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3007,2017-11-29,9.70,9.3,12.0,0.0,-10.0,-4.9,1050.0,107.41,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3031,2021-01-12,0.00,0.0,9.0,0.0,-17.0,-7.0,1100.0,0.00,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3032,2021-01-12,0.00,0.0,12.0,0.0,-21.0,-11.0,1400.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3034,2021-01-12,2.00,0.0,9.0,0.0,-19.0,-11.0,1400.0,0.00,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3035,2021-01-12,0.00,0.0,12.0,0.0,-18.0,-10.0,1100.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Display the encoded label frame (indexed by region and date) for a visual check.
labels

Unnamed: 0_level_0,Unnamed: 1_level_0,CLASS_danger_level,CLASS_emergency_warning,CLASS_problem_1,CLASS_problem_2,CLASS_problem_3,CLASS_problem_amount,CLASS_drift_slab_dist,CLASS_drift_slab_dsize,CLASS_drift_slab_lev_fill,CLASS_drift_slab_prob,...,MULTI_wet_loose_aspect_6,MULTI_wet_loose_aspect_7,MULTI_wet_slab_aspect_0,MULTI_wet_slab_aspect_1,MULTI_wet_slab_aspect_2,MULTI_wet_slab_aspect_3,MULTI_wet_slab_aspect_4,MULTI_wet_slab_aspect_5,MULTI_wet_slab_aspect_6,MULTI_wet_slab_aspect_7
region,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
3031,2017-11-22,4,0,1,0,0,1,3,3,1,5,...,0,0,0,0,0,0,0,0,0,0
3034,2017-11-22,4,0,1,0,0,1,3,3,1,5,...,0,0,0,0,0,0,0,0,0,0
3031,2017-11-23,4,0,1,0,0,1,3,3,1,5,...,0,0,0,0,0,0,0,0,0,0
3034,2017-11-23,4,0,1,0,0,1,3,3,1,5,...,0,0,0,0,0,0,0,0,0,0
3007,2017-11-29,3,1,4,5,0,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3031,2021-01-12,3,0,5,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3032,2021-01-12,3,0,5,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3034,2021-01-12,3,0,5,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3035,2021-01-12,3,0,5,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Split data

In [17]:
# Region ids assigned to each split; no region id appears in more than one list.
train_idx = [
    3007, 3012, 3010, 3009, 3013, 3017, 3014, 3032,
    3027, 3029, 3022, 3031, 3023, 3037, 3024, 3028,
]
val_idx = [3011, 3016, 3035]
test_idx = [3006, 3015, 3034]

# Training features/labels: one NumPy array per region, each with a new
# leading axis of length 1 in front of that region's rows and columns.
# (An earlier variant of this cell built every split as a single frame via
# pd.concat([regions.loc[idx] for idx in ...]) instead of a per-region list.)
X_train, y_train = (
    [frame.loc[idx].values[np.newaxis, ...] for idx in train_idx]
    for frame in (regions, labels)
)

# Validation and test features/labels: one pandas selection per region,
# left as returned by `.loc` (not converted to arrays).
X_val, y_val = (
    [frame.loc[idx] for idx in val_idx]
    for frame in (regions, labels)
)
X_test, y_test = (
    [frame.loc[idx] for idx in test_idx]
    for frame in (regions, labels)
)

In [18]:
# Print the shape of every per-region training array; the number of rows
# (second axis) is not the same for all regions.
for arr in X_train:
    print(np.shape(arr))

(1, 605, 873)
(1, 605, 873)
(1, 605, 873)
(1, 605, 873)
(1, 604, 873)
(1, 604, 873)
(1, 604, 873)
(1, 604, 873)
(1, 604, 873)
(1, 604, 873)
(1, 604, 873)
(1, 606, 873)
(1, 604, 873)
(1, 352, 873)
(1, 604, 873)
(1, 604, 873)


In [24]:
# Join the per-region training arrays along the row axis (axis 1) and show
# the shape of the combined array.
np.concatenate(X_train, axis=1).shape

(1, 9418, 873)

In [29]:
# (rows, feature columns) of the full feature frame across all regions.
regions.shape

(13217, 873)

In [28]:
# (rows, label columns) of the full encoded label frame across all regions.
labels.shape

(13217, 138)

```python
model = models.Sequential([
    layers.Flatten(input_shape=(None, regions.shape[1])),
    layers.Dense(100, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(labels.shape[1], activation='softmax')
])

model.compile(loss='mean_squared_error', optimizer='adam')
history = model.fit(X_train, y_train, 
                              batch_size=??, 
                              epochs=3).history
```