In [1]:
# Mount Google Drive at /content/drive so files stored there can be read
# through the Colab filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Root folder of the project on the mounted Drive; the data and checkpoint
# paths used later are built relative to it.
path = '/content/drive/MyDrive/kw-ai'

In [3]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np

In [4]:
# Training features: resolve the CSV location, then read it.
x_train_path = os.path.join(path, 'data/train_features.csv')
x_train = pd.read_csv(x_train_path)

# Training labels.
y_train_path = os.path.join(path, 'data/train_labels.csv')
y_train = pd.read_csv(y_train_path)

# Test features.
x_test_path = os.path.join(path, 'data/test_features.csv')
x_test = pd.read_csv(x_test_path)

# Sample submission, used later as the template for the prediction file.
sub_path = os.path.join(path, 'data/sample_submission.csv')
sub = pd.read_csv(sub_path)

In [5]:
# make dataset
def make_dataset(data):
  """Stack the rows of each `id` into one array per series.

  Rows are grouped by the `id` column. The `id` and `time` columns are
  dropped and every remaining column becomes a feature. Series appear in the
  order their `id` first occurs in `data`, and rows inside a series keep the
  order they have in `data` (they are not re-sorted by `time`).

  Args:
    data: DataFrame with `id` and `time` columns plus the feature columns.

  Returns:
    numpy array of shape (n_series, n_steps, n_features). An input without
    rows yields an empty array.

  Raises:
    ValueError: if the series do not all have the same number of rows, since
      they could not be stacked into one rectangular array.
  """
  # sort=False keeps the groups in order of first appearance, which matches
  # iterating over data['id'].unique().
  series_data = [
      group.drop(columns=['id', 'time']).to_numpy()
      for _, group in data.groupby('id', sort=False)
  ]

  # Fail loudly on ragged input instead of producing an object array or an
  # opaque numpy error.
  lengths = {len(series) for series in series_data}
  if len(lengths) > 1:
    raise ValueError(
        'all ids must have the same number of rows, got lengths: '
        + str(sorted(lengths)))

  return np.array(series_data)

In [6]:
# Build the stacked per-id arrays for the training and the test features.
series_train, series_test = [make_dataset(frame) for frame in (x_train, x_test)]

In [7]:
# Sanity check: display the shapes of the stacked train and test arrays.
series_train.shape, series_test.shape

((3125, 600, 6), (782, 600, 6))

In [8]:
import tensorflow as tf

In [9]:
# One-hot encode the `label` column of the training labels.
# NOTE(review): assumes the rows of y_train are in the same order as the
# series produced by make_dataset(x_train) — TODO confirm.
cat_y = tf.keras.utils.to_categorical(y_train['label'])

In [10]:
# Display (n_samples, n_classes) of the one-hot label matrix.
cat_y.shape

(3125, 61)

In [11]:
# Input pipeline for training: (series, one-hot label) pairs.
BATCH_SIZE = 64
train_dataset = tf.data.Dataset.from_tensor_slices((series_train, cat_y))
# Shuffle individual samples *before* batching. Shuffling after `batch` only
# permutes whole batches, so every batch would contain the same samples in
# every epoch. A buffer as large as the dataset gives a full shuffle.
train_dataset = train_dataset.shuffle(len(series_train), seed=42).batch(BATCH_SIZE)
# Overlap input preparation with training steps.
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [12]:
# Pull one batch from the pipeline and display the shape of its feature tensor.
next(iter(train_dataset))[0].shape

TensorShape([64, 600, 6])

In [13]:
from tensorflow import keras

In [14]:
# TPU: use a TPU strategy when a TPU can be resolved, otherwise fall back to
# TensorFlow's default strategy.
try:
    # No parameters are necessary if the TPU_NAME environment variable is set
    # (on Kaggle this is always the case).
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver raising ValueError is treated as "no TPU available".
    tpu = None

if not tpu:
    # Default distribution strategy in TensorFlow; works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before creating the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


In [28]:

# Build and compile the model inside the distribution strategy's scope so its
# variables are created under the strategy selected earlier. This is required
# for TPUStrategy and has no effect under the default strategy.
with strategy.scope():
    model = keras.models.Sequential([
        # Two Conv1D stages (128 filters, kernel size 64, no padding) extract
        # local patterns and shorten the sequence before the recurrent layer.
        keras.layers.Conv1D(128, 64, input_shape=[600, 6]),
        keras.layers.BatchNormalization(),
        keras.layers.Activation('relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Conv1D(128, 64),
        keras.layers.Activation('relu'),
        # The GRU returns the full sequence; its outputs are averaged over
        # time before classification.
        keras.layers.GRU(256, return_sequences=True),
        keras.layers.GlobalAveragePooling1D(),
        # One softmax unit per class, matching the 61 one-hot label columns.
        keras.layers.Dense(61, activation='softmax')
    ])

    model.compile(optimizer=keras.optimizers.RMSprop(0.003*0.2),
                  loss='categorical_crossentropy', metrics=['accuracy'])

In [29]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_8 (Conv1D)            (None, 537, 128)          49280     
_________________________________________________________________
batch_normalization_4 (Batch (None, 537, 128)          512       
_________________________________________________________________
activation_8 (Activation)    (None, 537, 128)          0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 537, 128)          0         
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 474, 128)          1048704   
_________________________________________________________________
activation_9 (Activation)    (None, 474, 128)          0         
_________________________________________________________________
gru_4 (GRU)                  (None, 474, 256)         

In [32]:
# Checkpoint location: <path>/checkpoint/base_ckpt.hdf5
ckpt_name = 'base_ckpt.hdf5'
checkpoint_dir_path = os.path.join(path, 'checkpoint')
checkpoint_path = os.path.join(checkpoint_dir_path, ckpt_name)

# Create the checkpoint directory if it does not exist yet. makedirs with
# exist_ok=True also creates missing parents and does not fail when the
# directory is already there.
os.makedirs(checkpoint_dir_path, exist_ok=True)

callbacks_list = [
    # Save the model weights whenever the training loss reaches a new minimum
    # (save_best_only=True), not after every epoch.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='loss',
        mode='min',
        save_weights_only=True,
        save_best_only=True
    ),
    # Stop training when the training loss has not improved for 10 consecutive
    # epochs. The training loss is monitored because no validation data is
    # passed to fit(), so there is no val_loss to watch.
    tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        mode='min',
        verbose=1,
        patience=10
    )
]

In [33]:
# Train for up to 100 epochs; the callbacks save the best weights and may stop
# the run early. No validation data is passed, so only training metrics exist.
hist = model.fit(train_dataset, callbacks=callbacks_list, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100

KeyboardInterrupt: ignored

In [34]:
# Restore the weights stored at checkpoint_path (the lowest-training-loss
# weights written by the ModelCheckpoint callback).
model.load_weights(checkpoint_path)

In [36]:
# Run the model on every test series; each row holds the softmax outputs.
pred = model.predict(series_test)

In [37]:
# Display (n_test_series, n_classes) of the prediction matrix.
pred.shape

(782, 61)

In [40]:
# Fill every column after the first with the predicted probabilities.
# The columns are replaced by label rather than written in place through
# `iloc`, so the float predictions are not forced into whatever dtype the
# sample file's placeholder columns were read with.
# NOTE(review): assumes the rows of `sub` are in the same order as the series
# in `series_test` — TODO confirm.
assert pred.shape == (len(sub), sub.shape[1] - 1), \
    "prediction shape does not match the submission's value columns"
sub[sub.columns[1:]] = pred

In [42]:
# Write the submission to base.csv in the current working directory, without
# the DataFrame index column.
sub.to_csv('base.csv', index=False)