In [None]:
import numpy as np
import pandas as pd
import os 

import tensorflow as tf
from sklearn.model_selection import train_test_split

from DeepModel import DeepModel

%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Directory (relative to this notebook) where model artefacts are stored.
model_folder = '../model'

# Checkpoint path inside that directory; it is passed to DeepModel further down.
checkpoint_path = f"{model_folder}/TerrAI.ckpt"

In [None]:
# Create the model directory when it is missing. ``exist_ok=True`` replaces the
# separate existence check (which is racy between the check and the creation),
# and ``makedirs`` also creates any missing parent directories.
os.makedirs(model_folder, exist_ok=True)

## Ensure the available GPU is utilized correctly

In [None]:
# Ask TensorFlow which GPUs are visible to this process (empty list if none).
physical_devices = tf.config.list_physical_devices("GPU")

# Enable memory growth on every visible GPU rather than only the first one:
# TensorFlow requires the memory-growth setting to be the same across GPUs,
# so configuring a single device can raise on multi-GPU machines.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

## Load training data

In [None]:
# Read the training table, then discard every row containing a missing value.
data = pd.read_csv('../Dataset/Train.csv')
data = data.dropna()

# Display (rows, columns) of the cleaned table.
data.shape

In [None]:
# Every column except the sample identifier and the target is used as a feature.
main_cols = data.columns.difference(['Sample_ID', 'Label'])
X = data[main_cols]
y = data['Label']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2022,
)

In [None]:
# Show the dimensions of the training features and training labels after the split.
X_train.shape, y_train.shape

In [None]:
# Per-sample input shape: a leading axis of length 1 followed by one entry per feature column.
input_size = (1, X_train.shape[1])
input_size

## Calculate the output class distribution percentage
Used to ensure that the class weights are adjusted properly.

In [None]:
# Number of samples labelled 0, and the total number of labels (full data set, not only the training split).
zero_state_count = int((y == 0).sum())
total_label_count = y.shape[0]
zero_state_count, total_label_count

In [None]:
# Class 0 keeps unit weight; class 1 is weighted by 1 + (fraction of samples labelled 0).
zero_weight = 1
one_weight = 1 / total_label_count * zero_state_count + 1

# Mapping from class label to its weight; passed to model.train as `class_weight`.
class_weight = {0: zero_weight, 1: one_weight}
class_weight

## Dataset generator

In [None]:
def fire(X, y, batch_size=64):
    return_index = 0
    max_index = len(y) // batch_size
    
    for i in range(1, max_index):
        return np.expand_dims(X[batch_size * i:batch_size * (i+1)], axis=1), np.expand_dims(y[batch_size * i:batch_size * (i+1)], axis=1)

In [None]:
X_train_np = np.array(X_train)
y_train_np = np.array(y_train)

X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

train_dataset = tf.data.Dataset.from_generator(
    generator=lambda: fire(X=X_train_np, y=y_train_np),
    output_types=(tf.float64, tf.float64),
    output_shapes=([None] + list(input_size), [None, 1]))

test_dataset = tf.data.Dataset.from_generator(
    generator=lambda: fire(X=X_test_np, y=y_test_np),
    output_types=(tf.float64, tf.float64),
    output_shapes=([None] + list(input_size), [None, 1]))

In [None]:
a, b = fire(X=np.array(X_test), y=np.array(y_test))
a.shape, b.shape

In [None]:
# Instantiate the project's DeepModel with the checkpoint path, the literal 1, and the per-sample input shape.
# NOTE(review): the meaning of the second argument (1) is not visible here — presumably the number of outputs; confirm in DeepModel.
model = DeepModel(checkpoint_path, 1, input_size)

In [None]:
# Train on the generator-backed datasets for 50 epochs, passing the class weights computed above.
# NOTE(review): batch_size=64 equals fire()'s default batch size; how DeepModel.train uses it with already-batched datasets is not visible here — confirm.
model.train(train_dataset, test_dataset, class_weight=class_weight, epochs=50, batch_size=64)