Always stick to the Standard Code after you have loaded your data!!

In [1]:
import os
import shutil
import string

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder

**Note:** Make sure you read the **dataset** into the variable `dataset`, so that you can use the code below directly.

In [None]:
# Read the raw CSV into `dataset` and preview its first rows.
dataset = pd.read_csv(filepath_or_buffer='weather.csv')
dataset.head(n=5)

# Default Models: 

For classification and Regression:

In [None]:
# Fully connected baseline: `features` inputs -> one hidden layer -> `classes` raw outputs
# (the last layer has no activation).
features = 9
classes = 1

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=[features]))
model.add(tf.keras.layers.Dense(128, activation='elu', kernel_initializer='he_normal'))
model.add(tf.keras.layers.Dense(classes))

In [None]:
# Two stacked LSTMs over sequences of any length with `features` values per step.
features = 9
classes = 1

model = tf.keras.models.Sequential()
# The first LSTM returns the full sequence so the second one can consume it.
model.add(tf.keras.layers.LSTM(50, return_sequences=True, input_shape=[None, features]))
model.add(tf.keras.layers.LSTM(50))
model.add(tf.keras.layers.Dense(classes))

In [None]:
# Text model: token ids -> embeddings -> two stacked LSTMs -> single raw output.
# VOCABSIZE and MAX_SEQUENCE must already be defined when this cell runs.
embedding_dim = 64

model = tf.keras.Sequential()
# mask_zero=True marks id 0 as padding for the layers that follow.
model.add(tf.keras.layers.Embedding(input_dim=VOCABSIZE,
                                    output_dim=embedding_dim,
                                    mask_zero=True,
                                    input_length=MAX_SEQUENCE))
model.add(tf.keras.layers.LSTM(50, return_sequences=True))
model.add(tf.keras.layers.LSTM(50))
model.add(tf.keras.layers.Dense(1))

In [None]:
# Text model with bidirectional LSTMs and a 5-unit raw output layer.
# VOCABSIZE and MAX_SEQUENCE must already be defined when this cell runs.
embedding = 64

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=VOCABSIZE,
                                    output_dim=embedding,
                                    input_length=MAX_SEQUENCE,
                                    mask_zero=True))
# The first recurrent layer returns every step so the second one receives a sequence.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(5))

# Vision

In [None]:
# Image folders are read from ./dataset/cats_and_dogs/{train,validation}
root_path = os.path.join(os.curdir, 'dataset', 'cats_and_dogs')

# Loader settings
BATCH = 32
IMG_HEIGHT = IMG_WIDTH = 224
BUFFER = 1000
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Both splits share the same batch size and image size.
# Add validation_split, subset and seed here when a single folder must be split.
loader_options = dict(batch_size=BATCH, image_size=(IMG_HEIGHT, IMG_WIDTH))
train = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(root_path, 'train'), **loader_options)
val = tf.keras.preprocessing.image_dataset_from_directory(
    os.path.join(root_path, 'validation'), **loader_options)

# Cache both streams; only the training stream is shuffled.
train = train.cache().shuffle(BUFFER).prefetch(AUTOTUNE)
val = val.cache().prefetch(AUTOTUNE)

In [None]:
# Random rotation followed by random contrast, packaged as one reusable layer stack.
augmentation = tf.keras.Sequential()
augmentation.add(tf.keras.layers.RandomRotation(factor=0.2))
augmentation.add(tf.keras.layers.RandomContrast(factor=0.4))

In [None]:
# Scales pixel values by 1/255 inside the model.
rescaled = tf.keras.layers.Rescaling(1. /255)

# Input -> rescale -> augment, followed by the convolutional classifier.
preprocessing_layers = [
    tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    rescaled,
    augmentation,
]
classifier_layers = [
    tf.keras.layers.Conv2D(16, 3, activation="relu"),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, activation="relu"),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, 3, activation="relu"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1),  # single raw output, no activation
]
model = tf.keras.Sequential(preprocessing_layers + classifier_layers)

In [None]:
# Same convolutional classifier without the rescaling/augmentation front end.
model = tf.keras.Sequential([tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))])

# Two Conv -> MaxPool stages with 16 and 32 filters ...
for filters in (16, 32):
    model.add(tf.keras.layers.Conv2D(filters, 3, activation="relu"))
    model.add(tf.keras.layers.MaxPooling2D())

# ... then a last convolution and the dense head (single raw output).
model.add(tf.keras.layers.Conv2D(64, 3, activation="relu"))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(1))

In [None]:
# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line to the output.
model.summary()

# from_logits=True: the loss applies the sigmoid itself, so the model's last
# layer must output raw scores.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=['accuracy'])

# Stop after 15 epochs without improvement and keep only the best model on disk.
earlystop = tf.keras.callbacks.EarlyStopping(patience=15)
checkpoint = tf.keras.callbacks.ModelCheckpoint('best_classifier/', save_best_only=True)

history = model.fit(train, validation_data=val, epochs=50, callbacks=[earlystop, checkpoint])

# One curve per logged metric (train and validation) across epochs.
pd.DataFrame(history.history).plot(figsize=(10, 10))
plt.grid()
plt.show()

In [None]:

# Transfer learning: always pass the image shape to the pretrained backbone.
basemodel = tf.keras.applications.MobileNetV2(include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
basemodel.trainable = False   # freeze the pretrained weights
basemodel.summary()

preprocess_layer = tf.keras.applications.mobilenet_v2.preprocess_input
# preprocess_input expects raw pixel values in [0, 255] and does its own scaling,
# so no Rescaling layer is used in this model.

# Order matters: augment the raw image first, then apply preprocess_layer.

NUM_CLASSES = 1
input_layer = tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
x = augmentation(input_layer)
x = preprocess_layer(x)
# training=False keeps the backbone in inference mode.
x = basemodel(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
out = tf.keras.layers.Dense(NUM_CLASSES)(x)   # raw output, no activation

model = tf.keras.Model(inputs=input_layer, outputs=out)

# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line to the output.
model.summary()

model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy'])

# Stop after 15 epochs without improvement and keep only the best model on disk.
earlystop = tf.keras.callbacks.EarlyStopping(patience=15)
checkpoint = tf.keras.callbacks.ModelCheckpoint('best_classifier/', save_best_only=True)

history = model.fit(train, validation_data=val, epochs=50, callbacks=[earlystop, checkpoint])

pd.DataFrame(history.history).plot(figsize=(10, 10))
plt.grid()
plt.show()

# Compilation:

In [None]:
# Binary classification on raw scores: the loss applies the sigmoid itself.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [None]:
# Regression: mean squared error is both the loss and the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=['mse'],
)

# Callbacks:

In [None]:
# Stop after 10 epochs without improvement; keep only the best model under best_temp/.
earlystop = tf.keras.callbacks.EarlyStopping(patience=10)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_temp/',
    save_best_only=True,
)

In [None]:
class myCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs=None):
    '''
    Halts the training once the training loss drops below 0.4

    Args:
      epoch (integer) - index of epoch (required but unused in the function definition below)
      logs (dict) - metric results from the training epoch; may be None
    '''
    # A None default avoids sharing one mutable dict across calls.
    loss = (logs or {}).get('loss')

    # Skip the check when no loss was logged instead of comparing None < 0.4.
    # Alternative criterion: (logs or {}).get('accuracy') > 0.95
    if loss is not None and loss < 0.4:

      # Stop if threshold is met
      print("\nLoss is lower than 0.4 so cancelling training!")
      self.model.stop_training = True

# Instantiate class
callbacks = myCallback()

# Time Series:

In [None]:
# Parse the 'date' column, make it the index, and order rows chronologically.
dataset['date'] = pd.to_datetime(dataset['date'])
dataset.set_index(keys='date', inplace=True)
dataset = dataset.sort_index()

# Time Format: 2004-03-10 18:00:00

In [None]:
# When a flow speed and its direction (in degrees) are given, replace them
# with the x/y components of the flow vector.
wv = dataset.pop('Speed (km/h)')
wd = dataset.pop('Direction (degrees)') * np.pi / 180   # degrees -> radians

dataset['Wx'], dataset['Wy'] = wv * np.cos(wd), wv * np.sin(wd)

In [None]:
# If periodic data is present, encode time-of-day and time-of-year as sin/cos pairs.
# NOTE(review): assumes dataset.index holds pandas Timestamps (e.g. after the
# set_index('date') step) — confirm before running.

# Seconds since the epoch for every row
timestamp_s = dataset.index.map(pd.Timestamp.timestamp)

# Period lengths in seconds; these give the model direct access to the daily
# and yearly frequencies.
day = 24*60*60
year = (365.2425)*day

# The features are written to `dataset` (the frame used everywhere else in this file).
dataset['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
dataset['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
dataset['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
dataset['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

# Reducing Memory Usage:

In [None]:
# Lets Reduce the Size of the dataset

def Reduce_Me(dataset):
    """Downcast numeric columns of ``dataset`` to the narrowest dtype that fits.

    Integer columns are tried against int8/int16/int32/int64 and float columns
    against float16/float32/float64; the first type whose range contains the
    column's min and max is used. Columns of any other dtype (strings, booleans,
    datetimes, extension dtypes) are left untouched.

    Note that float16/float32 keep fewer significant digits, so downcast float
    values may be rounded.

    Args:
        dataset: pandas DataFrame; it is modified in place.

    Returns:
        The same DataFrame object, after downcasting.
    """
    initial = dataset.memory_usage().sum() / 1024**2
    print("Initial Memory : {:.2f} MB".format(initial))

    for column in dataset.columns:
        values = dataset[column]
        dtype = values.dtype

        # Extension dtypes are not NumPy dtypes and cannot be range-checked here.
        if not isinstance(dtype, np.dtype) or values.empty:
            continue

        if np.issubdtype(dtype, np.integer):
            candidates, limits_of = (np.int8, np.int16, np.int32, np.int64), np.iinfo
            # Python ints compare exactly against the iinfo bounds.
            min_, max_ = int(values.min()), int(values.max())
        elif np.issubdtype(dtype, np.floating):
            candidates, limits_of = (np.float16, np.float32, np.float64), np.finfo
            min_, max_ = float(values.min()), float(values.max())
        else:
            continue

        # Candidates are ordered narrowest first; stop at the first one that fits.
        # NaN or infinite bounds fail every comparison, leaving the column as is.
        for candidate in candidates:
            limits = limits_of(candidate)
            if limits.min <= min_ and max_ <= limits.max:
                dataset[column] = values.astype(candidate)
                break

    final = dataset.memory_usage().sum() / 1024**2
    reduction = (initial - final) / initial * 100 if initial else 0.0
    print("Final Memory : {:.2f} MB".format(final))
    print("Reduced By: {:.2f}%".format(reduction))
    return dataset

# Label Encoder:

In [None]:
# Replace every object-typed column with integer codes, one fresh encoder per column.
object_columns = dataset.select_dtypes('object').columns
for column in object_columns:
  encoder = LabelEncoder()
  encoder.fit(dataset[column])
  dataset[column] = encoder.transform(dataset[column])

In [None]:
# In presence of null values: encode only the non-null entries and keep NaN as NaN.

for column in dataset.select_dtypes('object').columns:
  encoder = LabelEncoder()
  # Positional mask of the rows that actually hold a label
  present = dataset[column].notnull().to_numpy()
  # Start from all-NaN so missing entries stay missing (the column becomes float)
  encoded = np.full(len(dataset), np.nan)
  if present.any():
    # The encoder never sees NaN, so it cannot fail on mixed types or
    # learn an extra class for the missing value.
    encoded[present] = encoder.fit_transform(dataset[column].to_numpy()[present])
  dataset[column] = encoded

In [None]:
from sklearn.preprocessing import LabelEncoder

# One direct way of encoding: apply the encoder column by column and write the
# codes back into `dataset` (the frame the columns were selected from).
columns = dataset.select_dtypes('object').columns
dataset[columns] = dataset[columns].apply(LabelEncoder().fit_transform)
# LabelEncoder works on one column at a time, hence the apply().
# NOTE(review): OneHotEncoder().fit_transform(dataset[columns]) is an alternative
# that takes several columns at once — confirm it suits the model.

# Null Values:

In [None]:
# Count the missing values per column and show the five worst columns.
# 'percentage' holds the fraction of rows that are null (count / number of rows).
null = dataset.isnull().sum().to_frame(name="Total")
null['percentage'] = null['Total'] / len(dataset)
null.sort_values('percentage', ascending=False).head()

In [None]:
# Forward fill for time series data: each gap takes the last known value.
# Leading nulls have nothing before them and remain; fill those afterwards
# with the mean or median.
# ffill() replaces the deprecated fillna(method="ffill").
dataset.ffill(inplace=True)

In [None]:
# Any nulls still left in 'variable' are replaced by the column mean.
meanvalue = dataset['variable'].mean()   # .median() is the alternative
dataset['variable'] = dataset['variable'].fillna(meanvalue)

# Plotting Functions

In [None]:
# Plot the daily sin/cos features. The column names must match the ones
# created in the periodic-data cell ('Day sin' / 'Day cos').
plt.figure(figsize=(10,10))
plt.plot(dataset['Day sin'].to_numpy(), label='Day sin')
plt.plot(dataset['Day cos'].to_numpy(), label='Day cos')
plt.legend()
plt.show()

In [None]:
# Optional overview: one stacked subplot per column.
n_columns = len(dataset.columns)

plt.figure(figsize=(20, 20))
for position, col in enumerate(dataset.columns, start=1):
    plt.subplot(n_columns, 1, position)
    plt.plot(dataset[col], label=col)
    plt.legend()
plt.subplots_adjust(hspace=0.3, wspace=0.5)

In [None]:
# EDA: one KDE plot per float column, laid out in a two-column grid.

plt.figure(figsize=(25, 25))
column = dataset.select_dtypes("float").columns

# Keep the 8x2 layout for up to 16 columns and add rows beyond that,
# so a wider dataset no longer overflows the grid.
n_cols = 2
n_rows = max(8, int(np.ceil(len(column) / n_cols)))

for idx, col in enumerate(column):
    ax = plt.subplot(n_rows, n_cols, idx+1)
    sns.kdeplot(dataset[col], ax=ax)
    plt.xlabel(col, fontsize=12)
    plt.ylabel("Density")
    plt.grid()
plt.subplots_adjust(wspace=0.5, hspace=0.5)

In [None]:
# For imbalanced dataset

def plot_metrics(history, metrics=('loss', 'prc', 'precision', 'recall')):
  """Plot training (and, when logged, validation) curves for each metric.

  Args:
    history: object returned by model.fit; its ``epoch`` list and ``history``
      dict are read.
    metrics: names of the metrics to plot. The default four give the
      original 2x2 grid; longer lists add rows.
  """
  plt.figure(figsize=(20,20))
  n_cols = 2
  n_rows = max(1, -(-len(metrics) // n_cols))   # ceiling division
  for n, metric in enumerate(metrics):
    name = metric.replace("_"," ").capitalize()
    plt.subplot(n_rows, n_cols, n+1)
    plt.plot(history.epoch, history.history[metric], label='Train')
    # The validation curve exists only when fit() was given validation data.
    val_key = 'val_'+metric
    if val_key in history.history:
      plt.plot(history.epoch, history.history[val_key],
               linestyle="--", label='Val')
    plt.xlabel('Epoch')
    plt.ylabel(name)
    if metric == 'loss':
      # Loss has no fixed upper bound: anchor at 0 and keep the automatic top.
      plt.ylim([0, plt.ylim()[1]])
    elif metric == 'auc':
      plt.ylim([0.8,1])
    else:
      plt.ylim([0,1])

    plt.legend()

# Data Split:

In [None]:
# Ordered 70% / 20% / 10% split into train / validation / test (no shuffling).
n_rows = len(dataset)
train_end = int(0.7 * n_rows)
val_end = int(0.9 * n_rows)

train = dataset[:train_end]
val = dataset[train_end:val_end]
test = dataset[val_end:]

In [None]:
# Hold out 20% of the rows as a test set. No random_state is passed,
# so the split differs from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    dataset,
    target,
    test_size=0.2,
)

In [None]:
# Carve a validation set (last 20% of a random permutation) out of X_train / Y_train.
train_size = int(len(X_train) * 0.8)
index = tf.random.shuffle(tf.range(len(X_train)))
train_index, val_index = index[:train_size], index[train_size:]

train = tf.gather(X_train, train_index)
train_label = tf.gather(Y_train, train_index)
val = tf.gather(X_train, val_index)
val_label = tf.gather(Y_train, val_index)

In [None]:
def train_val_split(data, target, train_size=0.8):
  """Randomly split ``data`` and ``target`` into training and validation parts.

  Args:
    data: indexable collection of samples.
    target: labels aligned with ``data``.
    train_size: fraction of the samples that goes to the training part.

  Returns:
    ((x_train, y_train), (x_val, y_val)) as tensors gathered with one shared
    random permutation.
  """
  n_samples = len(data)
  cut = int(n_samples * train_size)
  order = tf.random.shuffle(tf.range(n_samples))
  train_idx, val_idx = order[:cut], order[cut:]

  train_pair = (tf.gather(data, train_idx), tf.gather(target, train_idx))
  val_pair = (tf.gather(data, val_idx), tf.gather(target, val_idx))
  return train_pair, val_pair

In [None]:
# train_val_split returns ((x_train, y_train), (x_val, y_val)), so after this
# line `train` and `val` are each a (features, labels) pair.
train, val = train_val_split(train, labels)

In [None]:
# Pull the 'Target_label' column out of each split and keep the rest as features.
train_label = train['Target_label']
train = train.drop(columns=['Target_label'])

val_label = val['Target_label']
val = val.drop(columns=['Target_label'])

test_label = test['Target_label']
test = test.drop(columns=['Target_label'])

# Standardization:

In [None]:
# Statistics come from the training split only and are reused for every split.
# The tiny constant keeps the division finite for zero-variance columns.
mean, std = train.mean(), train.std() + 1e-12

train, val, test = ((split - mean) / std for split in (train, val, test))

# Data Loaders:

In [None]:
def non_sequential_train_loader(data, labels, batchsize=32, buffersize=100):
  """Build a cached, shuffled, batched and prefetched pipeline of (data, label) pairs.

  Args:
    data: features, sliced along the first axis.
    labels: targets aligned with ``data``.
    batchsize: number of samples per batch.
    buffersize: size of the shuffle buffer.
  """
  pairs = tf.data.Dataset.from_tensor_slices((data, labels))
  shuffled = pairs.cache().shuffle(buffersize)
  batched = shuffled.batch(batchsize)
  return batched.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
def non_sequential_val_loader(data, labels, batchsize=32):
  """Build a cached, batched and prefetched pipeline of (data, label) pairs, in order.

  Args:
    data: features, sliced along the first axis.
    labels: targets aligned with ``data``.
    batchsize: number of samples per batch.
  """
  pairs = tf.data.Dataset.from_tensor_slices((data, labels))
  batched = pairs.cache().batch(batchsize)
  return batched.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
def non_sequential_test_loader(data, batchsize=32):
  """Build a cached, batched and prefetched pipeline of unlabeled samples, in order.

  Args:
    data: features, sliced along the first axis.
    batchsize: number of samples per batch.
  """
  samples = tf.data.Dataset.from_tensor_slices(data)
  batched = samples.cache().batch(batchsize)
  return batched.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
def create_sequential_train_loader(series, window_size=24, batchsize=32, buffersize=100):
    """Turn a 2-D series into shuffled batches of (window, next-step target).

    Each example covers ``window_size + 1`` consecutive rows: the inputs are
    the first ``window_size`` rows without the last column, and the target is
    the last column of the final row.

    Args:
        series: rows of features with the target as the last column.
        window_size: number of past rows fed to the model.
        batchsize: number of windows per batch.
        buffersize: size of the shuffle buffer.
    """
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    span = window_size + 1

    rows = tf.data.Dataset.from_tensor_slices(series)
    # Sliding windows with stride 1; incomplete windows at the end are dropped.
    windows = rows.window(span, drop_remainder=True, shift=1)
    windows = windows.flat_map(lambda w: w.batch(span))
    examples = windows.map(lambda w: (w[:-1,:-1], w[-1,-1]), num_parallel_calls=AUTOTUNE)
    batches = examples.cache().shuffle(buffersize).batch(batchsize)
    return batches.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
def create_sequential_val_loader(series, window_size=24, batchsize=32):
    """Turn a 2-D series into ordered batches of (window, next-step target).

    Each example covers ``window_size + 1`` consecutive rows: the inputs are
    the first ``window_size`` rows without the last column, and the target is
    the last column of the final row. No shuffling is applied.

    Args:
        series: rows of features with the target as the last column.
        window_size: number of past rows fed to the model.
        batchsize: number of windows per batch.
    """
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    span = window_size + 1

    rows = tf.data.Dataset.from_tensor_slices(series)
    # Sliding windows with stride 1; incomplete windows at the end are dropped.
    windows = rows.window(span, drop_remainder=True, shift=1)
    windows = windows.flat_map(lambda w: w.batch(span))
    examples = windows.map(lambda w: (w[:-1,:-1], w[-1,-1]), num_parallel_calls=AUTOTUNE)
    batches = examples.cache().batch(batchsize)
    return batches.prefetch(tf.data.experimental.AUTOTUNE)

In [None]:
# Wrap the three splits in tf.data pipelines; the test split carries no labels.
train_load = non_sequential_train_loader(data=train.to_numpy(),
                                         labels=train_label.to_numpy())
val_load = non_sequential_val_loader(data=val.to_numpy(),
                                     labels=val_label.to_numpy())
test_load = non_sequential_test_loader(data=test.to_numpy())

In [None]:
# Append the label as the last column, the layout the sequential loaders slice on.
train = tf.concat(
    [train.to_numpy(), train_label.to_numpy().reshape(-1,1)],
    axis=1,
)
val = tf.concat(
    [val.to_numpy(), val_label.to_numpy().reshape(-1,1)],
    axis=1,
)

In [None]:
# Replace the raw tensors with windowed pipelines (default window and batch sizes).
train = create_sequential_train_loader(series=train)
val = create_sequential_val_loader(series=val)

In [None]:
# Sanity check: shapes of one batch of inputs and targets.
for X, Y in train.take(1):
    print(X.shape, Y.shape, sep="\n")

# Natural Language Processing:

In [None]:
def clean_string(dataframe, field, targets=None, is_train=True):
  """Normalize a text column and collect its non-empty lines.

  Every punctuation character is replaced by a space, the text is lower-cased
  and stripped. Entries that are missing or empty after cleaning are dropped,
  together with their target.

  Args:
    dataframe: pandas DataFrame; ``dataframe[field]`` is overwritten with the
      cleaned (not yet stripped) text.
    field: name of the text column.
    targets: optional sequence aligned by position with the rows.
    is_train: when True return ``(lines, target)``, otherwise only ``lines``.

  Returns:
    ``(lines, target)`` or ``lines``; ``target`` is empty when ``targets`` is None.
  """
  # A translation table treats punctuation literally, so the result does not
  # depend on pandas' regex default for str.replace.
  to_space = str.maketrans(string.punctuation, " " * len(string.punctuation))
  dataframe[field] = dataframe[field].str.translate(to_space).str.lower()

  lines = []
  target = []

  for idx, line in enumerate(dataframe[field].to_numpy()):
    # Missing values are not strings; skip them.
    if not isinstance(line, str):
      continue

    # Strip first so whitespace-only rows are dropped too.
    line = line.strip()
    if not line:
      continue

    lines.append(line)
    if targets is not None:
      target.append(targets[idx])

  if is_train:
    return lines, target

  return lines

In [None]:
def clean_string(dataframe, field, target_columns='target_value'):
  """Normalize a text column and pair each kept line with its target values.

  Punctuation is removed, the text is lower-cased and stripped. Entries that
  are missing or empty after cleaning are dropped together with their targets.

  Args:
    dataframe: pandas DataFrame; ``dataframe[field]`` is overwritten with the
      cleaned (not yet stripped) text.
    field: name of the text column.
    target_columns: one column name or a list of names holding the targets.

  Returns:
    ``(lines, target)`` where ``target[i]`` is the list of float32 target
    values belonging to ``lines[i]``.
  """
  # A translation table treats punctuation literally, so the result does not
  # depend on pandas' regex default for str.replace.
  drop_punctuation = str.maketrans('', '', string.punctuation)
  dataframe[field] = dataframe[field].str.translate(drop_punctuation).str.lower()

  # Targets are read from the frame passed in, never from a notebook global.
  if isinstance(target_columns, str):
    target_columns = [target_columns]
  target_values = dataframe[list(target_columns)].to_numpy().astype(np.float32)

  lines = []
  target = []

  for text, row in zip(dataframe[field].to_numpy(), target_values):
    # Missing values are not strings; skip them.
    if not isinstance(text, str):
      continue

    # Strip first so whitespace-only rows are dropped too.
    text = text.strip()
    if text:
      lines.append(text)
      target.append(list(row))

  return lines, target

In [None]:
url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"   # Example

# Download and unpack next to the notebook (cache_dir='.', cache_subdir='').
# NOTE: this rebinds `dataset` to the value returned by get_file, replacing
# any DataFrame stored under that name.
dataset = tf.keras.utils.get_file(fname='aclImdb_v1',
                                  origin=url,
                                  cache_dir='.',
                                  untar=True,
                                  cache_subdir='')

path = os.path.join(os.curdir, 'aclImdb')
BATCH = 32
SEED = 123

# Remove directories that must not become classes. ignore_errors=True lets
# the cell be re-run after the folder has already been deleted.
shutil.rmtree(os.path.join(path, 'train', 'unsup'), ignore_errors=True)


# Training and validation come from the same folder; the identical seed and
# split fraction are passed to both calls.
train = tf.keras.utils.text_dataset_from_directory(
    os.path.join(path, 'train'),
    subset='training',
    validation_split=0.2,
    seed=SEED,
    batch_size=BATCH
)
val = tf.keras.utils.text_dataset_from_directory(
    os.path.join(path, 'train'),
    subset='validation',
    validation_split=0.2,
    seed=SEED,
    batch_size=BATCH
)
test = tf.keras.utils.text_dataset_from_directory(
    os.path.join(path, 'test'),
    batch_size=BATCH
)

# Sanity check: shapes of one batch of texts and labels.
for text, label in train.take(1):
    print(text.shape)
    print(label.shape)
    # print(text)
    # print(label)


def clean_string(instance):
    """Lower-case string tensors, drop '<br />' tags and punctuation, trim whitespace."""
    punctuation_class = '[{}]'.format(string.punctuation)

    lowered = tf.strings.lower(instance)
    without_breaks = tf.strings.regex_replace(lowered, '<br />', '')
    without_punctuation = tf.strings.regex_replace(without_breaks, punctuation_class, '')
    return tf.strings.strip(without_punctuation)



MAX_SEQUENCE = 250   # token ids per example
VOCABSIZE = 10000    # vocabulary size limit

# clean_string is supplied as the standardizer because it also removes the
# '<br />' tags.
vectorizer = tf.keras.layers.TextVectorization(
    standardize=clean_string,
    max_tokens=VOCABSIZE,
    output_sequence_length=MAX_SEQUENCE,
    output_mode='int',
)

# Build the vocabulary from the training texts only (labels dropped).
data = train.map(lambda sentence, _label: sentence, num_parallel_calls=tf.data.AUTOTUNE)
vectorizer.adapt(data)


def vectorize(text, label):
    """Map raw text to token ids with the fitted vectorizer; the label passes through."""
    token_ids = vectorizer(text)
    return token_ids, label


# Tokenize every split with the same fitted vectorizer.
train, val, test = (
    split.map(vectorize, num_parallel_calls=tf.data.AUTOTUNE)
    for split in (train, val, test)
)

# Sanity check: shapes of one vectorized batch.
for text, label in train.take(1):
    print(text.shape)
    print(label.shape)
    # print(text)
    # print(label)

# Cache every split; only the training stream is shuffled.
train = train.cache().shuffle(1000).prefetch(tf.data.AUTOTUNE)
val, test = (split.cache().prefetch(tf.data.AUTOTUNE) for split in (val, test))

In [None]:
# DataFrame workflow: clean the "text" column of both frames.
labels = train['target'].to_numpy()

# Training rows keep their labels; the test frame only yields cleaned lines.
train, labels = clean_string(train, "text", targets=labels)
test = clean_string(test, "text", is_train=False)

In [None]:
MAX_SEQUENCE = 250   # token ids per example
VOCABSIZE = 10000    # vocabulary size limit

# Vectorizer using the layer's default standardization.
vectorizer = tf.keras.layers.TextVectorization(
    output_mode='int',
    max_tokens=VOCABSIZE,
    output_sequence_length=MAX_SEQUENCE,
)

In [None]:
# Fit the vocabulary on the training texts only (labels are dropped).
text = train.map(lambda sentence, _label: sentence)
vectorizer.adapt(text)

In [None]:
def vectorize_text(text, labels):
  """Map raw text to token ids with the fitted vectorizer; labels pass through."""
  token_ids = vectorizer(text)
  return token_ids, labels

In [None]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Tokenize the training and validation streams in parallel.
train, val = (
    split.map(vectorize_text, num_parallel_calls=AUTOTUNE)
    for split in (train, val)
)

In [None]:
# Sanity check: shapes first, then the contents of one element.
for X, Y in train.take(1):
  for item in (X.shape, Y.shape, X, Y):
    print(item)

In [None]:
BATCH_SIZE = 64

# Cache first; only the training stream is shuffled before batching.
train = train.cache().shuffle(1000)
train = train.batch(BATCH_SIZE).prefetch(AUTOTUNE)

val = val.cache()
val = val.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Redundancy Removal(Avoid this)

In [None]:
# Look for redundant variables: columns that correlate at |r| >= 0.95 with an
# earlier column.
# Only numeric and boolean columns can be correlated; restricting to them
# avoids the error that corr() raises on text columns in recent pandas.
correlation = dataset.select_dtypes(include=['number', 'bool']).corr()

# Keep the strict upper triangle so every pair is inspected once and the
# diagonal (always 1.0) is ignored.
upper = correlation.where(np.triu(np.ones(correlation.shape), k=1).astype(bool))
redundant = [col for col in upper if np.any(np.abs(upper[col]) >= 0.95)]
redundant

In [None]:
# Heatmap limited to the variables whose |correlation| with 'variable' is at least 0.95.
strongly_related = correlation['variable'].abs() >= 0.95
sns.heatmap(
    correlation.loc[strongly_related, strongly_related],
    annot=True,
    cmap=plt.cm.autumn_r,
    fmt='0.3f',
)

# Dealing with Imbalanced dataset: 

In [None]:
# Metrics tracked for an imbalanced binary problem: confusion-matrix counts,
# precision/recall, ROC AUC, accuracy, and the area under the PR curve.
keras_metrics = tf.keras.metrics

METRICS = [
    keras_metrics.TruePositives(name='tp'),
    keras_metrics.FalsePositives(name='fp'),
    keras_metrics.TrueNegatives(name='tn'),
    keras_metrics.FalseNegatives(name='fn'),
    keras_metrics.Precision(name='precision'),
    keras_metrics.Recall(name='recall'),
    keras_metrics.AUC(name='auc'),
    keras_metrics.BinaryAccuracy(name='accuracy'),
    keras_metrics.AUC(name='prc', curve='PR'),
]

In [None]:
# Class counts (the unpacking requires exactly the two classes 0 and 1).
# Expected here: more zeros than ones; value_counts() gives the same numbers.
counts = np.bincount(dataset['target_variable'].to_numpy())
zeros, ones = counts
total = zeros + ones

# Log-odds of the positive class, used as the initial output bias.
bias = np.log(ones / zeros)

# Inverse-frequency weights, scaled by total / 2.
w0 = (1 / zeros) * (total / 2.0)
w1 = (1 / ones) * (total / 2.0)

class_weights = dict(zip((0, 1), (w0, w1)))
class_weights

In [None]:
def create_model(output_bias=None, input_features=24):
  """Build a small binary classifier with an optional initial output bias.

  Args:
    output_bias: value the output layer's bias starts at (e.g. the log-odds
      of the positive class). When None the bias starts at zero.
    input_features: number of input features per sample.

  Returns:
    An uncompiled Sequential model ending in a single sigmoid unit.
  """
  # Use the layer's usual 'zeros' initializer when no bias is supplied;
  # passing None would hand the choice to Keras' fallback initializer.
  if output_bias is not None:
    bias_initializer = tf.keras.initializers.Constant(output_bias)
  else:
    bias_initializer = 'zeros'

  model = tf.keras.models.Sequential([
      tf.keras.layers.Flatten(input_shape=[input_features]),
      tf.keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal'),
      tf.keras.layers.Dense(1, activation='sigmoid', bias_initializer=bias_initializer)
  ])
  return model

In [None]:
# `bias` becomes the initial bias of the output layer.
model = create_model(bias)                  

In [None]:
# The loss takes probabilities here (no from_logits); METRICS is the list defined for imbalanced data.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=METRICS,
)

In [None]:
# class_weight is passed so the two classes are weighted by `class_weights` during training.
history = model.fit(
    train_load,
    validation_data=val_load,
    epochs=100,
    class_weight=class_weights,
    callbacks=[earlystop, checkpoint],
)

# Forecasting:

In [None]:
def forecast(model, series, batch_size=32):
    """Predict on ``series`` row by row, fed to the model in batches.

    Args:
        model: object exposing ``predict``.
        series: samples, sliced along the first axis.
        batch_size: number of samples per batch.

    Returns:
        Whatever ``model.predict`` returns for the batched pipeline.
    """
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    samples = tf.data.Dataset.from_tensor_slices(series)
    batches = samples.cache().batch(batch_size).prefetch(AUTOTUNE)
    return model.predict(batches)

In [None]:
# Windowed prediction: the model is fed sliding windows of the series.
def forecasting(model,series, window_size=24, batchsize=32):
    """Predict once per full window of ``window_size`` consecutive rows.

    Windows advance one row at a time and incomplete windows at the end are
    dropped.

    Args:
        model: object exposing ``predict``.
        series: rows of features, sliced along the first axis.
        window_size: number of rows per window.
        batchsize: number of windows per batch.

    Returns:
        Whatever ``model.predict`` returns for the windowed pipeline.
    """
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    rows = tf.data.Dataset.from_tensor_slices(series)
    windows = rows.window(window_size, drop_remainder=True, shift=1)
    windows = windows.flat_map(lambda w: w.batch(window_size))
    batches = windows.cache().batch(batchsize).prefetch(AUTOTUNE)
    return model.predict(batches)

In [None]:
window_size = 60

# Score every window, then turn the scores into hard 0/1 labels at 0.5.
prediction = forecasting(model, test_data.to_numpy(), window_size=window_size)
prediction = (prediction > 0.5).astype(int)
prediction.shape

In [None]:
# Skip the first window_size - 1 labels: they belong to rows that have no
# complete window and therefore no prediction.
first_predicted = window_size - 1
target = test_label.to_numpy()[first_predicted:]
target.shape

In [None]:
# Fraction of predictions that match the aligned labels.
flat_prediction = tf.squeeze(prediction, axis=-1)
tf.keras.metrics.Accuracy()(target, flat_prediction)

In [None]:
# There is one prediction per full window, so the first window_size - 1
# labels have no counterpart; drop them to make both arrays the same length.
aligned_labels = test_label.to_numpy()[window_size-1:]
print(f'Sequential Model Loss: {tf.keras.metrics.mean_squared_error(tf.squeeze(prediction, axis=-1), aligned_labels)}')
