In [None]:
#Connect to Google Drive
from google.colab import drive
import os

# Absolute project path. A relative chdir only works the first time this cell
# runs: on a re-run the working directory is already inside the project, so
# the relative path would no longer resolve.
PROJECT_DIR = '/content/gdrive/MyDrive/Deep_Learning_Project'

# force_remount=True so the cell can be re-run on an already-mounted drive.
drive.mount('/content/gdrive', force_remount=True)

# Show the working directory before and after switching to the project folder.
print(os.getcwd())
os.chdir(PROJECT_DIR)
print(os.getcwd())

In [None]:
#Required Modules
import numpy as np
import pandas as pd
import tensorflow as tf
import datetime
import os
import sklearn.preprocessing as skp
import collections
import time
import math
import matplotlib.pyplot as plt
import json

from tensorflow import keras
from tensorflow.keras import layers, Sequential, Model
from tensorflow.keras.layers import Input, Dense, Activation, Conv2D, Conv3D, MaxPool2D, MaxPool3D, AveragePooling2D, AveragePooling3D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.layers import LocallyConnected2D, Concatenate, Reshape, ConvLSTM2D, LSTM, RNN, Bidirectional, TimeDistributed, LeakyReLU
from tensorflow.keras.optimizers.legacy import SGD, RMSprop, Adam

from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall
from tensorflow.keras.losses import BinaryCrossentropy

In [None]:
#Get Sense of Label Imbalance to Correct for
# Tally label cells per class over every file in NOAA_Event_Labels:
# key 1 counts positive cells, key 0 counts the remaining cells.
label_weights = {0:0, 1:0}
for files in sorted(os.listdir('NOAA_Event_Labels')):
  a = np.load('NOAA_Event_Labels/'+files, mmap_mode='r')
  # Positives in THIS file only (labels are summed, so they are assumed to be 0/1).
  positives = np.sum(a)
  label_weights[1] += positives
  # Negatives = this file's cell count minus this file's positives. The running
  # total of positives must not be subtracted here, otherwise earlier files'
  # positives are removed again for every later file.
  label_weights[0] += a.size - positives
print(label_weights)

# Turn the raw counts into balanced weights: total / (2 * class_count), so a
# perfectly balanced dataset would give both classes weight 1.
numerator = sum(label_weights.values())
label_weights[0] = numerator/(2*label_weights[0])
label_weights[1] = numerator/(2*label_weights[1])
print(label_weights)

In [None]:
#Create Lists for Validation and Testing
# Inputs and labels are paired purely by sorted position, so both folders must
# hold the same number of files; zip() would otherwise silently drop or
# mispair entries.
era_names = sorted(os.listdir('ERA_Numpy_Files'))
label_names = sorted(os.listdir('NOAA_Event_Labels'))
if len(era_names) != len(label_names):
  raise ValueError('ERA_Numpy_Files has %d files but NOAA_Event_Labels has %d; cannot pair inputs with labels'
                   % (len(era_names), len(label_names)))
total_files = list(zip(era_names, label_names))

# Split by position: first 96 pairs train, next 24 validate, the rest test.
train_files = total_files[:96]
val_files = total_files[96:120]
test_files = total_files[120:]
#Final_Training
# Train + validation pairs together, used for the final retraining runs.
true_train = total_files[:120]

#Quickly Generate the Test-Labels
# Stack the label arrays of all test files along the first axis.
# label_weights is deliberately not touched here, so the weights computed in
# the label-imbalance cell stay available.
test_labelset = np.concatenate(
    [np.load('NOAA_Event_Labels/'+label_name, mmap_mode='r') for _, label_name in test_files],
    axis = 0)
print(test_labelset.shape)

In [None]:
#Creating Generator Functions
def generate_batchs(files, batch_size):
  """Endlessly yield (input, label) batches, cycling through `files`.

  Args:
    files: sequence of (input_filename, label_filename) pairs; the names are
      resolved inside 'ERA_Numpy_Files/' and 'NOAA_Event_Labels/'.
    batch_size: maximum number of leading-axis rows per batch.

  Yields:
    (inputs, labels) slices of the memory-mapped arrays. The last batch of a
    file holds fewer rows when the file length is not a multiple of
    batch_size. After the last file the generator starts over at the first.
  """
  file_pos = 0
  while True:
    pair = files[file_pos]
    # Wrap around so the generator never runs dry.
    file_pos = (file_pos + 1) % len(files)
    inputs = np.load('ERA_Numpy_Files/'+pair[0], mmap_mode='r')
    labels = np.load('NOAA_Event_Labels/'+pair[1], mmap_mode='r')
    for start in range(0, inputs.shape[0], batch_size):
      rows = slice(start, start+batch_size)
      yield inputs[rows], labels[rows]

def test_batchs(files, batch_size):
  """Yield input-only batches for inference, passing over `files` exactly once.

  Args:
    files: sequence of (input_filename, label_filename) pairs; only the input
      filename is used and is resolved inside 'ERA_Numpy_Files/'.
    batch_size: maximum number of leading-axis rows per batch.

  Yields:
    Slices of each memory-mapped input array, in file order. The last batch of
    a file holds fewer rows when the file length is not a multiple of
    batch_size.

  The generator finishes normally after the last file, so a consumer sees a
  clean end of data rather than an IndexError from indexing past the list.
  Label files are not opened because labels are never yielded here.
  """
  for fname in files:
    x = np.load('ERA_Numpy_Files/'+fname[0], mmap_mode='r')
    for local_index in range(0, x.shape[0], batch_size):
      yield x[local_index:(local_index+batch_size)]

In [None]:
#Create Datasets From Loaders
batch_size = 128

# Element layouts shared by the datasets below. The first dimension is left as
# None because the generators yield a shorter batch at the end of each file.
era_shape = [None, 5, 20, 20, 148]
label_shape = [None, 20, 20, 1]
# Keyword arguments common to every dataset that yields (input, label) pairs.
paired_spec = dict(output_types=(tf.float32, tf.float32),
                   output_shapes=(era_shape, label_shape))

# Training split used while tuning.
train_dataset = tf.data.Dataset.from_generator(
    lambda: generate_batchs(train_files, batch_size), **paired_spec)

# Validation split used while tuning.
valid_dataset = tf.data.Dataset.from_generator(
    lambda: generate_batchs(val_files, batch_size), **paired_spec)

# Test inputs only (no labels), for prediction.
test_dataset = tf.data.Dataset.from_generator(
    lambda: test_batchs(test_files, batch_size),
    output_types=tf.float32, output_shapes=era_shape)

# Train + validation files together, for the final retraining.
true_train_ds = tf.data.Dataset.from_generator(
    lambda: generate_batchs(true_train, batch_size), **paired_spec)

# Test files with labels, passed as validation data during the final retraining.
test_valid = tf.data.Dataset.from_generator(
    lambda: generate_batchs(test_files, batch_size), **paired_spec)

In [None]:
#Method 2 Model: Single Convolution and Feed Forward

# One output unit per label pixel (20 x 20 grid, 1 channel).
n_label_cells = 20 * 20 * 1

# Layers are listed in forward order and handed to Sequential in one go.
model_2 = tf.keras.models.Sequential([
    # Single 3D convolution over the (5, 20, 20, 148) input, then 2x pooling.
    tf.keras.layers.Conv3D(filters=32, kernel_size=(3, 3, 3), input_shape=(5, 20, 20, 148),
                           padding='same', activation='relu'),
    tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2)),
    # Feed-forward head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Sigmoid score per pixel, reshaped back to the label layout.
    tf.keras.layers.Dense(n_label_cells, activation='sigmoid'),
    tf.keras.layers.Reshape((20, 20, 1)),
])

# Print model summary
model_2.summary()

In [None]:
#Validation Step for Fine Tuning
#Note: Because Colab sessions kept failing, this process had to be run in parts, re-saving in between

model_2.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
# The generators behind these datasets never end, so the number of batches per
# epoch is given explicitly.
# NOTE(review): the factor 6 is presumably the number of batches per file at batch_size=128 - confirm.
history = model_2.fit(train_dataset, epochs=20, validation_data=valid_dataset, class_weight={0:1, 1:5000},
          validation_steps = len(val_files)*6, steps_per_epoch = len(train_files)*6, verbose = 1, max_queue_size = 32)

# Persist the model trained above; the object in this notebook is model_2.
model_2.save('Models_and_Histories/aratry_m1.h5')

In [None]:
#Final Results of Validation:
#Weights: {0:1, 1:5000}
#Epochs: 6

#Recreate Model 2
# Fresh, untrained copy of the same architecture for the final retraining.
output_cells = 20 * 20 * 1  # one sigmoid unit per label pixel

model_2 = tf.keras.models.Sequential([
    tf.keras.layers.Conv3D(filters=32, kernel_size=(3, 3, 3), input_shape=(5, 20, 20, 148),
                           padding='same', activation='relu'),
    tf.keras.layers.MaxPooling3D(pool_size=(2, 2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Output layer with units equal to the number of pixels
    tf.keras.layers.Dense(output_cells, activation='sigmoid'),
    # Reshape the output to match the label shape
    tf.keras.layers.Reshape((20, 20, 1)),
])

# Print model summary
model_2.summary()

In [None]:
#Retrain Model 2 on all but the test data (train + validation files)
model_2.compile(optimizer = tf.keras.optimizers.Adam(), loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
# The test files are passed as validation data here, so the per-epoch
# validation metrics are computed on the test set.
# NOTE(review): the factor 6 is presumably the number of batches per file at batch_size=128 - confirm.
history = model_2.fit(true_train_ds, epochs=6, validation_data=test_valid, class_weight={0:1, 1:5000},
          validation_steps = (len(test_files) + 1)*6, steps_per_epoch = len(true_train)*6, verbose = 1, max_queue_size = 32)

#Save model
# The object trained above is model_2.
model_2.save('Models_and_Histories/aratry_m1_final.h5')

In [None]:
#Model 3:
myDO = 0.3

# Settings shared by every ConvLSTM2D layer; the same rate is used for input
# and recurrent dropout.
convlstm_common = dict(padding = 'same', activation = 'relu', dropout = myDO,
                       recurrent_dropout = myDO, recurrent_regularizer = 'l1')

model_3 = tf.keras.models.Sequential([
    # Three sequence-returning ConvLSTM layers, each followed by batch norm.
    tf.keras.layers.ConvLSTM2D(filters = 64, kernel_size = (4,4), input_shape = (5, 20, 20, 148),
                               return_sequences = True, **convlstm_common),
    BatchNormalization(),
    tf.keras.layers.ConvLSTM2D(filters = 64, kernel_size = (5,5), return_sequences = True, **convlstm_common),
    BatchNormalization(),
    tf.keras.layers.ConvLSTM2D(filters = 64, kernel_size = (3,3), return_sequences = True, **convlstm_common),
    BatchNormalization(),
    # The last ConvLSTM layer does not return the sequence.
    tf.keras.layers.ConvLSTM2D(filters = 32, kernel_size = (1,1), return_sequences = False, **convlstm_common),
    # 1x1 convolution with sigmoid: one score per pixel.
    Conv2D(filters = 1, kernel_size = (1,1), activation = 'sigmoid', padding = 'same'),
])

model_3.summary()

In [None]:
#Validate + Finetune
# Adam with gradient-norm clipping, binary cross-entropy, and accuracy /
# precision / recall as monitored metrics.
model_3.compile(
    optimizer = tf.keras.optimizers.Adam(clipnorm = 1),
    loss = tf.keras.losses.BinaryCrossentropy(),
    metrics = [
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

# The generators never end, so the number of batches per epoch is given explicitly.
history_3 = model_3.fit(
    x = train_dataset,
    epochs = 20,
    steps_per_epoch = len(train_files)*6,
    validation_data = valid_dataset,
    validation_steps = len(val_files)*6,
    class_weight = {0:1, 1: 5000},
    shuffle = False,
    verbose = 1,
    max_queue_size = 32,
)
model_3.save('Models_and_Histories/model_2.h5')

In [None]:
#Validation Fine Tuning Results:
#Loss Weights: {0:1, 1:5000}
#Epochs: 8

#Recreate Model 3
# Fresh, untrained copy of the same architecture for the final retraining.
myDO = 0.3
recurrent_opts = dict(padding = 'same', activation = 'relu', dropout = myDO,
                      recurrent_dropout = myDO, recurrent_regularizer = 'l1')

model_3 = tf.keras.models.Sequential([
    tf.keras.layers.ConvLSTM2D(filters = 64, kernel_size = (4,4), input_shape = (5, 20, 20, 148),
                               return_sequences = True, **recurrent_opts),
    BatchNormalization(),
    tf.keras.layers.ConvLSTM2D(filters = 64, kernel_size = (5,5), return_sequences = True, **recurrent_opts),
    BatchNormalization(),
    tf.keras.layers.ConvLSTM2D(filters = 64, kernel_size = (3,3), return_sequences = True, **recurrent_opts),
    BatchNormalization(),
    tf.keras.layers.ConvLSTM2D(filters = 32, kernel_size = (1,1), return_sequences = False, **recurrent_opts),
    Conv2D(filters = 1, kernel_size = (1,1), activation = 'sigmoid', padding = 'same'),
])

model_3.summary()

In [None]:
#Retrain
# Same optimizer class and settings as the validation / fine-tuning run of
# model_3, so the epoch count chosen there applies to the same setup.
model_3.compile(optimizer = tf.keras.optimizers.Adam(clipnorm = 1), loss = tf.keras.losses.BinaryCrossentropy(),
              metrics = [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
# Trains on the train + validation files; the test files are passed as
# validation data, so the per-epoch validation metrics are test-set metrics.
# NOTE(review): the factor 6 is presumably the number of batches per file at batch_size=128 - confirm.
history_3 = model_3.fit(true_train_ds, epochs=8, validation_data=test_valid, class_weight={0:1, 1:5000},
          validation_steps = (len(test_files) + 1)*6, steps_per_epoch = len(true_train)*6, verbose = 1, max_queue_size = 32)

model_3.save('Models_and_Histories/model_2_final.h5')