In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np


airports = ['VABB', 'VOTV', 'VOBL', 'VECC', 'VIDP']

def get_windspeed(code):
    arr = np.load("WindSpeed_ArrayFinal.npy")
    idx = airports.index(code)
    return arr[:, idx].astype(np.float32)

def get_era_full(param, level):
    arr = np.load("18To20{}{}_uint8.npy".format(param, level))
    return arr/256

# Import data
params = ["z", "z", "z"]
levels = [500, 700, 1000]

in1_var = get_era_full(params[0], levels[0])
in2_var = get_era_full(params[1], levels[1])
in3_var = get_era_full(params[2], levels[2])

# data for a given airport
X = np.concatenate((np.expand_dims(in1_var, axis=3), np.expand_dims(in2_var, axis=3), np.expand_dims(in3_var, axis=3)), axis=3)
Y = get_windspeed('VABB').reshape(-1, 1)

import sklearn
from sklearn.model_selection import train_test_split

# In the first step we will split the data in training and remaining dataset
X_train, X_rem, Y_train, Y_rem = train_test_split(X,Y, train_size=0.7)

# Now since we want the valid and test size to be equal. 
X_valid, X_test, Y_valid, Y_test = train_test_split(X_rem,Y_rem, test_size=0.5)

#to clear space for gpu, if occupied by any process
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.compat.v1.Session(config=config)

conv_base = tf.keras.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(157, 157, 3))

# preprocess_input

train_features = tf.keras.applications.resnet.preprocess_input(X_train)
val_features = tf.keras.applications.resnet.preprocess_input(X_valid)
test_features = tf.keras.applications.resnet.preprocess_input(X_test)

train_features = conv_base.predict(train_features)
val_features = conv_base.predict(val_features)
test_features = conv_base.predict(test_features)

train_labels = Y_train
val_labels = Y_valid
test_labels = Y_test

# simple baseline
train_labels_mean = np.mean(train_labels)
simple_baseline_mae = np.mean(np.abs(test_labels - train_labels_mean))
print(simple_baseline_mae)

# model building
inputs = keras.Input(shape = (5, 5, 2048))
x = layers.GlobalAveragePooling2D()(inputs)
x = layers.BatchNormalization()(x)
top_dropout_rate = 0.4
x = layers.Dropout(top_dropout_rate)(x)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs, outputs, name="ResNet50")

# compiling the model
opt =tf.keras.optimizers.RMSprop(learning_rate=0.002)
model.compile(loss = 'mse', optimizer = opt, metrics = [tf.keras.losses.MeanAbsoluteError()])
callbacks = [keras.callbacks.ModelCheckpoint(filepath = f"wVABB_resnet.keras",
                                            save_best_only = True, monitor = "val_loss")]

history = model.fit(train_features, train_labels, batch_size = 64, epochs = 100, validation_data = (val_features, val_labels), verbose = 0, callbacks = callbacks)

import matplotlib.pyplot as plt
%matplotlib inline
f1 = plt.figure();
mae = history.history["mean_absolute_error"]
loss = history.history["loss"]
val_mae = history.history["val_mean_absolute_error"]
val_loss = history.history["val_loss"]
epochs = range(1, len(loss) + 1)

plt.plot(epochs, loss, "bo", label = "Training loss(MSE)");
plt.plot(epochs, val_loss, "b", label = "Validation loss(MSE)");
plt.title("Training and validation loss(MSE)");
plt.xlabel("epochs")
plt.ylabel("MSE")
plt.legend();

f2 = plt.figure();
plt.plot(epochs, mae, "bo", label = "Training accuracy(MAE)");
plt.plot(epochs, val_mae, "b", label = "Validation accuracy(MAE)");
plt.title("Training and validation accuracy(MAE)");
plt.legend();
plt.xlabel("epochs")
plt.ylabel("MAE")
plt.show();

test_loss, test_mae = model.evaluate(test_features, test_labels)
print(f"Test MAE: {test_mae:.3f}")
f1.savefig(f"wVABB_resnet.jpg", bbox_inches='tight', dpi=600);
f2.savefig(f"wVABB_resnet.jpg", bbox_inches='tight', dpi=600);

pred = model.predict(test_features)
error = pred - test_labels

import pandas as pd
print(error.shape)
error = np.reshape(error, (3942,))
print(error.shape)
x = pd.Series(error)
print(x.plot.hist(bins=35, figsize=[12, 7]))
#y = pd.Series(percent_error)
#print(y.plot.hist(bins=35, figsize=[12, 7]))
plt.xlabel("prediction - ground truth")
plt.show();

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

airports = ['VABB', 'VOTV', 'VOBL', 'VECC', 'VIDP']

def get_windspeed(code):
    arr = np.load("windspeed21_22FINAL.npy")
    idx = airports.index(code)
    return arr[:, idx].astype(np.float32)

def get_era_full(param, level):
    arr = np.load("21To22{}{}_uint8final.npy".format(param, level))
    return arr/256

# Import data
params = ["z", "z", "z"]
levels = [500, 700, 1000]

in1_var = get_era_full(params[0], levels[0])
in2_var = get_era_full(params[1], levels[1])
in3_var = get_era_full(params[2], levels[2])

# data for a given airport
X = np.concatenate((np.expand_dims(in1_var, axis=3), np.expand_dims(in2_var, axis=3), np.expand_dims(in3_var, axis=3)), axis=3)
Y = get_windspeed('VABB').reshape(-1, 1)

X_test = X
Y_test = Y

#to clear space for gpu, if occupied by any process
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.compat.v1.Session(config=config)

conv_base = tf.keras.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(157, 157, 3))

# preprocess_input
test_features = tf.keras.applications.resnet.preprocess_input(X_test)
test_features = conv_base.predict(test_features)
test_labels = Y_test

model = keras.models.load_model('wVABB_resnet.keras', compile = False)
pred = model.predict(test_features)
error = pred - test_labels

remove_idx = []
for i, j in enumerate(list(test_labels)):
    if j == 0.0:
        remove_idx.append(i)
        
test_labels2 = np.reshape(test_labels, (8736,))
pred2 = np.reshape(pred, (8736,))
test_labels2 = np.delete(test_labels2, remove_idx)
pred2 = np.delete(pred2, remove_idx)
error0 = pred2 - test_labels2
percent_error = (error0/test_labels2)*100

pearson_matrix = np.corrcoef(test_labels2, pred2)
print(pearson_matrix)

import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
#plt.plot(range(24*90), test_labels[:24*90], label = "observation")
#plt.plot(range(24*90), pred[:24*90], label = "prediction")
plt.plot(range(24*90), test_labels[24*90:24*2*90], label = "observation")
plt.plot(range(24*90), pred[24*90:24*2*90], label = "prediction")
#plt.plot(range(24*90), test_labels[24*2*90:24*3*90], label = "observation")
#plt.plot(range(24*90), pred[24*2*90:24*3*90], label = "prediction")
#plt.plot(range(24*90), test_labels[24*3*90:24*4*90], label = "observation")
#plt.plot(range(24*90), pred[24*3*90:24*4*90], label = "prediction")
plt.title("observation vs prediction for the monsoon season")
plt.legend()
plt.xlabel("Hour since first june 00:00 in UTC")
plt.ylabel("windspeed (miles/hr)")
plt.show();

import pandas as pd
print(error.shape)
error = np.reshape(error, (8736,))
print(error.shape)
x = pd.Series(error)
print(x.plot.hist(bins=35, figsize=[12, 7]))
#y = pd.Series(percent_error)
#print(y.plot.hist(bins=35, figsize=[12, 7]))
plt.xlabel("prediction - ground truth")
plt.show();

import pandas as pd
print(percent_error.shape)
#error = np.reshape(percent_error, (8736-1205,))
#print(error.shape)
y = pd.Series(percent_error)
print(y.plot.hist(bins=35, figsize=[12, 7]))
plt.xlabel("percentage error ((error/observation)*100)")
plt.show();

#plot for summer
import pandas as pd
summer_err = error[:2184]
monsoon_err = error[2184:2*2184]
autumn_err = error[2*2184:3*2184]
winter_err = error[3*2184:]
error1 = np.reshape(summer_err, (2184,))
print(error1.shape)
x = pd.Series(error1)
print(x.plot.hist(bins=35, figsize=[12, 7]))
#y = pd.Series(percent_error)
#print(y.plot.hist(bins=35, figsize=[12, 7]))
plt.xlabel("prediction - observation")
plt.show();

#plot for monsoon
error2 = np.reshape(monsoon_err, (2184,))
print(error2.shape)
x = pd.Series(error2)
print(x.plot.hist(bins=35, figsize=[12, 7]))
#y = pd.Series(percent_error)
#print(y.plot.hist(bins=35, figsize=[12, 7]))
plt.xlabel("prediction - observation")
plt.show();

#plot for autumn
error3 = np.reshape(autumn_err, (2184,))
print(error3.shape)
x = pd.Series(error3)
print(x.plot.hist(bins=35, figsize=[12, 7]))
#y = pd.Series(percent_error)
#print(y.plot.hist(bins=35, figsize=[12, 7]))
plt.xlabel("prediction - observation")
plt.show();

#plot for winter
error4 = np.reshape(winter_err, (2184,))
print(error4.shape)
x = pd.Series(error4)
print(x.plot.hist(bins=35, figsize=[12, 7]))
#y = pd.Series(percent_error)
#print(y.plot.hist(bins=35, figsize=[12, 7]))
plt.xlabel("prediction - observation")
plt.show();

ResourceExhaustedError:  OOM when allocating tensor with shape[2048,81,81] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node resnet50/pool1_pad/Pad (defined at <ipython-input-3-8a6961ecdf90>:66) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_predict_function_20736]

Function call stack:
predict_function


In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np


airports = ['VABB', 'VOTV', 'VOBL', 'VECC', 'VIDP']

def get_windspeed(code):
    arr = np.load("WindSpeed_ArrayFinal.npy")
    idx = airports.index(code)
    return arr[:, idx].astype(np.float32)

def get_era_full(param, level):
    arr = np.load("18To20{}{}_uint8.npy".format(param, level))
    return arr/256

# Import data
params = ["z", "z", "z"]
levels = [500, 700, 1000]

in1_var = get_era_full(params[0], levels[0])
in2_var = get_era_full(params[1], levels[1])
in3_var = get_era_full(params[2], levels[2])

# data for a given airport
X = np.concatenate((np.expand_dims(in1_var, axis=3), np.expand_dims(in2_var, axis=3), np.expand_dims(in3_var, axis=3)), axis=3)
Y = get_windspeed('VABB').reshape(-1, 1)

import sklearn
from sklearn.model_selection import train_test_split

# In the first step we will split the data in training and remaining dataset
X_train, X_rem, Y_train, Y_rem = train_test_split(X,Y, train_size=0.7)

# Now since we want the valid and test size to be equal. 
X_valid, X_test, Y_valid, Y_test = train_test_split(X_rem,Y_rem, test_size=0.5)

#to clear space for gpu, if occupied by any process
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.compat.v1.Session(config=config)

conv_base = tf.keras.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(157, 157, 3))

# preprocess_input

train_features = tf.keras.applications.resnet.preprocess_input(X_train)
val_features = tf.keras.applications.resnet.preprocess_input(X_valid)
test_features = tf.keras.applications.resnet.preprocess_input(X_test)

In [8]:
train_features = conv_base.predict(train_features)
val_features = conv_base.predict(val_features)
test_features = conv_base.predict(test_features)

ResourceExhaustedError:  OOM when allocating tensor with shape[2048,81,81] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node resnet50/pool1_pad/Pad (defined at <ipython-input-8-45aa0d35849b>:1) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_predict_function_27991]

Function call stack:
predict_function
