In [1]:
# Tensorflow / Keras
from tensorflow import keras # for building Neural Networks
print('Tensorflow/Keras: %s' % keras.__version__) # print version
from keras.models import Model, load_model # for creating a Neural Network Autoencoder model
from keras import Input # for instantiating a keras tensor
from keras.layers import Dense, LeakyReLU, BatchNormalization # for adding layers to AE model
from tensorflow.keras.utils import plot_model # for plotting model diagram

# Data manipulation
import pandas as pd # for data manipulation
print('pandas: %s' % pd.__version__) # print version

# Sklearn
import sklearn # for model evaluation
print('sklearn: %s' % sklearn.__version__) # print version
from sklearn.preprocessing import MinMaxScaler # For rescaling metrics to fit into 0 to 1 range
from sklearn.model_selection import train_test_split # for splitting the data into train and test samples

# Visualization
import matplotlib 
import matplotlib.pyplot as plt # for plotting model loss
print('matplotlib: %s' % matplotlib.__version__) # print version
import graphviz # for showing model diagram
print('graphviz: %s' % graphviz.__version__) # print version

# Other utilities
import sys
import os

# Remember the parent folder of the first module-search-path entry
main_dir = os.path.split(sys.path[0])[0]

In [4]:

# Set Pandas options to display more columns
pd.options.display.max_columns=50

# Read in the weather data csv
df=pd.read_csv('../input/weather-dataset-rattle-package/weatherAUS.csv', encoding='utf-8')

# Drop rows where any of the values are missing.
# Note, in this case it drops ~60% of the rows. Since we are using this data just as an example, it's ok.
# However, when you work with your own data, you may want to explore other options to fill in NA's with, say, mean values
df=df.dropna(axis=0)

# Create a 0/1 flag for RainToday: 1 when the value is 'Yes', 0 otherwise.
# A vectorized comparison replaces the per-row lambda; int64 matches the dtype the lambda produced.
df['RainTodayFlag']=(df['RainToday']=='Yes').astype('int64')

# Show a snapshot of the data (first rows only, rather than dumping the whole frame)
df.head()


In [5]:
# Select data for modeling
X=df[['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine', 'WindGustSpeed', 
      'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm', 'Pressure9am',  
      'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am', 'Temp3pm', 'RainTodayFlag']]

# Create training and testing samples BEFORE scaling, so the scaler never sees the test rows.
# Fitting the scaler on the full dataset would leak the test set's min/max into training.
X_train_raw, X_test_raw = train_test_split(X, test_size=0.2, random_state=0)

# Fit the scaler on the training sample only; training values end up between 0 and 1
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)

# Apply the training-derived scaling to the test sample.
# Test values can fall slightly outside [0, 1] when a test row exceeds the training range.
X_test = scaler.transform(X_test_raw)

# Whole dataset scaled with the same training-fitted scaler (kept under its original name)
X_scaled = scaler.transform(X)


In [11]:
# Preview the bottleneck width: half of the feature count, rounded
round(X_train.shape[1] / 2)

# Building and training an Autoencoder model

In [13]:
#--- Define Shapes
n_inputs=X_train.shape[1] # number of input neurons = the number of features in X_train
# Bottleneck gets roughly half the number of input neurons.
# Python's round() sends exact halves to the nearest even integer (e.g. 8.5 -> 8).
n_bottleneck=(round(float(n_inputs) / 2.0))

#--- Input Layer 
visible = Input(shape=(n_inputs,), name='Input-Layer') # Specify input shape

#--- Encoder Layer: Dense -> BatchNormalization -> LeakyReLU
e = Dense(units=n_inputs, name='Encoder-Layer')(visible)
e = BatchNormalization(name='Encoder-Layer-Normalization')(e)
e = LeakyReLU(name='Encoder-Layer-Activation')(e)

#--- Bottleneck: the compressed representation that the encoder-only model outputs later
bottleneck = Dense(units=n_bottleneck, name='Bottleneck-Layer')(e)

#--- Decoder Layer: mirrors the encoder layer
d = Dense(units=n_inputs, name='Decoder-Layer')(bottleneck)
d = BatchNormalization(name='Decoder-Layer-Normalization')(d)
d = LeakyReLU(name='Decoder-Layer-Activation')(d)

#--- Output layer: one linear unit per input feature, so the output can be compared to the input
output = Dense(units=n_inputs, activation='linear', name='Output-Layer')(d)

# Define autoencoder model
model = Model(inputs=visible, outputs=output, name='Autoencoder-Model')

# Compile autoencoder model (mean squared error between input and reconstruction)
model.compile(optimizer='adam', loss='mse')

# Print model summary.
# summary() prints the table itself and returns None, so wrapping it in print() added a stray "None" line.
model.summary()

# Plot the autoencoder model diagram
plot_model(model, to_file='./Autoencoder.png', show_shapes=True, dpi=300)


In [14]:

# Train the autoencoder: the input is also the target, so the network learns to reconstruct it
history = model.fit(
    X_train,
    X_train,
    epochs=10,
    batch_size=16,
    verbose=1,
    validation_data=(X_test, X_test),
)

# Draw training vs. held-out loss per epoch on a single set of axes
loss_by_epoch = history.history
fig, ax = plt.subplots(figsize=(16,9), dpi=300)
ax.set_title(label='Model Loss by Epoch', loc='center')

ax.plot(loss_by_epoch['loss'], label='Training Data', color='black')
ax.plot(loss_by_epoch['val_loss'], label='Test Data', color='red')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

In [17]:

# Build the encoder-only model: same input tensor, cut off at the bottleneck layer
encoder = Model(
    inputs=visible,
    outputs=bottleneck,
)

# Compile the encoder with the same optimizer and loss as the autoencoder
encoder.compile(
    optimizer='adam',
    loss='mse',
)

# Persist the encoder to disk in HDF5 format
encoder.save('./encoder.h5')

# Render the encoder-only diagram (shown as the cell output and written to a PNG)
plot_model(
    encoder,
    to_file='./Encoder_only.png',
    show_shapes=True,
    dpi=300,
)

In [18]:
# Load the encoder from file.
# Prefer the file written by the save step ('./encoder.h5') so a fresh run uses the encoder
# that was just trained; fall back to the previously published copy when no local file exists.
local_encoder_path = './encoder.h5'
published_encoder_path = '../input/autoencoder-ausweather/encoder.h5'
encoder_path = local_encoder_path if os.path.exists(local_encoder_path) else published_encoder_path
encoder = load_model(encoder_path)

# Encode train and test data (the encoder output is the bottleneck representation)
X_train_encoded = encoder.predict(X_train)
X_test_encoded = encoder.predict(X_test)

In [20]:
# Show the dimensions of the encoder's output for the training sample
X_train_encoded.shape

In [21]:
# Show the dimensions of the encoder's output for the test sample
X_test_encoded.shape