In [1]:
'''
### Import Relevant Libraries
'''
import numpy as np # Version 1.16.0
import tensorflow as tf # Originally pinned to 1.12.0; the recorded run below reports 2.13.0
import pandas as pd
import keras # Originally pinned to 2.2.4; the recorded run below reports 2.13.1
from sklearn.metrics import r2_score
import innvestigate as inn # Version 1.0.8
# Report the library versions actually loaded in this kernel, so the
# environment is recorded next to the results.
print("tensorflow version:", tf.__version__)
print("keras version:", keras.__version__)

tensorflow version: 2.13.0
keras version: 2.13.1


In [10]:
'''
### Read and Process Data
'''
# Load in raw count data for neural network (first CSV column is the row index)
X = pd.read_csv("Data/X.csv", index_col=0)
# Load in ground truth methane production rate data (first CSV column is the row index)
y = pd.read_csv("Data/Y.csv", index_col=0)
# The cross-validation cell pairs rows of X and y, so both tables must list
# the same samples in the same order. Fail early instead of silently
# training on misaligned targets.
assert X.index.equals(y.index), "X and y must contain the same samples in the same order"

# Set parameters
num_samples = X.shape[0]
num_folds = 149 # Leave-one-out = 149
num_features = 50 # All features = 489
# Every fold needs at least one validation sample and a non-empty training set.
assert 2 <= num_folds <= num_samples, (
    "num_folds must be between 2 and the number of samples ({})".format(num_samples)
)

# Create linearly spaced chunks for cross validation:
# chunks[k]:chunks[k+1] are the row positions held out in fold k.
chunks = np.ceil(np.linspace(0,num_samples, num=num_folds+1)).astype(int)

In [11]:
'''
### Define and Compile Model
'''
def build_model(input_shape, learning_rate=0.001, dropout_rate=0.1):
    """Build and compile the 1D-convolutional regression network.

    Architecture: six groups of three Conv1D layers (kernel_size=1, ReLU)
    with 128, 128, 64, 64, 32 and 32 filters respectively. A Dropout layer
    follows each of the first five groups; the last group feeds straight
    into Flatten -> Dense(64, ReLU) -> Dense(1).

    Parameters
    ----------
    input_shape : tuple
        Shape of a single sample, given to the first Conv1D layer. The
        callers in this notebook pass ``(n_features, 1)``.
    learning_rate : float, optional
        Step size for the Adam optimizer (default 0.001, the value the
        notebook has always used).
    dropout_rate : float, optional
        Rate for every Dropout layer (default 0.1, the original value).

    Returns
    -------
    keras.models.Sequential
        The model, compiled with mean-squared-error loss and Adam.
    """
    # Filters per group of three stacked convolutions, in network order.
    conv_group_filters = (128, 128, 64, 64, 32, 32)
    convs_per_group = 3

    layers = []
    for group_idx, filters in enumerate(conv_group_filters):
        for _ in range(convs_per_group):
            conv_kwargs = dict(filters=filters, kernel_size=1, activation='relu')
            if not layers:
                # Only the very first layer declares the input shape.
                conv_kwargs['input_shape'] = input_shape
            layers.append(keras.layers.Conv1D(**conv_kwargs))
        # Dropout separates consecutive groups; none after the final group.
        if group_idx < len(conv_group_filters) - 1:
            layers.append(keras.layers.Dropout(dropout_rate))

    layers.extend([
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1),
    ])

    # Define model
    model = keras.models.Sequential(layers)

    # Compile Model
    # `learning_rate` replaces the deprecated `lr` alias, which the Keras
    # version recorded in this notebook's output (2.13.1) only tolerates
    # with a warning.
    model.compile(loss='mse', optimizer=keras.optimizers.Adam(learning_rate=learning_rate))

    return model

In [None]:
'''
### Run Neural Network Cross Validation
'''
# Per-fold absolute LRP scores, keyed by fold number. Assembled into the
# `lrp_cache` DataFrame once after the loop, because inserting one column per
# fold into a DataFrame fragments it and triggers pandas performance warnings.
lrp_abs_by_fold = {}
ann_predictions = np.array([])
# NOTE(review): Keras History objects may keep a reference to their model;
# if memory grows over many folds, consider caching `history.history` instead.
history_cache = []
for fold in range(num_folds):
    # Row positions [fold_start, fold_stop) are held out in this fold
    fold_start, fold_stop = chunks[fold], chunks[fold+1]
    if fold_start == fold_stop:
        # Empty fold (only possible when num_folds > num_samples): nothing to validate
        continue

    # Reset keras session to reduce model clutter
    tf.keras.backend.clear_session()

    # Select validation samples (explicitly positional)
    X_val = X.iloc[fold_start:fold_stop]
    y_val = y.iloc[fold_start:fold_stop]
    # Flat target vector, same layout as y_train below
    y_val_values = y_val.values.flatten()
    # Select training samples: every row outside the held-out positions.
    # Done by position for both X and y so the two tables cannot be
    # paired through mismatched index labels.
    X_train = pd.concat([X.iloc[:fold_start], X.iloc[fold_stop:]])
    y_train = pd.concat([y.iloc[:fold_start], y.iloc[fold_stop:]]).values.flatten()

    # Feature selection using Layerwise Relevance Propagation (LRP)
    # Build and train model for LRP on the training samples only
    X_train_all_features = np.expand_dims(X_train.values, axis=2)
    model = build_model((X_train.shape[1],1))
    model.fit(X_train_all_features, y_train, batch_size=32, epochs=150, verbose=0)
    # NOTE(review): with TensorFlow 2.x, iNNvestigate is documented to need
    # eager execution disabled before models are built - confirm for the
    # installed version.
    analyzer = inn.create_analyzer("lrp.z_plus_fast", model)
    # Perform backwards pass through trained neural network to generate relevance scores
    scores = analyzer.analyze(X_train_all_features)[...,0]
    # Store the per-feature mean relevance (averaged over training samples) in a DataFrame
    lrp = pd.DataFrame(scores.mean(axis=0), index=X_train.columns, columns=["Score"])
    lrp["Abs Score"] = np.abs(lrp["Score"])
    # Cache before sorting so every fold keeps the original feature order
    lrp_abs_by_fold[fold] = lrp["Abs Score"]
    # Rank features by absolute relevance, most important first
    lrp = lrp.sort_values(by="Abs Score", ascending=False)

    # Select most important features
    top_features = lrp.index[:num_features]

    # Reshape data for neural network: (samples, features, 1)
    X_train = np.expand_dims(X_train[top_features].values, axis=2)
    X_val = np.expand_dims(X_val[top_features].values, axis=2)

    # Run neural network model on the selected features
    model = build_model((X_train.shape[1],1))
    history = model.fit(X_train, y_train, batch_size=32, epochs=150, verbose=0,
                        validation_data=(X_val, y_val_values))

    # Cache prediction values to array (kept in fold order, i.e. row order of y)
    predictions = model.predict(X_val).flatten()
    ann_predictions = np.concatenate([ann_predictions, predictions])
    history_cache.append(history)

    # Print status update
    print("--------[{}/{}]--------".format(fold+1, num_folds))
    fold_predictions = ann_predictions[fold_start:fold_stop]
    for sample_id, predicted, truth in zip(y_val.index.values, fold_predictions, y_val_values):
        print("Validation Sample:", sample_id)
        print("ANN Prediction: {:.5f}".format(predicted))
        print("Ground Truth: {:.5f}\n".format(truth))

# One column per fold, one row per feature
lrp_cache = pd.DataFrame(lrp_abs_by_fold)

# Print results    
print("\nCross Validation Results:\n")
ann_r2 = r2_score(y, ann_predictions)
print("Neural Network R2 Score: {:.5f}".format(ann_r2))