In [59]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv1D, MaxPooling1D, Reshape, Flatten

In [2]:
import numpy as np
import os

np.random.seed(42)

DATA_PATH = 'dataset'

In [3]:
from scipy.io import arff

def load_pulsar_csv(path = DATA_PATH):
    csv_path = os.path.join(path, 'HTRU_2.csv')
    return np.loadtxt(csv_path, delimiter=',', dtype=np.float32)

def load_pulsar_arff(path = DATA_PATH):
    arff_path = os.path.join(path, 'HTRU_2.arff')
    return arff.loadarff(arff_path)

In [4]:
pulsars = load_pulsar_csv()

In [5]:
import numpy as np

def split_train_dataset(data, test_ratio):
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(test_ratio * len(data))
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data[train_indices,:], data[test_indices,:]

# Use hash of identifier to decide if instance goes into train or test set

In [6]:
# Save Model
import os

MODELS_DIR = "models"

def save_model(model, name, acc=None):
    name += str(model.input.shape[1])
    for layer in model.layers:
        name += "-" + str(layer.output.shape[1])
    
    name += "_" + (("%.2f" % acc) if acc is not None else "")
    path = os.path.join(MODELS_DIR, name + ".h5")
    model.save(path)

In [7]:
train_set, test_set = split_train_dataset(pulsars, 0.2)

X_train, Y_train = train_set[:, :-1], train_set[:, -1]
X_test, Y_test = test_set[:, :-1], test_set[:, -1]

In [106]:
# Create Model
input_dimension = np.size(X_train, axis=1)

def create_model():
    model = Sequential()
    model.add(Dense(8, activation='relu', input_shape=(input_dimension, 1)))
    model.add(Reshape((None, 8, 1)))
    model.add(Conv1D(2, kernel_size=2, activation='relu'))
    model.add(MaxPooling1D())
    model.add(Flatten())
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [107]:
model = create_model()

ValueError: Tried to convert 'shape' to a tensor and failed. Error: None values not supported.

In [105]:
# Fit the Model
model.fit(X_train, Y_train, epochs=150, batch_size=15, validation_data=[X_test, Y_test])

ValueError: Error when checking input: expected dense_20_input to have 3 dimensions, but got array with shape (14319, 8)

In [21]:
X_train.shape

(14319, 8)

In [97]:
np.expand_dims(X_train, axis=0).shape

(1, 14319, 8)

In [94]:
np.expand_dims(np.random.randn(10, 5), axis=2).shape

(10, 5, 1)

In [96]:
np.reshape(X_train, (1, X_train.shape[0], X_train.shape[1])).shape

(1, 14319, 8)