In [250]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing

# Print the installed TensorFlow version so the cell output records the
# environment this notebook was executed in.
print(tf.__version__)

1.12.0


In [255]:
## Import Data
# Genre name -> integer class index (used to pick the one-hot label row).
GENRES_IDX = dict(zip(("blues", "gospel", "rap", "country", "rock"), range(5)))

# Names of the dense (non-sparse) feature columns, in the order they are
# placed at the front of the design matrix.
NON_SPARSE_FEATS = [
    "n_wrds",
    "avg_wrd_len",
    "n_lines",
    "avg_line_len",
    "n_contractions",
    "contraction_density",
    "vocab_size",
    "edge_density",
    "edge_density_weighted",
    "edge_weight_var",
    "degree_var",
    "degree_var_weighted",
    "degree_avg",
    "degree_avg_weighted",
    "comp_size_avg",
]

# Number of leading design-matrix columns occupied by the dense features.
N_NONSPARSE = len(NON_SPARSE_FEATS)

def df_to_arr(df):
    """Convert a feature DataFrame into a design matrix and one-hot labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column listed in ``NON_SPARSE_FEATS``, a ``"topk"``
        column whose cells are iterables (assumed to be numeric and of equal
        length, otherwise the column-wise concatenation below fails), and a
        ``"genre"`` column whose values are keys of ``GENRES_IDX``.

    Returns
    -------
    data : numpy.ndarray
        The ``NON_SPARSE_FEATS`` columns followed by the unpacked ``"topk"``
        values, one row per DataFrame row.
    labels : numpy.ndarray
        One-hot genre labels, one row per DataFrame row and one column per
        class index in ``GENRES_IDX``.

    Raises
    ------
    KeyError
        If a required column is missing or a genre is not in ``GENRES_IDX``.
    """
    dense = df[NON_SPARSE_FEATS].values

    # Each "topk" cell holds a whole sequence; unpack the cells into a 2-D block.
    topk = np.array([list(cell) for cell in df["topk"].values])

    # Size the one-hot encoding from the genre mapping instead of a literal 5,
    # so adding a genre to GENRES_IDX cannot index past the identity matrix.
    n_classes = max(GENRES_IDX.values()) + 1
    class_idx = np.asarray([GENRES_IDX[genre] for genre in df["genre"].values],
                           dtype=int)
    labels = np.eye(n_classes)[class_idx]

    data = np.concatenate((dense, topk), axis=1)
    return data, labels

def read_data(fn):
    """Load the pickled feature table and return scaled train/val/test arrays.

    The table is split on its ``data_split`` column (``'train'``, ``'val'``,
    ``'test'``). The first ``N_NONSPARSE`` design-matrix columns are
    standardised and the remaining columns are min-max scaled; both scalers
    are fitted on the training split only and then applied to every split.

    Parameters
    ----------
    fn : str
        Path of a pickle file readable by ``pandas.read_pickle``.

    Returns
    -------
    tuple
        ``(X_train, Y_train, X_val, Y_val, X_test, Y_test)`` where each ``X``
        is the scaled design matrix and each ``Y`` the one-hot label matrix
        produced by ``df_to_arr``.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    df = pd.read_pickle(fn).fillna(0)

    X_train, Y_train = df_to_arr(df.query("data_split == 'train'"))
    X_val, Y_val = df_to_arr(df.query("data_split == 'val'"))
    X_test, Y_test = df_to_arr(df.query("data_split == 'test'"))

    ## Standardize Data
    # Fit on the training split only so no validation/test statistics leak in.
    scaler = preprocessing.StandardScaler().fit(X_train[:, :N_NONSPARSE])
    scaler_sparse = preprocessing.MinMaxScaler().fit(X_train[:, N_NONSPARSE:])

    def _scale(X):
        # Build a new array rather than assigning into slices of X: slice
        # assignment silently casts the scaled values to X's existing dtype.
        return np.hstack((scaler.transform(X[:, :N_NONSPARSE]),
                          scaler_sparse.transform(X[:, N_NONSPARSE:])))

    return _scale(X_train), Y_train, _scale(X_val), Y_val, _scale(X_test), Y_test

# Pickled feature table holding the train, validation and test rows.
ALL_DATA_FN = "all.data"
(
    X_train, Y_train,
    X_val, Y_val,
    X_test, Y_test,
) = read_data(ALL_DATA_FN)

In [256]:
# Train Model
# Seed NumPy and TensorFlow so a fresh-kernel run (Restart & Run All) is
# repeatable.
# NOTE(review): re-running only this cell in a live kernel, or running on some
# GPU kernels, may still give slightly different numbers -- confirm if exact
# reproducibility matters.
SEED = 42
np.random.seed(SEED)
tf.set_random_seed(SEED)

# Hyper-parameters, named once so training and evaluation stay in sync.
HIDDEN_UNITS = 200
N_EPOCHS = 20
BATCH_SIZE = 32
EARLY_STOP_PATIENCE = 2

model = keras.Sequential([
    keras.layers.Dense(HIDDEN_UNITS, activation=tf.nn.relu),
    # One softmax output per one-hot label column.
    keras.layers.Dense(Y_train.shape[1], activation=tf.nn.softmax)
])

model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

callbacks = [
  # Interrupt training if `val_loss` stops improving for over 2 epochs
  keras.callbacks.EarlyStopping(patience=EARLY_STOP_PATIENCE, monitor='val_loss'),
  # Write TensorBoard logs to `./logs` directory
  keras.callbacks.TensorBoard(log_dir='./logs')
]

# `fit` returns the training history, not the model; the name is kept because
# later cells may refer to it.
model_fitted = model.fit(X_train, Y_train,
                         epochs=N_EPOCHS,
                         validation_data=(X_val, Y_val),
                         batch_size=BATCH_SIZE,
                         callbacks=callbacks
                        )

# Evaluate Model
# Last expression of the cell: displays [test loss, test accuracy].
model.evaluate(X_test, Y_test, batch_size=BATCH_SIZE)

Train on 35380 samples, validate on 11851 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


[1.008090195312893, 0.5855072463666489]