In [None]:
import graphlab as gl
import pandas as pd
import cPickle as pickle
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Set the number of GraphLab Python-lambda workers (the setting named by the config key).
# NOTE(review): 36 is hard-coded — presumably sized for the training host; confirm before running elsewhere.
gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 36)
# Send GraphLab Canvas output to the notebook ('ipynb' target).
gl.canvas.set_target('ipynb')

### IMPORT TARGET INFO FROM PREPPER

In [None]:
def open_prepper(file_path):
    """Open the DataPrepper from pickled file.

    Parameters
    ----------
    file_path : str
        Path to the pickle file to load.

    Returns
    -------
    object
        Whatever object was pickled into ``file_path``.
    """
    # Pickles are binary data: text mode can corrupt the byte stream on some
    # platforms and fails outright on Python 3, so always read with 'rb'.
    # SECURITY: unpickling executes arbitrary code — only load trusted files.
    with open(file_path, 'rb') as f:
        prepper = pickle.load(f)
    return prepper

# Location of the pickled prepper, relative to the notebook's working directory.
file_path = '../data/store/data_prepper_BUILDING.pkl'
prepper = open_prepper(file_path)

# Each call returns a (features, target) pair.
# NOTE(review): the later pd.concat / pd.merge calls treat these as pandas objects — confirm.
X_train, y_train = prepper.return_training_data()
X_test, y_test = prepper.return_testing_data()

In [None]:
# Stack the train and test targets row-wise into one frame.
y_combined = pd.concat([y_train, y_test], axis=0)

In [None]:
# Histogram of the quantized view-count target over train + test rows.
# Labelled so the figure stands alone; the trailing ';' hides the repr noise.
fig, ax = plt.subplots()
ax.hist(y_combined['image_views_quantized'], bins=40)
ax.set(title='Distribution of image_views_quantized (train + test)',
       xlabel='image_views_quantized', ylabel='count');

### IMPORT IMAGES WITH GRAPHLAB

In [None]:
# Load the image files under the BUILDING directory into an SFrame.
# The cells below read its 'path' and 'image' columns.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
images_raw = gl.image_analysis.load_images('/home/ubuntu/data/images/BUILDING/')

In [None]:
# Preview the first loaded row.
images_raw.head(1)

In [None]:
# Bind a second name to the same SFrame object (no copy is made), so the
# column edits made through `images` below are also visible via `images_raw`.
images = images_raw

In [None]:
# Derive the bare file name from the full path. rfind keeps only the text
# after the LAST '/', so the result is correct for any directory depth
# (find would strip just the first path component).
images['filename'] = images['path'].apply(lambda x: x[x.rfind('/')+1:])

In [None]:
# Drop any remaining directory components in a single pass. Keeping the text
# after the last '/' handles every row regardless of nesting depth; the old
# loop decided when to stop from row 0 only, rescanned the whole column once
# per path level, and raised on an empty SFrame.
images['filename'] = images['filename'].apply(lambda x: x[x.rfind('/')+1:])

In [None]:
# The code splits each file name at its first '_' and first '.':
# NOTE(review): presumably names look like '<owner>_<id>.<ext>' — confirm.
# owner = text before the first '_'.
images['owner'] = images['filename'].apply(lambda name: name[:name.find('_')])
# id = text after the first '_', then cut at the first '.' of that remainder.
images['id'] = (images['filename']
                .apply(lambda name: name[name.find('_')+1:])
                .apply(lambda tail: tail[:tail.find('.')]))

In [None]:
# Drop the helper columns now that owner/id are extracted. The result is not
# assigned, so this relies on remove_columns mutating `images` in place.
images.remove_columns(['path', 'filename'])

In [None]:
# Inspect the column dtypes before casting 'id' in the next cell.
images.column_types()

In [None]:
# Cast the id sliced out of the file name to int.
# NOTE(review): presumably so it matches the dtype of target['id'] in the join below — confirm.
images['id'] = images['id'].apply(lambda x: int(x))

### COMBINE IMAGES WITH TARGET DATA

In [None]:
# Move the index levels into ordinary columns so they survive the SFrame conversion.
# NOTE(review): presumably the index holds 'owner' and 'id' (both are selected as columns later) — confirm.
# NOTE(review): inplace=True makes this cell non-idempotent; re-running it without
# rebuilding y_combined resets the index a second time.
y_combined.reset_index(inplace=True)

In [None]:
# Convert the combined pandas target frame into a GraphLab SFrame.
target = gl.SFrame(y_combined)

In [None]:
# Cast the flag and count columns to plain ints, one column at a time.
for int_column in ('user_is_pro', 'user_can_buy_pro', 'user_total_views', 'image_views'):
    target[int_column] = target[int_column].apply(lambda value: int(value))

In [None]:
# Preview the label and the two join-key columns used in the next cell.
target[['image_views_quantized', 'owner', 'id']].head(1)

In [None]:
# Attach the quantized label to each image. The inner join on (owner, id)
# keeps only images that have a matching target row.
join_keys = ['owner', 'id']
labels = target[['image_views_quantized', 'owner', 'id']]
images = images.join(labels, on=join_keys, how='inner')

In [None]:
# Preview the first row after the join.
images.head(1)

### RESIZE IMAGES

In [None]:
# Resize every image to a fixed size with 3 channels and store it in 'resized_image'.
# NOTE(review): the positional arguments are presumably (width=800, height=600) — confirm
# against the GraphLab resize signature and the input_shape given to the network.
images['resized_image'] = gl.image_analysis.resize(images['image'], 800, 600, channels=3)

### Save Data

In [None]:
# Persist the joined + resized SFrame so the import/resize steps can be skipped.
# The directory name must match the one read back by gl.load_sframe in the load
# cell ('..._RESIZED'); previously this wrote to a different path, so the load
# cell could never see the data produced here.
images.save('/home/ubuntu/data/GL_BUILDINGS_MODELING_DATA_RESIZED')

In [None]:
# Preview the first row of the saved frame.
images.head(1)

### LOAD SAVED DATA IF NEED BE

In [None]:
# Reload the persisted SFrame, replacing whatever `images` held in memory.
# This path must point at the directory written by the save cell.
images = gl.load_sframe('/home/ubuntu/data/GL_BUILDINGS_MODELING_DATA_RESIZED')

### Train Test Split

In [None]:
# 80/20 random split. A fixed seed makes the split reproducible, so metrics
# can be compared across kernel restarts.
images_train, images_test = images.random_split(0.8, seed=42)
# Drop the original 'image' column from both splits.
images_train.remove_column('image')
images_test.remove_column('image')

In [None]:
# Preview the first training row.
images_train.head(1)

In [None]:
# Preview the first test row.
images_test.head(1)

### NEURAL NET MODEL

In [None]:
# NOTE(review): repeats the test-set preview from the previous section; candidate for removal.
images_test.head(1)

In [None]:
# Custom network, in order: two convolution + max-pooling stages, a flatten,
# two 2000-unit fully connected layers each followed by tanh, and a final
# 5-unit fully connected layer feeding a softmax.
# NOTE(review): the first convolution uses num_channels=99 and the second 100 —
# confirm the difference is intentional.
nn_layers = gl.deeplearning.layers
cust_network_layers = [
    nn_layers.ConvolutionLayer(6, num_channels=99, stride=1, padding=0, num_groups=1),
    nn_layers.MaxPoolingLayer(2, stride=1, padding=0),
    nn_layers.ConvolutionLayer(6, 100, stride=1, padding=0, num_groups=1),
    nn_layers.MaxPoolingLayer(2, stride=1, padding=0),
    nn_layers.FlattenLayer(),
    nn_layers.FullConnectionLayer(2000, init_bias=0, init_sigma=.01),
    nn_layers.TanhLayer(),
    nn_layers.FullConnectionLayer(2000, init_sigma=.1),
    nn_layers.TanhLayer(),
    nn_layers.FullConnectionLayer(5, init_sigma=0.5),
    nn_layers.SoftmaxLayer(),
]

# Wrap the layer stack in a NeuralNet and check it against the expected
# input shape and the 5 output classes.
custom_network = gl.deeplearning.NeuralNet()
custom_network.layers = cust_network_layers
custom_network.verify(input_shape=[800, 600, 3], output_shape=5)

In [None]:
# Train the custom network to predict the quantized view-count class.
# The network was verified for the fixed-size 800x600x3 input, which is the
# 'resized_image' column; the raw 'image' column has not been resized.
# Training uses the full `images` frame with validation_set='auto'.
# A checkpoint is written every 5 iterations.
network = gl.neuralnet_classifier.create(images, target='image_views_quantized', features=['resized_image'],
                                         max_iterations=100, network=custom_network, validation_set='auto',
                                         class_weights='auto', metric='auto', random_crop=False,
                                         input_shape=None, random_mirror=False, learning_rate=0.001, momentum=0.9,
                                         l2_regularization=0.0005, bias_l2_regularization=0.0, init_random='gaussian',
                                         init_sigma=0.01, init_bias=0.0,
                                         model_checkpoint_path='/home/ubuntu/data/GL_BUILDINGS_MODEL_CHECKPOINT',
                                         model_checkpoint_interval=5, verbose=True)

In [None]:
# Show the layers of the trained model.
network.layers

In [None]:
# Second classifier: predicts 'user_is_pro' from 'deep_.resized_image', using
# images_test as the validation set and checkpointing every 5 iterations.
# NOTE(review): no visible cell creates a 'deep_.resized_image' column — confirm the feature name.
# NOTE(review): the visible join adds only 'image_views_quantized' from the target
# frame, so 'user_is_pro' may be missing from images_train — confirm.
# NOTE(review): custom_network ends in a 5-unit layer + softmax and was verified with
# output_shape=5; confirm that matches the number of classes in 'user_is_pro'.
model_neuralnetclassifier_ispro = gl.neuralnet_classifier.create(images_train, target='user_is_pro',
                                                                 features=['deep_.resized_image'],
                                                                 network=custom_network, max_iterations=100,
                                                                 validation_set=images_test, batch_size=1000,
                                                                 model_checkpoint_path='neural_network_model',
                                                                 model_checkpoint_interval=5
                                                                 )

In [None]:
# extract_features pushes a dataset through the trained network and returns one
# dense feature vector per row, so it needs a dataset argument. The call with
# no arguments could not run.
model_neuralnetclassifier_ispro.extract_features(images_test)

In [None]:
# Evaluate the user_is_pro classifier on the held-out split; the cells below index the result by metric name.
neuralnetclassifier_results = model_neuralnetclassifier_ispro.evaluate(images_test)

In [None]:
# List the metrics available in the evaluation result.
neuralnetclassifier_results.keys()

In [None]:
# Accuracy on images_test.
neuralnetclassifier_results['accuracy']

In [None]:
# Confusion matrix on images_test.
neuralnetclassifier_results['confusion_matrix']

In [None]:
# Predictions for the held-out rows.
nn_test_predictions = model_neuralnetclassifier_ispro.predict(images_test)

In [None]:
# Check what container type predict() returned.
type(nn_test_predictions)

In [None]:
# Display GraphLab's built-in 'imagenet' network for reference; the result is
# not assigned, so nothing below uses it.
gl.deeplearning.get_builtin_neuralnet('imagenet')

### TRY GBC in GRAPHLAB

In [None]:
# Attach the quantized target to the feature frames by joining on the shared index.
# NOTE(review): the original call suggests an (owner, id) index — confirm.
# left_index / right_index are boolean flags in pandas, not lists of level
# names; passing lists is rejected by current pandas and only worked before
# because a non-empty list is truthy.
data_train = pd.merge(X_train, y_train[['image_views_quantized']], how='inner', left_index=True, right_index=True)
data_test = pd.merge(X_test, y_test[['image_views_quantized']], how='inner', left_index=True, right_index=True)

In [None]:
# Convert both merged pandas frames into GraphLab SFrames for model training.
data_train = gl.SFrame(data_train)
data_test = gl.SFrame(data_test)

In [None]:
# Gradient-boosted trees classifier for the quantized view-count class.
# Despite the variable name, this is boosted trees, not a random forest.
# Row and column subsampling make training stochastic, so random_seed is
# fixed to keep the results reproducible between runs.
model_RF = gl.boosted_trees_classifier.create(data_train, target='image_views_quantized', max_iterations=1000,
                                              class_weights='auto', verbose=True, column_subsample=0.5, row_subsample=0.5,
                                              metric=['accuracy'], max_depth=5, random_seed=42)

In [None]:
# Evaluate the boosted-trees model on the held-out test frame.
results = model_RF.evaluate(dataset=data_test)

In [None]:
# List the metrics available in the evaluation result.
results.keys()

In [None]:
# Accuracy on data_test.
results['accuracy']

In [None]:
# F1 score on data_test.
results['f1_score']

In [None]:
# Precision on data_test.
results['precision']

In [None]:
# Recall on data_test.
results['recall']

In [None]:
# Confusion matrix on data_test.
results['confusion_matrix']

In [None]:
# ROC curve data; the plot in the next cell reads its 'fpr' and 'tpr' columns.
results['roc_curve']

In [None]:
# Plot the ROC curve as a scatter of 'fpr' (x axis) against 'tpr' (y axis).
results['roc_curve'].show(view='Scatter Plot', x='fpr', y='tpr')