In [1]:
import graphlab as gl
import pandas as pd
import cPickle as pickle
import matplotlib.pyplot as plt
%matplotlib inline

This non-commercial license of GraphLab Create for academic use is assigned to mpmakris@gmail.com and will expire on June 30, 2017.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1470536484.log


In [2]:
# Set GraphLab's GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS runtime option to 40.
# NOTE(review): per the key name this controls how many worker processes evaluate
# Python lambdas (the .apply calls below); 40 presumably matches the host's cores — confirm.
gl.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 40)

### IMPORT TARGET INFO FROM PREPPER

In [None]:
def open_prepper(file_path):
    """Open the DataPrepper from pickled file.

    Parameters
    ----------
    file_path : str
        Path to the pickle file holding the serialized DataPrepper.

    Returns
    -------
    The object stored in the pickle file.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    pickle.UnpicklingError
        If the file content is not a valid pickle.
    """
    # Pickle data is a byte stream: open in binary mode so the payload is not
    # altered by newline translation and the code also works on Python 3.
    # SECURITY: unpickling executes arbitrary code; only load trusted files.
    with open(file_path, 'rb') as f:
        prepper = pickle.load(f)
    return prepper

# Pickled DataPrepper for the BUILDING image set (path is relative to the working directory).
file_path = '../data/store/data_prepper_BUILDING.pkl'
prepper = open_prepper(file_path)

# Each call is unpacked into two objects, presumably features (X) and targets (y) — confirm
# against DataPrepper. Only y_train / y_test are used by the cells visible below.
X_train, y_train = prepper.return_training_data()
X_test, y_test = prepper.return_testing_data()

In [None]:
# Stack the train and test targets row-wise (concat's default axis) into one frame.
y_combined = pd.concat([y_train, y_test])

In [None]:
# Histogram of the quantized image-view target over train + test combined.
# An explicit Axes lets the figure carry labels so it stands alone when skimmed;
# the trailing semicolon hides the repr of the last expression.
fig, ax = plt.subplots()
ax.hist(y_combined['image_views_quantized'], bins=40)
ax.set(title='image_views_quantized (train + test)',
       xlabel='image_views_quantized', ylabel='count');

### GRAPH LAB IMPORT IMAGES

In [None]:
# Load the images found under the BUILDING directory into an SFrame; later cells read
# its 'path' and 'image' columns.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
images_raw = gl.image_analysis.load_images('/home/ubuntu/data/images/BUILDING/')

In [None]:
# Peek at the first loaded row to check what was read.
images_raw.head(1)

In [None]:
# Work on a copy: plain assignment would only alias images_raw, so the column
# additions and removals below would silently modify the "raw" frame as well.
images = images_raw.copy()

In [None]:
# Keep only the final path component. rfind locates the LAST '/', so every row is
# reduced to its bare file name regardless of directory depth; when there is no '/',
# rfind returns -1 and the whole string is kept.
images['filename'] = images['path'].apply(lambda x: x[x.rfind('/')+1:])

In [None]:
# Strip any remaining directory components in a single pass.
# The previous loop removed one leading component per iteration and stopped as soon
# as row 0 had no '/', so rows with deeper paths kept directory parts; it also indexed
# images[0], which fails on an empty frame. rfind handles every row independently
# (it returns -1 when there is no '/', leaving the value unchanged).
images['filename'] = images['filename'].apply(lambda x: x[x.rfind('/')+1:])

In [None]:
def _owner_from_filename(name):
    """Return the part of `name` before its first underscore.

    If `name` has no underscore, find() yields -1 and the last character is dropped.
    """
    return name[:name.find('_')]


def _id_from_filename(name):
    """Return the text after the first underscore, cut at the first following dot."""
    tail = name[name.find('_')+1:]
    return tail[:tail.find('.')]


# File names are parsed as <owner>_<id>.<extension>; both parts stay strings here.
images['owner'] = images['filename'].apply(_owner_from_filename)
images['id'] = images['filename'].apply(_id_from_filename)

In [None]:
# Drop the helper columns now that 'owner' and 'id' have been derived from them.
# The result is not reassigned: later cells keep using `images` directly.
images.remove_columns(['path', 'filename'])

In [None]:
# Inspect the column dtypes before casting 'id' in the next cell.
images.column_types()

In [None]:
# Convert the string ids to integers with the built-in cast instead of a per-row
# Python lambda; the result dtype is explicitly int.
images['id'] = images['id'].astype(int)

### COMBINE IMAGES WITH TARGET DATA

In [None]:
# Move the index of y_combined into regular columns (in place).
# NOTE(review): presumably the index holds 'owner' and 'id', which the join below needs
# as columns — confirm. This cell is not idempotent: re-running it resets the index again.
y_combined.reset_index(inplace=True)

In [None]:
# Convert the pandas target frame to an SFrame so it can be joined with `images`.
target = gl.SFrame(data=y_combined)

In [None]:
# Attach the target columns to each image by matching on (owner, id).
# Inner join: images without a target row, and targets without an image, are dropped.
images = images.join(target, on=['owner', 'id'], how='inner')

In [None]:
# Cast these columns to integers. One loop replaces four copy-pasted statements, and
# the built-in SArray cast avoids running a Python lambda for every row.
for column in ['user_is_pro', 'user_can_buy_pro', 'user_total_views', 'image_views']:
    images[column] = images[column].astype(int)

### RESIZE IMAGES AND EXTRACT DEEP FEATURES

In [4]:
# Resize every image to width 800 x height 600 with 3 channels, stored in a new column.
images['resized_image'] = gl.image_analysis.resize(images['image'], 800, 600, channels=3)

# Deep-feature transformer reading 'resized_image'; its output column is prefixed with
# 'deep_' (the custom network below consumes 'deep_.resized_image').
extractor = gl.feature_engineering.DeepFeatureExtractor(features='resized_image', output_column_prefix='deep_')

# fit() must run before transform().
extractor = extractor.fit(images)

# Keep a handle on the extractor's 'model' entry; it is not used by the cells visible below.
extracted_model = extractor['model']

# Replace `images` with the transformed frame that includes the extracted features.
images = extractor.transform(images)

### Save Data, then Train-Test-Split

In [5]:
# Persist the prepared frame so the preparation above does not have to be re-run;
# the same path is read back by gl.load_sframe further down.
images.save('/home/ubuntu/data/GL_BUILDINGS_MODELING_DATA_RESIZED')

In [6]:
# Show one row of the prepared frame to check its columns.
images.head(1)

image,owner,id,user_is_pro,user_can_buy_pro,user_total_views,image_ncomments
Height: 1066 Width: 1600,49503002894@N01,20924375303,1.0,0.0,106299.0,0

image_nfavs,image_nsets,image_npools,image_views,image_views_quantized,user_total_views_quantize d ...,image_nfavs_quantized
0.0,1,0,41.0,2,5,1

image_ncomments_quantized,image_nsets_quantized,image_npools_quantized,resized_image
1,1,1,Height: 600 Width: 800


In [7]:
# 80/20 train/test split. A fixed seed makes the partition reproducible, so model
# metrics can be compared across kernel restarts.
images_train, images_test = images.random_split(0.8, seed=42)

### LOAD SAVED DATA IF NEED BE

In [3]:
# Alternative entry point: reload the prepared frame saved earlier instead of
# re-running the image preparation.
images = gl.load_sframe('/home/ubuntu/data/GL_BUILDINGS_MODELING_DATA_RESIZED')
# The modelling cells below need images_train / images_test, so create them here too.
# The fixed seed gives the same partition on every load.
images_train, images_test = images.random_split(0.8, seed=42)

### NEURAL NET MODEL

In [None]:
# Check that the test split exists and has the expected columns.
images_test.head(1)

In [None]:
# Baseline classifier: predict 'user_is_pro' directly from the resized image pixels.
# network=None leaves the architecture choice to GraphLab. A checkpoint is written
# every 5 iterations to the given path.
# NOTE(review): validation_set is the test split, so no untouched hold-out remains for
# a final evaluation of this model — consider a separate validation split.
network = gl.neuralnet_classifier.create(images_train, target='user_is_pro', features=['resized_image'],
                                         max_iterations=100, network=None, validation_set=images_test,
                                         class_weights='auto', metric='auto', random_crop=False,
                                         input_shape=None, random_mirror=False, learning_rate=0.001, momentum=0.9,
                                         l2_regularization=0.0005, bias_l2_regularization=0.0, init_random='gaussian',
                                         init_sigma=0.01, init_bias=0.0,
                                         model_checkpoint_path='/home/ubuntu/data/GL_BUILDINGS_MODEL_CHECKPOINT',
                                         model_checkpoint_interval=5)

In [None]:
# Show the layers of the baseline model created above.
network.layers

In [None]:
# Shorthand for the layer-constructor namespace.
_layers = gl.deeplearning.layers

# Layer stack in input-to-output order: three hidden fully connected layers
# (100 -> 200 -> 150 units) followed by a 2-unit softmax output.
cust_network_layers = [
    _layers.FullConnectionLayer(100, init_sigma=0.5),
    _layers.RectifiedLinearLayer(),
    _layers.DropoutLayer(0.5),
    _layers.FullConnectionLayer(200, init_sigma=.1),
    _layers.TanhLayer(),
    _layers.FullConnectionLayer(150, init_sigma=.1),
    _layers.TanhLayer(),
    _layers.FullConnectionLayer(2, init_sigma=0.5),
    _layers.SoftmaxLayer(),
]

# Wrap the stack in a NeuralNet and check it against a [1, 1, 4096] input and 2 outputs.
custom_network = gl.deeplearning.NeuralNet()
custom_network.layers = cust_network_layers
custom_network.verify(input_shape=[1, 1, 4096], output_shape=2)

In [None]:
# Train the custom network on the extracted deep features ('deep_.resized_image')
# to predict 'user_is_pro', checkpointing every 5 iterations.
# NOTE(review): the checkpoint path is relative here but absolute for the baseline model,
# and the test split again doubles as the validation set — confirm both are intended.
model_neuralnetclassifier_ispro = gl.neuralnet_classifier.create(images_train, target='user_is_pro',
                                                                 features=['deep_.resized_image'],
                                                                 network=custom_network, max_iterations=100,
                                                                 validation_set=images_test, batch_size=1000,
                                                                 model_checkpoint_path='neural_network_model',
                                                                 model_checkpoint_interval=5
                                                                 )

In [None]:
# extract_features(dataset) pushes each row of `dataset` through the trained network
# and returns one dense vector of hidden-unit activations per row, usable as input
# features for another model. The dataset argument is required; calling it with no
# arguments raises a TypeError.
# NOTE(review): with layer_id left unset, GraphLab picks the layer itself — see the API docs.
model_neuralnetclassifier_ispro.extract_features(images_test).head(1)

In [None]:
# Evaluate the custom-network model on the test split.
# NOTE(review): images_test was also the validation set during training, so these
# numbers are not from an untouched hold-out.
neuralnetclassifier_results = model_neuralnetclassifier_ispro.evaluate(images_test)

In [None]:
# List which metrics the evaluation returned.
neuralnetclassifier_results.keys()

In [None]:
# Accuracy entry of the evaluation results.
neuralnetclassifier_results['accuracy']

In [None]:
# Confusion-matrix entry of the evaluation results.
neuralnetclassifier_results['confusion_matrix']

In [None]:
# Predictions of the custom-network model for the test split.
nn_test_predictions = model_neuralnetclassifier_ispro.predict(images_test)

In [None]:
# Check what kind of object predict() returned.
type(nn_test_predictions)

In [None]:
# Display GraphLab's built-in 'imagenet' network definition for reference;
# the result is not assigned or used elsewhere in the visible cells.
gl.deeplearning.get_builtin_neuralnet('imagenet')