# Project Keep with Jupyter

#### Import the required libraries

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import json                      #Formating payload
import requests                  #GET and POST requests

import tensorflow as tf          #Machine Learning
from tensorflow import keras

import pandas as pd              #Data Analysis
import numpy as np               #Numerical Python used for Scientific Computing
import seaborn as sns            #Statistical Graphics built on top of matplotlib and integrated with pandas data structures
import matplotlib.pyplot as plt  #Visualization
#%matplotlib inline

#### Check Tensorflow and Keras version

In [None]:
print(tf.__version__)
print(tf.keras.__version__)

#### Import and load the Fashion MNIST data directly from TensorFlow

In [None]:
fashion_mnist = keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

#### Let's explore the format of the dataset before training the model. The following shows there are 60,000 images in the training set, with each image represented as 28 x 28 pixels: 

In [None]:
train_images.shape

#### Likewise, there are 60,000 labels in the training set:

In [None]:
len(train_labels)

#### Each label in `train_labels` is an integer between 0 and 9:

In [None]:
train_labels

#### There are 10,000 images in the test set. Again, each image is represented as 28 x 28 pixels:

In [None]:
test_images.shape

#### And the test set contains 10,000 image labels:

In [None]:
len(test_labels)

#### The data must be preprocessed before training the network. If you inspect the first image in the training set, you will see that the pixel values fall in the range of 0 to 255:

In [None]:
plt.figure()
plt.imshow(train_images[0])
plt.colorbar()
plt.grid(False)
plt.show()

#### Scale these values to a range of 0 to 1 before feeding them to the neural network model. To do so, divide the values by 255. It's important that the training set and the testing set be preprocessed in the same way:

In [None]:
train_images = train_images / 255.0

test_images = test_images / 255.0

In [None]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

#### To verify that the data is in the correct format and that you're ready to build and train the network, let's display the first 25 images from the training set and display the class name below each image.

In [None]:
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[train_labels[i]])
plt.show()

#### Set up the layers

In [None]:
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

#### Compile the model

In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#### Train the model

In [None]:
model.fit(train_images, train_labels, epochs=30)

#### Evaluate accuracy

In [None]:
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)

print('\nTest accuracy:', test_acc)

#### Make predictions

In [None]:
predictions = model.predict(test_images)

In [None]:
predictions[0]

In [None]:
np.argmax(predictions[0])

In [None]:
test_labels[0]

#### Graph this to look at the full set of 10 class predictions

In [None]:
def plot_image(i, predictions_array, true_label, img):
    """Draw sample ``i`` on the current axes with a prediction caption.

    Args:
        i: Index of the sample inside ``true_label`` and ``img``.
        predictions_array: Per-class scores for sample ``i``; the index of
            the largest score is taken as the predicted class.
        true_label: Indexable collection of integer class labels.
        img: Indexable collection of images.

    The caption has the form "<predicted> <confidence>% (<actual>)" and is
    blue when the predicted class equals the actual class, red otherwise.
    Class names are looked up in the module-level ``class_names`` list.
    Nothing is returned.
    """
    actual = true_label[i]
    picture = img[i]

    # Bare image: no grid and no tick marks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture, cmap=plt.cm.binary)

    guess = np.argmax(predictions_array)
    caption_color = 'blue' if guess == actual else 'red'
    caption = "{} {:2.0f}% ({})".format(class_names[guess], 100*np.max(predictions_array), class_names[actual])
    plt.xlabel(caption, color=caption_color)

def plot_value_array(i, predictions_array, true_label):
    """Draw a bar chart of the per-class scores for sample ``i``.

    Args:
        i: Index of the sample inside ``true_label``.
        predictions_array: Per-class scores for sample ``i``. One bar is
            drawn per entry, so the chart adapts to the number of classes
            instead of assuming exactly 10.
        true_label: Indexable collection of integer class labels.

    All bars start grey. The predicted class (largest score) is coloured
    red, then the actual class is coloured blue, so a correct prediction
    ends up as a single blue bar. Draws on the current matplotlib axes and
    returns nothing.
    """
    actual = true_label[i]
    class_ids = range(len(predictions_array))

    plt.grid(False)
    plt.xticks(class_ids)
    plt.yticks([])
    bars = plt.bar(class_ids, predictions_array, color="#777777")
    plt.ylim([0, 1])

    predicted = np.argmax(predictions_array)
    # Order matters: blue is applied last so it wins when predicted == actual.
    bars[predicted].set_color('red')
    bars[actual].set_color('blue')

#### Verify predictions

In [None]:
i = 0
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions[i], test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions[i], test_labels)
plt.show()

In [None]:
i = 12
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions[i], test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions[i], test_labels)
plt.show()

#### Let's plot several images with their predictions. Note that the model can be wrong even when very confident. 

In [None]:
# Plot the first X test images, their predicted labels, and the true labels.
# Color correct predictions in blue and incorrect predictions in red.
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_image(i, predictions[i], test_labels, test_images)
    plt.subplot(num_rows, 2*num_cols, 2*i+2)
    plot_value_array(i, predictions[i], test_labels)
plt.tight_layout()
plt.show()

#### Finally, use the trained model to make a prediction about a single image.

In [None]:
# Grab an image from the test dataset.
img = test_images[1]

print(img.shape)

In [None]:
# Add the image to a batch where it's the only member.
img = (np.expand_dims(img,0))

print(img.shape)
plt.imshow(img[0])

#### Now predict the correct label for this image:

In [None]:
predictions_single = model.predict(img)

print(predictions_single)

In [None]:
plot_value_array(1, predictions_single[0], test_labels)
_ = plt.xticks(range(10), class_names, rotation=45)

#### Specify SSL security settings via a PEM CA bundle, as the API server doesn't have a DNS certificate

In [None]:
import socket
import ssl

# Trust only the CA chain shipped with the notebook. create_default_context
# enables certificate verification and hostname checking and replaces the
# deprecated ssl.PROTOCOL_SSLv23 constant.
context = ssl.create_default_context(cafile="ca-chain.cert.pem")

server_address = ("frascati.projectkeep.io", 8008)

# A timeout keeps the cell from hanging forever if the server is unreachable.
conn = context.wrap_socket(socket.create_connection(server_address, timeout=10),
                           server_hostname=server_address[0])
try:
    cert = conn.getpeercert()
finally:
    # The connection is only needed to read the peer certificate; close it
    # so the notebook does not leak an open socket.
    conn.close()

print(cert)

#### Define the API token, variables, and URL path for Project Keep

In [None]:
import os

# SECURITY: a bearer token committed to a notebook is readable by anyone with
# access to the file. Set the KEEP_API_TOKEN environment variable to supply a
# real token; the literal below is only kept as the demo fallback so existing
# runs keep working.
api_token = os.environ.get(
    'KEEP_API_TOKEN',
    'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJUaGUgRGVtbyBXaXp6YXJkIiwiYXVkIjoiRG9taW5vIiwic3ViIjoiSm9obiBEb2UiLCJwZXJtaXNzaW9ucyI6WyJ1c2VyIiwiYWRtaW4iXSwiaWF0IjoxNTcxNzkzNTE4fQ.-R2VlEais265aZGRAL5El0srJ870iNarR4ME0WK31_U')
api_url_base = 'https://frascati.projectkeep.io:8008/lists/'

#Define variables used with Project Keep API
#keep_unid = '86C72C1BF64B6DF04825847100373215'  #uniqueid of the view or folder
keep_unid = '2D7D51D22BA0EBAC85257A7B006D8FC8'
#keep_db = 'Demo.nsf'  #ReplicaID of the database
keep_db = '48257C630031F757'  #ReplicaID of the database
keep_scope = 'scope=all'  #Options available for all or specific groups

In [None]:
print(api_url_base+keep_unid+'?'+'db='+keep_db+'&'+keep_scope)  #Verify format of GET URL

#### Set up our HTTP request header per API rules

In [None]:
# Request headers required by the API: JSON payloads plus bearer-token auth.
headers = {'Content-Type': 'application/json',
           'Authorization': 'Bearer {0}'.format(api_token)}
# Show the header layout without echoing the bearer token into the saved
# notebook output.
print({key: ('Bearer <redacted>' if key == 'Authorization' else value)
       for key, value in headers.items()})

#### Create a function to collect the DB data

In [None]:
def get_db_data(ca_bundle="ca-chain.cert.pem", timeout=30):
    """Fetch the configured list from the Project Keep API.

    The URL is assembled from the module-level ``api_url_base``,
    ``keep_unid``, ``keep_db`` and ``keep_scope`` values, and the request is
    sent with the module-level ``headers``.

    Args:
        ca_bundle: Path to the PEM CA bundle used to verify the server
            certificate. Defaults to the same file the SSL cell loads, so
            TLS verification stays enabled instead of being switched off
            with ``verify=False``.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, instead of blocking indefinitely.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None``.
    """
    api_url = api_url_base+keep_unid+'?'+'db='+keep_db+'&'+keep_scope

    response = requests.get(api_url, headers=headers, verify=ca_bundle, timeout=timeout)

    if response.status_code != 200:
        return None
    return response.json()

#### Let's check the response to our GET request

In [None]:
#print("Here's the data from "+keep_db)
#print (get_db_data())

#### Let's save this output to a json file

In [None]:
# Fetch before opening the file: opening with 'w' truncates demo.json, so a
# failed request would otherwise destroy a previously saved copy.
db_data = get_db_data()
if db_data is None:
    # get_db_data returns None for any non-200 response; do not write "null".
    print('Project Keep request failed; demo.json was not written.')
else:
    # The with-statement closes the file, so no explicit close() is needed.
    with open('demo.json', 'w') as json_out:
        json.dump(db_data, json_out)

#### Read data into Tensorflow

In [None]:
# URL from which demo.json is downloaded by tf.keras.utils.get_file below.
# NOTE(review): the path contains "/edit/", which on a Jupyter server usually
# serves the editor page rather than the raw file — confirm this URL really
# returns the JSON document. It is also plain http, not https.
TRAIN_DATA_URL = "http://domino-with-jupyter-jupyter.app.okd.hcllabs.net/edit/demo.json"
#TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

#train_file_path = tf.keras.utils.get_file("demo.json", TRAIN_DATA_URL)
#test_file_path = tf.keras.utils.get_file("eval.csv", TEST_DATA_URL)

# json_file receives the value returned by get_file (a local file path per the
# Keras documentation). NOTE(review): presumably a previously cached
# "demo.json" is reused instead of re-downloaded — verify before relying on
# fresh data.
json_file = tf.keras.utils.get_file("demo.json", TRAIN_DATA_URL)

In [None]:
df = pd.read_json(json_file)

## Now let's do some cool stuff with that data!

In [None]:
#open('demo.json', 'r').read()  #Verify file can be opened
demo_data = pd.read_json('demo.json')
demo_data.head(5)

#### List the types of data available

In [None]:
demo_data.dtypes

#### Remove columns that are not pertinent

In [None]:
demo_data = demo_data.drop(['unid', 'noteid', '$18', '$19'], axis=1)
demo_data.head(5)

#### Rename Columns

In [None]:
demo_data = demo_data.rename(columns={"$11": "Transaction_Date"})
demo_data['Transaction_Date'] = demo_data['Transaction_Date'].str[0:4]  #Grab first four characters from Transaction Date
demo_data.head(5)

#### Check row counts and null values (the cells below only inspect the data; nothing is removed)

In [None]:
demo_data.shape  #display number of rows within our data set

In [None]:
demo_data.count()  #confirm rows match across data types

In [None]:
print(demo_data.isnull().sum())

#### Remove Outliers

In [None]:
sns.set()
f, axes = plt.subplots(figsize=(8, 6))
sns.boxplot(x="registerTotal", data=demo_data);

In [None]:
# Per-column quartiles. numeric_only=True is passed explicitly because
# demo_data also holds the string column Transaction_Date, and recent pandas
# releases no longer skip non-numeric columns by default.
Q1 = demo_data.quantile(0.25, numeric_only=True)
Q3 = demo_data.quantile(0.75, numeric_only=True)
IQR = Q3 - Q1  #interquartile range (IQR) is the difference between the 75th and 25th percentile of the data
print(IQR)

In [None]:
# Drop every row that has at least one value outside the 1.5 * IQR fences.
# Only the columns for which quartiles exist are compared: comparing the
# whole frame against a Series with fewer labels depends on automatic
# alignment, which newer pandas versions reject.
fence_cols = demo_data.columns.intersection(IQR.index)
lower_fence = (Q1 - 1.5 * IQR)[fence_cols]
upper_fence = (Q3 + 1.5 * IQR)[fence_cols]
fenced_values = demo_data[fence_cols]
is_outlier = ((fenced_values < lower_fence) | (fenced_values > upper_fence)).any(axis=1)
demo_data = demo_data[~is_outlier]
demo_data.shape

In [None]:
sns.set()
f, axes = plt.subplots(figsize=(8, 6))
sns.boxplot(x="registerTotal", data=demo_data);

#### Now that our data is in better shape we can do some additional Exploratory data analysis

In [None]:
sns.set(color_codes=True)
f, axes = plt.subplots(figsize=(14, 8))
sns.scatterplot(x="registerTotal", y="registerTaxD", hue="registerNumberofPlays", data=demo_data);

#### Specialized categorical plots

In [None]:
sns.set(color_codes=True)
h = sns.catplot(x="Transaction_Date", y="registerTotal", hue="registerNumberofPlays", kind="swarm", data=demo_data);
h.fig.set_size_inches(14, 8)