# Solutions
## Problem 1
Implement the Min-Max scaling function ($X'=a+{\frac {\left(X-X_{\min }\right)\left(b-a\right)}{X_{\max }-X_{\min }}}$) with the parameters:

$X_{\min }=0$

$X_{\max }=255$

$a=0.1$

$b=0.9$

In [None]:
# Problem 1 - Implement Min-Max scaling for grayscale image data
def normalize_grayscale(image_data):
    """
    Rescale grayscale pixel values from [0, 255] into [0.1, 0.9] with Min-Max scaling.
    :param image_data: Pixel value(s) to rescale; anything supporting arithmetic with numbers
    :return: a + (X - X_min) * (b - a) / (X_max - X_min) applied to image_data
    """
    target_low, target_high = 0.1, 0.9
    pixel_floor, pixel_ceiling = 0, 255

    # Shift to the origin, stretch to the target width, divide by the source
    # width, then offset by the target minimum.
    shifted = image_data - pixel_floor
    stretched = shifted * (target_high - target_low)
    return target_low + stretched / (pixel_ceiling - pixel_floor)

## Problem 2
- Use [tf.placeholder()](https://www.tensorflow.org/api_docs/python/io_ops.html#placeholder) for `features` and `labels` since they are the inputs to the model.
- Any math operations must have the same type on both sides of the operator.  The weights are float32, so the `features` and `labels` must also be float32.
- Use [tf.Variable()](https://www.tensorflow.org/api_docs/python/state_ops.html#Variable) to allow `weights` and `biases` to be modified.
- The `weights` must have dimensions features × labels, i.e. shape `(784, 10)`. The number of features is the size of the image, 28*28=784, and the number of labels is 10.
- The `biases` must have one entry per label, i.e. shape `(10,)`.

In [None]:
features_count = 784
labels_count = 10

# Problem 2 - Set the features and labels tensors
# Both model inputs are float32 placeholders; no static shape is declared.
features = tf.placeholder(dtype=tf.float32)
labels = tf.placeholder(dtype=tf.float32)

# Problem 2 - Set the weights and biases tensors
# Weights: one column per label, one row per input feature, drawn from a
# truncated normal distribution. Biases: one zero-initialised entry per label.
weights_shape = (features_count, labels_count)
weights = tf.Variable(initial_value=tf.truncated_normal(shape=weights_shape))
biases = tf.Variable(initial_value=tf.zeros(shape=labels_count))

## Problem 3
Configuration 1
* **Epochs:** 1
* **Learning Rate:** 0.1

Configuration 2
* **Epochs:** 4 or 5
* **Learning Rate:** 0.2

In [62]:
%matplotlib inline 

import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import sklearn
import sklearn.model_selection
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
import math
import tensorflow as tf


# Data processing

# Load the housing data: PRICE is the regression target, every other column is a feature.
data = pd.read_csv('boston_data.csv')

print(data.head())

X = data.drop('PRICE', axis=1)
Y = data['PRICE']

# Hold out 10% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(X, Y, test_size=0.1, random_state=5)


# Scale the features to zero mean and unit standard deviation.
# The scaler is fitted on the training set ONLY and then re-used for the test
# set. Re-fitting on the test set (fit_transform) would scale the test features
# with different statistics than the ones the model was trained on.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Turn the target Series into (n_samples, 1) column arrays. Going through
# `.values` reshapes the underlying ndarray directly instead of calling
# np.reshape on a pandas Series, which emitted a warning.
Y_train = Y_train.values.reshape(-1, 1)
Y_test = Y_test.values.reshape(-1, 1)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \
0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0  296.0   
1  0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0  242.0   
2  0.02729   0.0   7.07   0.0  0.469  7.185  61.1  4.9671  2.0  242.0   
3  0.03237   0.0   2.18   0.0  0.458  6.998  45.8  6.0622  3.0  222.0   
4  0.06905   0.0   2.18   0.0  0.458  7.147  54.2  6.0622  3.0  222.0   

   PTRATIO       B  LSTAT  PRICE  
0     15.3  396.90   4.98   24.0  
1     17.8  396.90   9.14   21.6  
2     17.8  392.83   4.03   34.7  
3     18.7  394.63   2.94   33.4  
4     18.7  396.90   5.33   36.2  
(455, 13)
(455, 1)
(51, 13)
(51, 1)


  return getattr(obj, method)(*args, **kwds)


In [63]:
# Neural Network Model

def multilayer_perceptron(x, weights, biases, keep_prob):
    """
    Build a two-hidden-layer perceptron and return its linear output tensor.

    :param x: Input tensor fed to the first hidden layer
    :param weights: Mapping with the weight tensors under 'h1', 'h2' and 'out'
    :param biases: Mapping with the bias tensors under 'b1', 'b2' and 'out'
    :param keep_prob: Keep probability handed to tf.nn.dropout after each hidden layer
    :return: Output tensor (no activation is applied to the output layer)
    """
    activations = x

    # Each hidden layer is: affine transform -> ReLU -> dropout.
    for weight_key, bias_key in (('h1', 'b1'), ('h2', 'b2')):
        pre_activation = tf.add(tf.matmul(activations, weights[weight_key]), biases[bias_key])
        activations = tf.nn.dropout(tf.nn.relu(pre_activation), keep_prob)

    return tf.matmul(activations, weights['out']) + biases['out']


# Parameters

n_input = 13
n_hidden1_neuron = 64
n_hidden2_neuron = 32
n_output = 1


def _random_normal_variable(shape):
    """Return a float64 tf.Variable initialised by tf.random_normal with the given shape."""
    return tf.Variable(tf.random_normal(shape, dtype=tf.float64))


# Layer sizes chain input -> hidden 1 -> hidden 2 -> output.
weights = {
    'h1': _random_normal_variable([n_input, n_hidden1_neuron]),
    'h2': _random_normal_variable([n_hidden1_neuron, n_hidden2_neuron]),
    'out': _random_normal_variable([n_hidden2_neuron, n_output]),
}

biases = {
    'b1': _random_normal_variable([n_hidden1_neuron]),
    'b2': _random_normal_variable([n_hidden2_neuron]),
    'out': _random_normal_variable([n_output]),
}

# Dropout keep probability is fed at run time.
keep_prob = tf.placeholder(tf.float64)

# Inputs and targets; the leading dimension is left open for any batch size.
x = tf.placeholder(tf.float64, shape=[None, n_input])
y = tf.placeholder(tf.float64, shape=[None, n_output])

predictions = multilayer_perceptron(x, weights, biases, keep_prob)

# Mean squared error between predictions and targets, minimised with Adam.
squared_error = tf.square(predictions - y)
cost = tf.reduce_mean(squared_error)
adam = tf.train.AdamOptimizer(learning_rate=0.0001)
optimizer = adam.minimize(cost)
   
    

In [64]:
# Training

training_epochs = 50000
display_step = 1000

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        # One optimisation step on the full training set per epoch; dropout
        # keeps 80% of the hidden activations while training.
        _, c = sess.run([optimizer, cost],
                        feed_dict={
                            x: X_train,
                            y: Y_train,
                            keep_prob: 0.8
                        })

        # Report the training cost every `display_step` epochs.
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", \
                    "{:.9f}".format(c))

    # Evaluation only: fetch `cost` and nothing else. Fetching `optimizer` here
    # would run a gradient step on the test data, i.e. train on the test set.
    # Dropout is disabled (keep_prob = 1.0) for evaluation.
    test_c = sess.run(cost,
                      feed_dict={
                          x: X_test,
                          y: Y_test,
                          keep_prob: 1.0
                      })
    print("Mean Squared Error of test data = ", "{:.9f}".format(test_c))

Epoch: 0001 cost= 15546.655942631
Epoch: 1001 cost= 5117.850639369
Epoch: 2001 cost= 2823.797064499
Epoch: 3001 cost= 1821.928060657
Epoch: 4001 cost= 948.214079242
Epoch: 5001 cost= 576.224294012
Epoch: 6001 cost= 301.586612681
Epoch: 7001 cost= 212.129743772
Epoch: 8001 cost= 140.856030000
Epoch: 9001 cost= 118.586782983
Epoch: 10001 cost= 86.690393683
Epoch: 11001 cost= 76.419554096
Epoch: 12001 cost= 69.475534653
Epoch: 13001 cost= 60.558455828
Epoch: 14001 cost= 53.932501438
Epoch: 15001 cost= 58.788160718
Epoch: 16001 cost= 43.972921647
Epoch: 17001 cost= 40.911072892
Epoch: 18001 cost= 35.480294935
Epoch: 19001 cost= 37.869513195
Epoch: 20001 cost= 31.927721404
Epoch: 21001 cost= 33.271771557
Epoch: 22001 cost= 26.717421942
Epoch: 23001 cost= 24.709612871
Epoch: 24001 cost= 29.842921277
Epoch: 25001 cost= 29.780801626
Epoch: 26001 cost= 30.430211325
Epoch: 27001 cost= 25.635674961
Epoch: 28001 cost= 30.324274765
Epoch: 29001 cost= 23.737305746
Epoch: 30001 cost= 29.439797912
Epo

In [6]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


In [11]:
def neuralnet_model():
    """
    Build and compile the regression network: 13 inputs, ReLU hidden layers of
    64, 32 and 16 units, and a single linear output unit.

    :return: A compiled Sequential model (Adam optimizer, mean squared error loss)
    """
    network = Sequential()

    # The first hidden layer also declares the input dimensionality.
    network.add(Dense(64, input_dim=13, kernel_initializer='normal', activation='relu'))

    # The remaining hidden layers differ only in their width.
    for units in (32, 16):
        network.add(Dense(units, kernel_initializer='normal', activation='relu'))

    # Single output unit with no activation, for regression.
    network.add(Dense(1, kernel_initializer='normal'))

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

In [13]:
# Load the housing data: PRICE is the regression target, every other column is a feature.
data = pd.read_csv('boston_data.csv')

#print(data.head())

X = data.drop('PRICE', axis=1)
Y = data['PRICE']

# Seed NumPy's global RNG. The module is imported as `np` in this notebook, so
# the bare name `numpy` only resolves when it leaked in from hidden kernel state.
seed = 7
np.random.seed(seed)

# Standardize the features inside the pipeline so that each CV fold fits the
# scaler on its own training portion only.
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=neuralnet_model, epochs=200, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)

# KFold does not shuffle by default, so the folds are contiguous and
# deterministic. `random_state` is only meaningful together with shuffle=True
# (newer scikit-learn versions reject it otherwise), so it is not passed.
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)

# NOTE(review): the reported mean is negative, so the score appears to be the
# negated MSE rather than the MSE itself; confirm before quoting it.
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: -34.85 (34.37) MSE


In [65]:
from sklearn.datasets import fetch_20newsgroups
# Fetch the shuffled training split of the 20 newsgroups corpus
# (the captured output shows the dataset being downloaded when this cell ran).
twenty_train = fetch_20newsgroups(
    subset='train',
    shuffle=True,
)

Downloading 20news dataset. This may take a few minutes.
Downloading dataset from https://ndownloader.figshare.com/files/5975967 (14 MB)


In [66]:
from sklearn.svm import SVC

# Toy two-class problem: two points per class, on opposite sides of the origin.
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])

# C is the regularisation strength and must be a positive number; an empty
# string is not a valid value. 1.0 is scikit-learn's default.
clf = SVC(kernel="rbf", C=1.0)

# Fit against the labels defined above (`y`). The upper-case `Y` is the housing
# price Series left over from an earlier cell and does not match these 4 samples.
clf.fit(X, y)
clf.predict([[-0.8, -1]])