In [1]:
%matplotlib inline

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [2]:
train_data = np.loadtxt(open("data/train_data.csv", "rb"), delimiter=",", skiprows=1)
train_labels = np.loadtxt(open("data/train_labels.csv", "rb"), delimiter=",", skiprows=1)
test_data = np.loadtxt(open("data/test_data.csv", "rb"), delimiter=",", skiprows=1)

In [3]:
scaler = StandardScaler()
train_data_stand = scaler.fit_transform(train_data)
test_data_stand = scaler.fit_transform(test_data)

In [4]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1, max_iter=2000, verbose=True)

In [5]:
clf.fit(train_data, train_labels)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=2000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=True,
       warm_start=False)

In [6]:
sum(clf.predict(test_data) != 1)

0

In [7]:
clf = MLPRegressor(alpha=0.001, hidden_layer_sizes = (30,50,25), max_iter = 50000, activation = 'logistic', verbose = 'True')

In [8]:
a = clf.fit(train_data, train_labels)

Iteration 1, loss = 4.16765784
Iteration 2, loss = 3.29439058
Iteration 3, loss = 3.11175446
Iteration 4, loss = 3.08343846
Iteration 5, loss = 3.05828652
Iteration 6, loss = 3.01610639
Iteration 7, loss = 2.96466533
Iteration 8, loss = 2.90433172
Iteration 9, loss = 2.84219189
Iteration 10, loss = 2.77371261
Iteration 11, loss = 2.73374565
Iteration 12, loss = 2.67232675
Iteration 13, loss = 2.62071140
Iteration 14, loss = 2.57653556
Iteration 15, loss = 2.54894319
Iteration 16, loss = 2.53205420
Iteration 17, loss = 2.50569313
Iteration 18, loss = 2.53116452
Iteration 19, loss = 2.52490728
Iteration 20, loss = 2.52018005
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.


In [9]:
y_pred = clf.predict(train_data)

In [10]:
sum(np.round(y_pred) == train_labels)

1074

In [11]:
LogReg = LogisticRegression(multi_class='ovr', penalty='l1',tol=0.1)

In [12]:
LogReg.fit(train_data_stand, train_labels)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=0.1,
          verbose=0, warm_start=False)

In [13]:
LogReg.score(train_data_stand,train_labels)

0.74094452086198992

In [14]:
LogReg.predict(test_data_stand)

array([ 9.,  1.,  1., ...,  1.,  3.,  1.])

In [51]:
from sklearn.model_selection import train_test_split

In [68]:
Xtrain, Xtest, Ytrain, Ytest = train_test_split(train_data_stand, train_labels, test_size=0.33, random_state=42)

Ytrain = Ytrain.astype(int)
Ytest = Ytest.astype(int)

In [69]:
from theano import tensor as T
import numpy as np
import theano
import matplotlib.pyplot as plt
import matplotlib as mt
import matplotlib.gridspec as gridspec

In [70]:
def plotmodelfit(Xtrain,
                 Ytrain,
                 pred_train,
                 nnact,
                 X1grid,
                 X2grid,
                 pred_grid,
                 cost_train_vec,
                 cost_test_vec):
    '''method
    Inputs :
    Xtrain, Ytrain        : N x D, N x 1 : traning datasets
    X1grid, X2grid        : G x G, G x G : grid locations as test dataset
    pred_train, pred_grid : N x 1, G x 1: model predictions on training dataset and grid dataset
    cost_train, cost_test   : num_iter x 1, num_iter x 1 : error across iterations on training and test set
    nnact                 : list of activation values in the hidden layer
    '''

    mt.rcParams['figure.figsize'] = (8, 6)
    norm = mt.colors.Normalize(vmin=0., vmax=1.)

    nh = [f.shape[1] for f in nnact]
    nhiddenl = len(nh)

    fig = plt.figure(num=122)

    # gs for main plot
    gs0 = gridspec.GridSpec(1, 2)
    gs00 = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec=gs0[0, 0])

    # gs for hidden layers
    gs1 = gridspec.GridSpecFromSubplotSpec(1, nhiddenl, subplot_spec=gs0[0, 1])

    subgs = []
    for i in np.arange(nhiddenl):
        subgs.append(
            gridspec.GridSpecFromSubplotSpec(nh[i], 1, subplot_spec=gs1[0, i]))

    # ax for main
    ax_00 = fig.add_subplot(gs00[0, 0])  #, adjustable='box-forced'
    ax_00.scatter(
        Xtrain[Ytrain == 1, 0],
        Xtrain[Ytrain == 1, 1],
        c="#ff9900",
        label="class 1",
        s=15,
        alpha=0.8)
    ax_00.scatter(
        Xtrain[Ytrain == 0, 0],
        Xtrain[Ytrain == 0, 1],
        c="#02275a",
        label="class 0",
        s=15,
        alpha=0.8)
    ax_00.contourf(X1grid, X2grid, pred_grid, alpha=0.3)
    ax_00.legend()
    ax_00.set_title("model fit")

    ax_01 = fig.add_subplot(gs00[1, 0])  #, adjustable='box-forced'
    ax_01.plot(
        np.arange(cost_train_vec.shape[0]),
        cost_train_vec,
        c="#27ae61",
        label="train")
    ax_01.plot(
        np.arange(cost_test_vec.shape[0]),
        cost_test_vec,
        c="#c1392b",
        label="test")
    ax_01.set_xlabel("iterations")
    ax_01.set_ylabel("cost function")
    ax_01.set_title("cost function across iterations")
    ax_01.legend()

    axhl = []

    # nested list for hidden layer activations
    for hlayer in np.arange(nhiddenl):
        axnn = []
        for hnn in np.arange(nh[hlayer]):
            ax = fig.add_subplot(subgs[hlayer][hnn, 0], aspect='equal')
            ax.scatter(
                Xtrain[:, 0],
                Xtrain[:, 1],
                c=nnact[hlayer][:, hnn],
                cmap="RdBu",
                s=5,
                norm=norm)
            ax.xaxis.set_ticks([])
            ax.yaxis.set_ticks([])
            if hnn == 0:
                ax.set_title("activations " + "\n" + "in layer " +
                             str(hlayer + 1))
            axnn.append(ax)
        axhl.append(axnn)

    fig.tight_layout()

In [83]:
def ClassificationTwoHiddenLayerNN(Xtrain,
                                  Ytrain,
                                  Xtest,
                                  Ytest,
                                  nn1=4,
                                  nn2=3,
                                  nnout=10,
                                  training_steps=50000,
                                  alpha=0.2):
    '''
    Input:
    Xtrain   : N x D    : traning set features
    Ytrian   : N x 1    : training set target
    Xtest    : M x D    : test set feaures
    Ytest    : M x 1    : test set target
    nn1      : scalar   : no. of neurons to be used in first hidden layer
    nn2      : scalar   : no. of neurons to be used in second hidden layer
    training_steps : scalar : no. of training iteration steps
    alpha    : scalar   : learning rate
    
    
    '''
    print("*** running ***")
 
    # define input and output variables in theano
    x = T.matrix('x')
    y = T.vector('y')
    xdim = Xtrain.shape[1]  # number of features in the data
    
    
    
    ##########################################################
    ################### add your code here ###################
    ##########################################################
            
    # HINT : FOR WEIGHTS USE RANDOM STANDRD GAUSSIAN INITIALIZATION
    #      : FOR BIAS USE ZERO INITIALIZATION
    
    # layer 01 parameter declaration & initialization (weights/bias)
    np.random.seed(1232)
    w_1 = theano.shared(np.random.randn(xdim, nn1), name='w_1')
    b_1 = theano.shared(np.zeros((nn1,)), name='b_1')

    # layer 02 parameter declaration & initialization (weights/bias) 
    np.random.seed(1232)
    w_2 = theano.shared(np.random.randn(nn1, nn2), name='w_2')
    b_2 = theano.shared(np.zeros((nn2,)), name='b_2')

    # output layer parameter declaration & initialization (weights/bias) 
    np.random.seed(1232)
    w_out = theano.shared(np.random.randn(nn2,nnout), name='w_out')
    b_out = theano.shared(np.zeros((nnout,)), name='b_out')

    # hidden layer output
    h_out_1 = theano.tensor.nnet.sigmoid(T.dot(x, w_1) + b_1)
    h_out_2 = theano.tensor.nnet.sigmoid(T.dot(h_out_1, w_2) + b_2)

    # perceptron predictions
    p_y_given_x = T.nnet.softmax(T.dot(h_out_2, w_out) + b_out)
    y_pred = T.argmax(p_y_given_x, axis=1)

    # cross-entropy as cost function
    #cost   = T.nnet.binary_crossentropy(y_pred, y).mean()
    
    print("##########################################################")
    print(T.arange(y.shape[0]))
    print("##########################################################")
    print( y.shape.eval())

    cost   = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])

    # gradient computation
    gw_1, gb_1, gw_2, gb_2, gw_out, gb_out = T.grad(cost, [w_1, b_1,w_2, b_2, w_out, b_out])
    
    updates  =  [(w_1, w_1 - alpha * gw_1), 
             (b_1, b_1 - alpha * gb_1),
             (w_2, w_2 - alpha * gw_2),
             (b_2, b_2 - alpha * gb_2),
             (w_out, w_out - alpha * gw_out), 
             (b_out, b_out - alpha * gb_out)] 
    
        
    # train_model theano function
    # Note : outputs should return following in order
    #      : [prediction vector, error/cost scalar,
    #        1st hidden layer activation vector, 2nd hidden layer activation vector]
    train_model = theano.function(
             inputs  = [x,y],
             outputs = [y_pred, cost, h_out_1, h_out_2],
             updates = updates
            )
    

    # function 
    # compute prediction on unseen test data
    # Input   : x, y are intput, target vectors respectively
    # Output  :  list of predictions
    predict_model = theano.function(inputs=[x], outputs=[y_pred])
    
    # function 
    # compute cost on test data
    # Input   : x, y are intput, target vectors respectively
    # Output  : scalar cost
    cost_function = theano.function(inputs=[x,y], outputs=cost)
    

    ##########################################################
    ###################        end         ###################
    ##########################################################
        
        
        
    # accumulate error over iterations on traning and test set in a vector
    cost_train_vec = np.array([])
    cost_test_vec = np.array([])

    # training iterations begin
    for i in np.arange(training_steps):
        
        # get predictions, cost, activation values 
        # on the training set
        # pred_train - vector - predictions on training data
        # cost_train  - scalar - cost/error for the current parameter value
        # nactivation- vector - activation function from the hidden layer
        pred_train, cost_train, nactivation1, nactivation2 = train_model(
            Xtrain, Ytrain)
        cost_train_vec = np.append(cost_train_vec, cost_train)
            
        # get predictions, cost on test set
        pred_test = predict_model(Xtest)
        cost_test = cost_function(Xtest,Ytest)
        cost_test_vec = np.append(cost_test_vec, cost_test)
        
        # printing
        if i % 10000 == 0:
            print("Iteration %6s -- "%i,'Training cost: ',"%4.4f"%cost_train)

    print("final train set cost : %.4f"%cost_train)
    print("final test set cost  : %.4f"%cost_test)
    
    # compute classification accuracies
    train_predictions = (np.round(predict_model(Xtrain)).reshape((1,-1)))
    train_accuracy = np.mean(train_predictions == Ytrain)
    print("final train set classification accuracy : %.4f"%train_accuracy)
    
    test_predictions = np.round(pred_test).reshape((1,-1))
    test_accuracy = np.mean(test_predictions == Ytest)
    print("final test set classification accuracy : %.4f"%test_accuracy)
    
    # for the final model, plot model fit and activations
    # on a grid
    X1grid, X2grid = np.meshgrid(
        np.linspace(-2, 3, 100), np.linspace(-1.7, 2, 100))
    pred_grid = predict_model(
        np.transpose(np.array([X1grid.flatten(), X2grid.flatten()])))
    pred_grid = np.array(pred_grid)
    pred_grid = pred_grid.reshape(X1grid.shape)

    plotmodelfit(Xtrain, Ytrain, pred_train,
                 [nactivation1, nactivation2],
                 X1grid, X2grid, pred_grid,
                 cost_train_vec, cost_test_vec)


In [84]:
ClassificationTwoHiddenLayerNN(
    Xtrain, Ytrain, Xtest, Ytest, nn1=3, nn2=2, nnout=10, training_steps=5000)

*** running ***
##########################################################
ARange{dtype='int64'}.0
##########################################################


MissingInputError: Input 0 of the graph (indices start from 0), used to compute Shape(y), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error. 
Backtrace when that variable is created:

  File "/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2808, in run_ast_nodes
    if self.run_code(code, result):
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-84-12661518298d>", line 2, in <module>
    Xtrain, Ytrain, Xtest, Ytest, nn1=3, nn2=2, nnout=10, training_steps=5000)
  File "<ipython-input-83-2a2ded0a3713>", line 27, in ClassificationTwoHiddenLayerNN
    y = T.vector('y')


Backtrace when the variable is created:
  File "/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/anaconda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2808, in run_ast_nodes
    if self.run_code(code, result):
  File "/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-84-12661518298d>", line 2, in <module>
    Xtrain, Ytrain, Xtest, Ytest, nn1=3, nn2=2, nnout=10, training_steps=5000)
  File "<ipython-input-83-2a2ded0a3713>", line 27, in ClassificationTwoHiddenLayerNN
    y = T.vector('y')


array([ 2, 10,  6, ...,  2,  5,  1])