In [1]:
import numpy as np
import os
import pandas as pd



# I Load the  file
with np.load('cifar4-train.npz') as data:
    cifar4_data = dict(data.items())

X=cifar4_data['overfeat']
y=cifar4_data['labels']
P=cifar4_data['pixels']

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import tensorflow as tf

# Split data into train/test sets
X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.2, random_state=0)

# Batch generator
def get_batches(X, y, batch_size):
    # Shuffle X,y
    shuffled_idx = np.arange(len(y))
    np.random.shuffle(shuffled_idx)
    
    
    for i in range(0, len(y), batch_size):
        # Batch indexes
        batch_idx = shuffled_idx[i:i+batch_size]
        yield X[batch_idx], y[batch_idx]

  from numpy.core.umath_tests import inner1d


In [3]:
models=[]
accuracy_result=[]

# KNN


In [4]:
pipeKNN = Pipeline([
    ('PCA', PCA(n_components=87)),
    ('knn', KNeighborsClassifier(n_neighbors=41, p=1))
])

# Fit to train data
pipeKNN.fit(X_tr, y_tr)

# Evaluate on test set
accuracy = pipeKNN.score(X_te, y_te)*100
print('k-NN Accuracy (Test set): {:.1f} %'.format(accuracy))
accuracy_result.append(accuracy)
model='KNN'
models.append(model)


k-NN Accuracy (Test set): 77.5 %


# Decision Tree

In [5]:
pipeTREE = Pipeline([
   
    ( 'PCA', PCA(n_components=87)),
        ('Dt', DecisionTreeClassifier(max_depth=5))])
    
    
# Fit the tuned decision tree
pipeTREE.fit(X_tr, y_tr)

#  result

accuracy = pipeTREE.score(X_te, y_te)*100
print('Decision Tree Accuracy (Test set): {:.1f} %'.format(accuracy))
accuracy_result.append(accuracy)
model='Decision Tree'
models.append(model)
    


Decision Tree Accuracy (Test set): 65.4 %


# Random Forest

In [6]:
pipeforest = Pipeline([
    ('PCA', PCA(n_components=87)),
    ('rf', RandomForestClassifier(n_estimators=110))
])

# Fit to train data
pipeforest.fit(X_tr, y_tr)

accuracy = pipeforest.score(X_te, y_te)*100
print('Random Forest Accuracy (Test set): {:.1f} %'.format(accuracy))
accuracy_result.append(accuracy)
model='Random Forest'
models.append(model)

Random Forest Accuracy (Test set): 76.1 %


# Linear Kernel SVM

In [7]:
pipeLSVM = Pipeline([
     ('PCA', PCA(n_components=87)),
        ('LSV', LinearSVC(random_state=0, C=0.05)
        )])


# Fit to train data
pipeLSVM.fit(X_tr, y_tr)

accuracy = pipeLSVM.score(X_te, y_te)*100
print('SVM Linear Kernel Accuracy (Test set): {:.1f} %'.format(accuracy))
accuracy_result.append(accuracy)
model='SVM Linear Kernel'
models.append(model)

SVM Linear Kernel Accuracy (Test set): 80.6 %


# RBF Kernel SVM

In [8]:
# I ran the tuned Non-linear Kernel
pipeSVC = Pipeline([
     ('PCA', PCA(n_components=87)),
        ('SVC', SVC(C=1.55,gamma=0.0003)
        )])

# Fit to train data
pipeSVC.fit(X_tr, y_tr)

accuracy = pipeSVC.score(X_te, y_te)*100
print('SVM RBF Kernel Accuracy (Test set): {:.1f} %'.format(accuracy))
accuracy_result.append(accuracy)
model='SVM RBF Kernel'
models.append(model)

SVM RBF Kernel Accuracy (Test set): 81.4 %


# Logistic Regression

In [9]:
pipelogreg = Pipeline([
     ('PCA', PCA(n_components=None)),
        ('logreg', LogisticRegression(C=0.1)
        )])


# Fit to train data
pipelogreg.fit(X_tr, y_tr)

accuracy = pipelogreg.score(X_te, y_te)*100
print('Logistic Regression Accuracy (Test set): {:.1f} %'.format(accuracy))
accuracy_result.append(accuracy)
model='Logistic Regression'
models.append(model)


Logistic Regression Accuracy (Test set): 82.8 %


# Multilayer Neural Network

In [10]:
# I create the netwrok Graph

# Create new graph
graph = tf.Graph()

with graph.as_default():
    # Create placeholders
    X = tf.placeholder(dtype=tf.float32, shape=[None, 4096])
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    
    # Hidden layer with 64 units and RELU activation
    hidden = tf.layers.dense(
        X, 64, activation=tf.nn.relu, # ReLU
        kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
        bias_initializer=tf.zeros_initializer(),
        name='hidden'
    )
    
    
   
    # Output layer
    logits = tf.layers.dense(
        hidden, 10, activation=None, # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer(),
        name='output'
    )
    
    
    # Get weights of the first hidden layer
    with tf.variable_scope('hidden', reuse=True):
        W1 = tf.get_variable('kernel')
    
    # Dropout
    training = tf.placeholder(dtype=tf.bool)
    flat_output = tf.layers.dropout(hidden, rate=0.5, seed=0, training=training)
    
        
    
    #Get weights of the second tensor
    with tf.variable_scope('output', reuse=True):
        W2 = tf.get_variable('kernel')
    
    
    # Regularization strength
    alpha = tf.placeholder(dtype=tf.float32)

    
    # Loss fuction: mean cross-entropy with regularization on W1 and W2
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
    regularizer_W1=tf.nn.l2_loss(W1)
    regularizer_W2=tf.nn.l2_loss(W2)
    
    loss=tf.reduce_mean(loss+alpha*regularizer_W1+alpha*regularizer_W2)
    
    
    # Gradient descent
    lr = tf.placeholder(dtype=tf.float32)
    gd = tf.train.GradientDescentOptimizer(learning_rate=lr)

    # Minimize cross-entropy
    train_op = gd.minimize(loss)
    
    # Compute predictions and accuracy
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(y, predictions)
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))
    
    

# I initialize, train and test the results

# Validation accuracy
valid_acc_values = []

with tf.Session(graph=graph) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    
    # Set seed
    np.random.seed(0)
    
    # Train several epochs
    for epoch in range(50):
        # Accuracy values (train) after each batch
        batch_acc = []
        
        # Get batches of data
        for X_batch, y_batch in get_batches(X_tr, y_tr, 50):
            # Run training and evaluate accuracy
            _, acc_value = sess.run([train_op, accuracy], feed_dict={
                X: X_batch,
                y: y_batch,
                lr: 0.005, # Learning rate
                training: True, # Apply dropout
                alpha: 0.01 # Regularization strength

            })
            
            
    # Evaluate test accuracy
    test_acc = sess.run(accuracy, feed_dict={
        X: X_te,
        y: y_te

    })
        
    print('Multilayer fully-connected network accuracy (test set) {:.1f} %'.format(test_acc*100))
    
accuracy_result.append(test_acc*100)
model='Multilayer fully-connected network'
models.append(model)
    


Multilayer fully-connected network accuracy (test set) 81.9 %


# Convolutional Neural Network

In [11]:
# I re-arrange the data

P=(P - 128) / 255

# and I split them in train and test

# Create the test set
X_train, X_test, y_train, y_test = train_test_split(
    # Reshape images: 32 by 32 with 3 (RGB) color channels
    P.reshape(-1, 32, 32, 3),
    cifar4_data['labels'],
    test_size=1800, random_state=0)

# create the validate set from the train
X_valid, X_test, y_valid, y_test = train_test_split(
    X_test, y_test, test_size=1000, random_state=0)

# Batch generator
def get_batches_conv(X, y, batch_size):
    # Shuffle X,y
    shuffled_idx = np.arange(len(y)) # 1,2,...,n
    np.random.shuffle(shuffled_idx)
    
    # Enumerate indexes by steps of batch_size
    # i: 0, b, 2b, 3b, 4b, .. where b is the batch size
    for i in range(0, len(y), batch_size):
        # Batch indexes
        batch_idx = shuffled_idx[i:i+batch_size]
        yield X[batch_idx], y[batch_idx]

# Create new graph
graph = tf.Graph()

with graph.as_default():
    # Placeholders
    X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    
    
    # Convolutional layer (64 filters, 5x5, stride: 2)
    conv1 = tf.layers.conv2d(
        X, 64, (5, 5), (2, 2), 'SAME', # "same" padding
        activation=tf.nn.relu, # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01, seed=0),
        name='conv1'
    )
        
    # Maxpool layer (2x2, stride: 2, "same" padding)
    pool1 = tf.layers.max_pooling2d(conv1, (2, 2), (2, 2), 'SAME')
    
    
    # Convolutional layer (64 filters, 3x3, stride: 1)
    conv2 = tf.layers.conv2d(
        pool1, 64, (3, 3), (1, 1), 'SAME', # "same" padding
        activation=tf.nn.relu, # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.01, seed=0),
        name='conv2'
    )
    
    
    # Maxpool layer (2x2, stride: 2, "same" padding)
    pool2 = tf.layers.max_pooling2d(conv2, (2, 2), (2, 2), 'SAME')
    
    
    # Flatten output
    flat_output = tf.contrib.layers.flatten(pool2)
    
    
    # Dropout
    training = tf.placeholder(dtype=tf.bool)
    flat_output = tf.layers.dropout(flat_output, rate=0.5, seed=0, training=training)
    
    
    # Fully connected layer
    fc1 = tf.layers.dense(
        flat_output, 256, # 256 hidden units
        activation=tf.nn.relu, # ReLU
        kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
        bias_initializer=tf.zeros_initializer()
    )
   
    
    # Output layer
    logits = tf.layers.dense(
        fc1, 4, # One output unit per category
        activation=None, # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer()
    )
   
    
    # Kernel of the 1st conv. layer
    with tf.variable_scope('conv1', reuse=True):
        conv_kernels = tf.get_variable('kernel')
    
    # Mean cross-entropy
    mean_ce = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logits))
    
    # Adam optimizer
    lr = tf.placeholder(dtype=tf.float32)
    gd = tf.train.AdamOptimizer(learning_rate=lr)

    # Minimize cross-entropy
    train_op = gd.minimize(mean_ce)
    
    # Compute predictions and accuracy
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(y, predictions)
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))
    
    
# Validation accuracy
valid_acc_values = []

with tf.Session(graph=graph) as sess:
    # Initialize variables
    sess.run(tf.global_variables_initializer())
    
    # Set seed
    np.random.seed(0)
    
    # Train several epochs
    for epoch in range(50):
        # Accuracy values (train) after each batch
        batch_acc = []
        
        for X_batch, y_batch in get_batches_conv(X_train, y_train, 20):
            # Run training and evaluate accuracy
            _, acc_value = sess.run([train_op, accuracy], feed_dict={
                X: X_batch,
                y: y_batch,
                lr: 0.0001, # Learning rate
                training:True
            })
            
            # Save accuracy (current batch)
            batch_acc.append(acc_value)

        # Evaluate validation accuracy
        valid_acc = sess.run(accuracy, feed_dict={
            X: X_valid,
            y: y_valid,
            training: False
        })
        valid_acc_values.append(valid_acc)
        
      
        
    # Get 1st conv. layer kernels
    kernels = conv_kernels.eval()
    
    # Evaluate test accuracy
    test_acc = sess.run(accuracy, feed_dict={
        X: X_test,
        y: y_test,
            training: False
    })
print('Convolutional Neural Network Test accuracy: {:.1f}'.format(test_acc*100))
    
accuracy_result.append(test_acc*100)
model='Multilayer fully-connected network'
models.append(model) 

  from ._conv import register_converters as _register_converters


Convolutional Neural Network Test accuracy: 65.8


In [12]:
Overview=pd.DataFrame(models, columns=['Classifier'])
test_accuracy=pd.DataFrame(accuracy_result, columns=['Test_Accuracy'])
Overview=Overview.join(test_accuracy, )
Overview.sort_values(by='Test_Accuracy', ascending=False)

Unnamed: 0,Classifier,Test_Accuracy
5,Logistic Regression,82.8
6,Multilayer fully-connected network,81.900001
4,SVM RBF Kernel,81.4
3,SVM Linear Kernel,80.6
0,KNN,77.5
2,Random Forest,76.1
7,Multilayer fully-connected network,65.799999
1,Decision Tree,65.4


### Considering both accuracy and running time I decide use for the classification the SVC RBF Kernel

In [17]:
# I Load the  file for the prediction
with np.load('cifar4-test.npz') as data:
    cifar4_data = dict(data.items())

X_test=cifar4_data['overfeat']


In [40]:
test_predictions=pipeSVC.predict(X_test)

In [41]:
# I check that the prediction is evenly distributed among classes
df_predict=pd.DataFrame(test_predictions,columns=['Class'])
df_predict['count']=1
df_predict.groupby(by='Class').count()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0,249
1,262
2,229
3,260


In [43]:
from tempfile import TemporaryFile
export = TemporaryFile()

np.save('test_predictions.npy',test_predictions)