In [22]:
#1 Python Configuration and Data Loading
import sys

# Fail fast when the notebook is run under a Python 2 interpreter.
running_python3 = sys.version_info >= (3,)
if not running_python3:
    raise Exception("Python 3 not detected.")
    
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from scipy import io
# import pandas as pd

# Sanity check: load every dataset's .mat file and print the shape of the
# arrays stored under each expected key.
fields = ("test_data", "training_data", "training_labels")
for data_name in ("mnist", "spam", "cifar10"):
    data = io.loadmat("data/%s_data.mat" % data_name)
    print("\nloaded %s data!" % data_name)
    for field in fields:
        field_shape = data[field].shape
        print(field, field_shape)


loaded mnist data!
test_data (10000, 784)
training_data (60000, 784)
training_labels (60000, 1)

loaded spam data!
test_data (5857, 32)
training_data (5172, 32)
training_labels (5172, 1)

loaded cifar10 data!
test_data (10000, 3072)
training_data (50000, 3072)
training_labels (50000, 1)


In [40]:
#2 Data Partitioning
# Seed NumPy's global random state. The split helper in this cell shuffles
# with np.random.permutation, which draws from that global state, so seeding
# here makes the shuffles repeatable when the cells are run in order from a
# fresh kernel.
np.random.seed(1)

def shuffle_train_val_split(name, val_amt=0, percent=0):
    """Load a dataset, shuffle it, and split it into training/validation sets.

    Reads ``data/<name>_data.mat`` and applies a single random permutation to
    both ``training_data`` and ``training_labels`` so every row keeps its
    label. The first ``val_amt`` shuffled examples become the validation set
    and the remaining examples become the training set.

    The permutation is drawn from NumPy's global random state, so the split is
    only repeatable if the caller seeds it (``np.random.seed``) beforehand.

    Args:
        name: Dataset name used to build the path ``data/<name>_data.mat``.
        val_amt: Number of examples to hold out for validation. When it is 0
            (or otherwise falsy) the size is derived from ``percent`` instead.
        percent: Fraction of the examples to hold out, e.g. ``0.2`` for 20%
            (despite the name this is NOT a 0-100 percentage). Only used when
            ``val_amt`` is falsy; the resulting count is truncated with
            ``int``.

    Returns:
        A tuple ``(data_xtrain, data_ytrain, data_xval, data_yval)``.

    Raises:
        ValueError: If the hold-out size is negative or larger than the number
            of examples. Previously such values were silently accepted: an
            oversized request produced an empty training set and a negative
            one sliced from the end of the arrays.
    """
    data = io.loadmat("data/%s_data.mat" % name)
    examples, labels = data["training_data"], data["training_labels"]
    num_ex = examples.shape[0]

    if not val_amt:
        # No explicit count given (the spam call uses this path): derive it
        # from the requested fraction.
        val_amt = int(percent * num_ex)
    if not 0 <= val_amt <= num_ex:
        raise ValueError(
            "validation size must be between 0 and %d (the number of "
            "examples), got %r" % (num_ex, val_amt)
        )

    # Shuffle before splitting; the same index array is applied to both arrays
    # so examples and labels stay aligned.
    shuffle = np.random.permutation(num_ex)
    examples, labels = examples[shuffle], labels[shuffle]

    data_xval, data_yval = examples[:val_amt], labels[:val_amt]
    data_xtrain, data_ytrain = examples[val_amt:], labels[val_amt:]
    return data_xtrain, data_ytrain, data_xval, data_yval

In [41]:
#2 Data Partitioning
# Hold out 10,000 MNIST examples, 20% of the spam examples and 5,000 CIFAR-10
# examples for validation. The calls stay in this order because each shuffle
# draws from NumPy's global random state.
(mnist_xtrain, mnist_ytrain,
 mnist_xval, mnist_yval) = shuffle_train_val_split("mnist", val_amt=10000)
(spam_xtrain, spam_ytrain,
 spam_xval, spam_yval) = shuffle_train_val_split("spam", percent=0.2)
(cifar10_xtrain, cifar10_ytrain,
 cifar10_xval, cifar10_yval) = shuffle_train_val_split("cifar10", val_amt=5000)

60000
(60000, 784) (60000, 1)
(50000, 784) (50000, 1) (10000, 784) (10000, 1)
5172
(5172, 32) (5172, 1)
(4138, 32) (4138, 1) (1034, 32) (1034, 1)
50000
(50000, 3072) (50000, 1)
(45000, 3072) (45000, 1) (5000, 3072) (5000, 1)


In [None]:
#3 Support Vector Machines: Coding