In [12]:
import numpy as np
import cvxpy as cp
import matplotlib.pyplot as plt
from scipy.spatial import distance
import random
import requests, gzip, os, hashlib
from MyClassifier_13 import MyClassifier
# Seed both NumPy's global RNG and Python's `random` module so that the
# randomly generated data below is repeatable from a fresh kernel.
np.random.seed(1)
random.seed(1)

In [9]:
### Utility Functions
def percentage_correct(inferences, labels):
    """Return the fraction of predictions that match their labels.

    Despite the name, the result is a fraction in [0, 1], not a percentage.

    The matches are counted exactly and divided once, so the result is the
    correctly rounded ratio (e.g. 0.925) instead of a sum of N copies of
    1/N, which accumulates floating-point error (e.g. 0.9250000000000007).

    Parameters
    ----------
    inferences : array-like
        Predicted labels. Flattened before comparison.
    labels : array-like
        Ground-truth labels. Flattened before comparison.

    Returns
    -------
    float
        Number of positions where prediction equals label, divided by the
        number of predictions.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    ZeroDivisionError
        If the inputs are empty.
    """
    predicted = np.asarray(inferences).ravel()
    expected = np.asarray(labels).ravel()

    # Without this check a (N, 1) vs (N,) pair would silently broadcast to an
    # (N, N) comparison and produce a meaningless score.
    if predicted.size != expected.size:
        raise ValueError(
            "inferences and labels must have the same number of elements, "
            "got %d and %d" % (predicted.size, expected.size)
        )

    n_correct = int(np.count_nonzero(predicted == expected))
    return n_correct / predicted.size

def make_dataset(N):
    """Draw a synthetic two-class, two-dimensional Gaussian dataset.

    The first ``N // 2`` samples are drawn from a normal distribution with
    mean (-1, 1) and identity covariance and are labelled -1; the remaining
    ``N - N // 2`` samples are drawn with mean (1, -1) and identity
    covariance and are labelled +1. When ``N`` is odd the extra sample goes
    to the +1 class, so exactly ``N`` samples are always returned.

    Samples come from NumPy's global RNG (``np.random``), so the result
    depends on the current global seed/state.

    Parameters
    ----------
    N : int
        Total number of samples to generate.

    Returns
    -------
    (Y, S) : tuple
        ``Y`` is an ``(N, 2)`` float array of samples and ``S`` is an
        ``(N,)`` ``int8`` array of labels in {-1, +1}, ordered with all -1
        samples first.
    """
    n_neg = N // 2
    n_pos = N - n_neg  # absorbs the leftover sample when N is odd

    # Draw order (negative class first) is kept so that, for even N, the
    # generated data is unchanged for a given seed.
    neg_samples = np.random.multivariate_normal(np.array([-1, 1]), np.identity(2), n_neg)
    pos_samples = np.random.multivariate_normal(np.array([1, -1]), np.identity(2), n_pos)

    Y = np.concatenate((neg_samples, pos_samples))
    S = np.concatenate((-np.ones(n_neg, dtype=np.int8), np.ones(n_pos, dtype=np.int8)))
    return (Y, S)

def fetch(url, cache_dir=None, timeout=60):
    """Download a gzip-compressed file (with on-disk caching) and return its bytes.

    The cache file name is the MD5 hex digest of ``url``. If that file already
    exists in the cache directory it is read from disk; otherwise the URL is
    downloaded with ``requests`` and stored there.

    The download is completed and its HTTP status checked *before* anything is
    written, and the file is written under a temporary name and then renamed.
    A failed or interrupted download therefore never leaves an empty or
    partial file in the cache that later calls would try to decompress.

    Parameters
    ----------
    url : str
        Address of the gzip-compressed resource.
    cache_dir : str, optional
        Directory used for the cache. Defaults to the module-level global
        ``path``, which must be defined before calling when this argument is
        omitted (a NameError is raised otherwise).
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``.

    Returns
    -------
    numpy.ndarray
        Writable 1-D ``uint8`` array holding the decompressed file contents.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    if cache_dir is None:
        cache_dir = path  # notebook-level global set by the driver cell

    fp = os.path.join(cache_dir, hashlib.md5(url.encode('utf-8')).hexdigest())
    if os.path.isfile(fp):
        with open(fp, "rb") as f:
            data = f.read()
    else:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # do not cache HTTP error pages
        data = response.content

        # Write to a temporary name first, then rename into place, so the
        # cache entry only appears once it is complete.
        tmp_fp = fp + ".part"
        with open(tmp_fp, "wb") as f:
            f.write(data)
        os.replace(tmp_fp, fp)

    # frombuffer gives a read-only view of the bytes; copy to make it writable.
    return np.frombuffer(gzip.decompress(data), dtype=np.uint8).copy()

In [10]:
### driver code
# Builds y_train / s_train / y_test / s_test (features / labels in {-1, +1})
# from either two MNIST digits or a synthetic 2-D Gaussian dataset.
###############################################################################
use_mnist = False # Set to True to use MNIST, False to use synthetic data (smaller and faster)
###############################################################################
if use_mnist:
    # fetching the MNIST data

    ### change this line of code to put the data where you want it
    path = "./testing"  # also read by fetch() as its cache directory
    os.makedirs(path, exist_ok=True)
    ###

    # Image files: drop the first 16 bytes and view the rest as rows of 784
    # values. Label files: drop the first 8 bytes.
    y_train = fetch("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 784))
    s_train = fetch("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")[8:]
    y_test = fetch("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 784))
    s_test = fetch("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")[8:]

    ###########################################################################
    ### CHANGE THESE NUMBERS TO USE DIFFERENT DIGITS FOR CLASSIFICATION
    num1 = 1
    num2 = 7
    ###########################################################################
    if num1 == num2:
        raise ValueError("num1 and num2 must be two different digits")

    # Keep only the samples showing one of the two chosen digits.
    train_mask = (s_train == num1) | (s_train == num2)
    test_mask = (s_test == num1) | (s_test == num2)
    y_train = y_train[train_mask]
    y_test = y_test[test_mask]

    # map num1 to s = 1 and num2 to s = -1
    # The labels are derived in one step from the ORIGINAL digit values.
    # Relabelling in place in two passes would, for num2 == 1, turn the freshly
    # written +1 labels into -1 as well and collapse both classes.
    s_train = np.where(s_train[train_mask] == num1, 1, -1).astype(np.int8)
    s_test = np.where(s_test[test_mask] == num1, 1, -1).astype(np.int8)

    N_train = s_train.shape[0]
    N_test = s_test.shape[0]

    # Drawing N_train indices without replacement out of N_train is a random
    # shuffle of the training set.
    training_indices = np.random.choice(range(y_train.shape[0]), size=N_train, replace=False)
    y_train = y_train[training_indices]
    s_train = s_train[training_indices]

else:
    # synthetic dataset
    N_train = 12000
    N_test = 2000

    y_train, s_train = make_dataset(N_train)
    y_test, s_test = make_dataset(N_test)

In [11]:
# LP
# Run MyClassifier.LP on the full training set to obtain (y_new, s_new), train
# a fresh classifier on that returned data only, and report the fraction of
# test labels it predicts correctly.
# NOTE(review): LP presumably selects a reduced subset of the training samples
# (the recorded output reports the number of chosen samples) -- confirm
# against MyClassifier_13.
model = MyClassifier(y_train.shape[1])
y_new, s_new = model.LP(y_train, s_train)
# `model` is rebound here: a new classifier sized to y_new's column count.
model = MyClassifier(y_new.shape[1])
model.train(y_new, s_new)

inferences = model.test(y_test)
print(percentage_correct(inferences, s_test))

No. of chosen samples = 40
0.9250000000000007


In [5]:
# ILP
# Same experiment as the LP cell, but the training data passed to the second
# classifier comes from MyClassifier.ILP instead of MyClassifier.LP.
# NOTE(review): ILP presumably selects a reduced subset of the training
# samples -- confirm against MyClassifier_13.
model = MyClassifier(y_train.shape[1])
y_new, s_new = model.ILP(y_train,s_train)
# `model` is rebound here: a new classifier sized to y_new's column count.
model = MyClassifier(y_new.shape[1])
model.train(y_new, s_new)

inferences = model.test(y_test)
print(percentage_correct(inferences, s_test))

No. of chosen samples = 40
0.9250000000000007


In [6]:
# What if we used all points?
# Baseline: train directly on the complete training set (no LP/ILP step) and
# report the fraction of test labels predicted correctly.
model = MyClassifier(y_train.shape[1])
model.train(y_train, s_train)

inferences = model.test(y_test)
print(percentage_correct(inferences, s_test))

0.9235000000000007
