In [61]:
import math

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.optim import SGD

In [62]:
# --- Network architecture (passed to NeuralNet) ---
INPUT_SIZE = 1
HIDDEN_SIZE = 8
NUM_LAYERS = 2
OUTPUT_SIZE = 2

# --- Optimisation settings (used by the SGD training loop) ---
BATCH_SIZE = 10
LEARNING_RATE = 0.1
NUM_EPOCHS = 1000

# --- Reproducibility ---
# The notebook draws random numbers from both NumPy and torch; seeding both
# global generators here makes a "Restart Kernel -> Run All" reproduce the
# same numbers.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [63]:
class NeuralNet(nn.Module):
    """Fully connected ReLU network.

    Layout: one input Linear layer, ``num_layers - 1`` hidden Linear layers of
    width ``hidden_size``, and one output Linear layer producing
    ``num_classes`` values. A ReLU follows every layer except the output one.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        # The (initially empty) container is registered before the input
        # layer, so it comes first in the module's parameter ordering.
        self.hidden = nn.ModuleList()
        self.input_layer = nn.Linear(input_size, hidden_size)
        self.hidden.extend(
            [nn.Linear(hidden_size, hidden_size) for _ in range(num_layers - 1)]
        )
        self.output_layer = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of inputs to the raw (un-normalised) output scores."""
        activations = torch.relu(self.input_layer(x))
        for hidden_layer in self.hidden:
            activations = torch.relu(hidden_layer(activations))
        return self.output_layer(activations)
    
def ReLU_init(model, hidden_size=None):
    """Re-initialise every parameter of ``model`` in place from N(0, 1/hidden_size).

    All tensors returned by ``model.parameters()`` (weights and biases alike)
    are overwritten with zero-mean Gaussian samples whose standard deviation
    is ``1 / sqrt(hidden_size)``.

    Args:
        model: module whose parameters are re-sampled.
        hidden_size: width used to scale the standard deviation. When left as
            ``None`` the module-level ``HIDDEN_SIZE`` constant is used, which
            is the behaviour callers had before this parameter existed.
    """
    if hidden_size is None:
        hidden_size = HIDDEN_SIZE
    std = 1 / np.sqrt(hidden_size)
    for param in model.parameters():
        nn.init.normal_(param, 0, std)

def ReLU_glorot_init(model):
    """Zero every bias of ``model`` and Xavier-normal initialise everything else, in place.

    A parameter counts as a bias when the last component of its dotted name is
    ``bias``. This covers nested modules (``"layer.bias"``) as well as a bare
    layer passed directly, whose parameter is named just ``"bias"``; testing
    only for a ``".bias"`` suffix would send that 1-D tensor to
    ``xavier_normal_``, which rejects tensors with fewer than two dimensions.
    """
    for name, param in model.named_parameters():
        is_bias = name.rpartition(".")[2] == "bias"
        if is_bias:
            param.data.fill_(0)
        else:
            nn.init.xavier_normal_(param)

In [64]:
NUM_SAMPLES = 500
NUM_TRAIN_SAMPLES = 50

# Evenly spaced inputs covering [0, 1].
X = np.linspace(0, 1, NUM_SAMPLES)

# Six sorted random cut points; pairs (0,1), (2,3), (4,5) bound the intervals
# that are labelled 1.
breaks = np.sort(np.random.random(6))


def encode_inputs(x):
    """Return 1 when x lies strictly inside one of the three intervals, else 0."""
    for lower, upper in zip(breaks[0::2], breaks[1::2]):
        if lower < x < upper:
            return 1
    return 0


Y = np.array(list(map(encode_inputs, X)))

# Convert both arrays to float32 tensors.
X = torch.from_numpy(X).float()
Y = torch.from_numpy(Y).float()

# Random subset (without replacement) used as the training set.
TRAIN_INDICES = np.random.choice(
    np.arange(NUM_SAMPLES), size=NUM_TRAIN_SAMPLES, replace=False
)
X_TRAIN = X[TRAIN_INDICES]
Y_TRAIN = Y[TRAIN_INDICES]

In [65]:
def get_weight(model):
    """Return all parameter values of ``model`` as one flat Python list.

    Parameters are visited in ``model.parameters()`` order and each tensor is
    flattened before its values are appended.
    """
    return [
        value
        for tensor in model.parameters()
        for value in tensor.reshape(-1).tolist()
    ]

def next_batch(inputs, targets, batchSize):
    """Yield consecutive ``(inputs, targets)`` slices of at most ``batchSize`` rows.

    Slices are taken in order along the first axis; the final slice is shorter
    when the number of rows is not a multiple of ``batchSize``.
    """
    total_rows = inputs.shape[0]
    for start in range(0, total_rows, batchSize):
        stop = start + batchSize
        yield inputs[start:stop], targets[start:stop]

def _per_sample_losses(model, features, labels):
    """Return ``(logits, losses)`` for ``model`` on ``features``; ``losses`` holds one cross-entropy value per example."""
    with torch.no_grad():
        logits = model(features.reshape(len(features), 1))
        losses = nn.functional.cross_entropy(logits, labels.long(), reduction='none')
    return logits, losses


def _log_binomial_moment(p, n, lmda, C):
    """Return log( p^n + sum_{k=1}^{n-1} C(n,k) exp(lmda^2 C^2 / (8k)) p^(n-k) (1-p)^k ), evaluated in log space."""
    # log(0) = -inf is a legitimate value here (p == 0 or p == 1), so silence
    # the divide-by-zero warning instead of letting it flood the output.
    with np.errstate(divide='ignore'):
        log_p = np.log(p)
        log_q = np.log1p(-p)

    log_terms = [n * log_p]
    log_n_factorial = math.lgamma(n + 1)
    # NOTE(review): the sum stops at k = n - 1, exactly as in the original
    # code, so the k = n term is not included -- confirm against the derivation.
    for k in range(1, n):
        log_binom = log_n_factorial - math.lgamma(k + 1) - math.lgamma(n - k + 1)
        log_terms.append(
            log_binom + lmda**2 * C**2 / (8 * k) + (n - k) * log_p + k * log_q
        )
    return np.logaddexp.reduce(log_terms)


def get_bound(feature_dataset, label_dataset, feature_train, label_train, deltas, lmda, print_loss=False):
    """Train a fresh network on the training subset and evaluate the bound for each delta.

    A new ``NeuralNet`` is created, initialised with ``ReLU_init`` and trained
    with mini-batch SGD on cross-entropy for ``NUM_EPOCHS`` epochs. The
    function then measures the mean cross-entropy on the training subset and
    on the full dataset, the fraction of the full dataset that is classified
    correctly, and combines them into one bound value per entry of ``deltas``.

    Args:
        feature_dataset: 1-D tensor with every input value.
        label_dataset: 1-D tensor with the label of every input.
        feature_train: 1-D tensor with the training inputs.
        label_train: 1-D tensor with the training labels.
        deltas: iterable of confidence parameters; one bound is returned for each.
        lmda: scale parameter of the bound (divides the complexity term).
        print_loss: when true, print the last mini-batch loss every 100 epochs.

    Returns:
        ``(train_error, true_error, bounds, p_Delta)`` where ``train_error``
        and ``true_error`` are mean cross-entropy losses (Python floats),
        ``bounds`` is a list aligned with ``deltas`` and ``p_Delta`` is the
        accuracy on the full dataset as a 0-d NumPy array.
    """
    model = NeuralNet(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)
    ReLU_init(model)

    opt = SGD(model.parameters(), lr=LEARNING_RATE)
    lossFunc = nn.CrossEntropyLoss()

    num_samples = len(feature_dataset)
    num_train = len(feature_train)

    for epoch in range(NUM_EPOCHS):
        for x, y in next_batch(feature_train, label_train, BATCH_SIZE):
            x = x.reshape((len(x), 1))
            outputs = model(x)
            loss = lossFunc(outputs, y.long())
            opt.zero_grad()
            loss.backward()
            opt.step()
        if (epoch+1) % 100 == 0 and print_loss:
            print('Epoch {}: Loss {:.3f}'.format(epoch+1, loss.item()))

    # One vectorised forward pass per dataset instead of one pass per sample.
    logits_all, losses_all = _per_sample_losses(model, feature_dataset, label_dataset)
    _, losses_train = _per_sample_losses(model, feature_train, label_train)

    # Fraction of the full dataset whose arg-max prediction matches the label.
    classified = label_dataset == logits_all.max(1)[1]
    p_Delta = (classified.sum() / num_samples).numpy()

    train_error = losses_train.mean().item()
    true_error = losses_all.mean().item()
    # Largest per-sample loss over the full dataset (cross-entropy is >= 0).
    C = losses_all.max().item()

    # log(1 / pdf) for an isotropic Gaussian density with variance HIDDEN_SIZE,
    # written directly in log space: forming (2*pi*H)**(d/2) or 1/pdf
    # explicitly overflows / divides by zero once the network gets larger.
    # NOTE(review): ReLU_init samples with variance 1/HIDDEN_SIZE, whereas this
    # density uses variance HIDDEN_SIZE (unchanged from the original formula)
    # -- confirm which one is intended.
    w = get_weight(model)
    d = len(w)
    log_inv_pdf = np.linalg.norm(w)**2 / (2*HIDDEN_SIZE) + (d/2) * np.log(2*np.pi*HIDDEN_SIZE)

    # This term does not depend on delta, so compute it once. p_Delta is
    # converted to a Python float so the arithmetic runs in double precision
    # rather than in the float32 of the underlying tensor.
    log_component = _log_binomial_moment(float(p_Delta), num_train, lmda, C)

    bounds = [
        train_error + (log_component + np.log(1/delta) + log_inv_pdf) / lmda
        for delta in deltas
    ]

    return train_error, true_error, bounds, p_Delta

In [66]:
NUM_TESTS=50
DELTAS=[0.01,0.025,0.05,0.075,0.1,0.125,0.15,0.175,0.2,0.25]
# Derived from DELTAS so the per-delta containers below can never get out of
# sync with the list of confidence levels.
N_DELTA=len(DELTAS)
LAMBDA=1.1

train_errors=[]
test_errors=[]
p_Deltas=[]
# trial_bounds[n] collects the bound obtained for DELTAS[n] in every trial.
trial_bounds=[[] for n in range(N_DELTA)]
# emp_prob[n] counts (then averages) the trials whose test error is below the bound.
emp_prob=np.zeros(N_DELTA)

for k in range(NUM_TESTS):
    tr_er, ts_er, bs, p = get_bound(X,Y,X_TRAIN,Y_TRAIN,DELTAS, LAMBDA)
    train_errors.append(tr_er)
    test_errors.append(ts_er)
    p_Deltas.append(p)
    for n, b in enumerate(bs):
        trial_bounds[n].append(b)
        if ts_er<b:
            emp_prob[n]+=1
    print('Trial {} Training Error {:.3f} Test Error {:.3f}'.format(k+1,tr_er,ts_er))
emp_prob=emp_prob/NUM_TESTS
print(emp_prob)

106
92.51187314263723
Trial 1 Training Error 0.306 Test Error 0.398
106
81.28836703855669
Trial 2 Training Error 0.326 Test Error 0.425
106
140.21094853849243
Trial 3 Training Error 0.169 Test Error 0.358


KeyboardInterrupt: 