In [1]:
import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.utils import make_grid

import math
import random

from PIL import Image, ImageOps, ImageEnhance
import numbers

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
batch_size_train = 64
batch_size_test = 1000
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=transform),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=transform),
  batch_size=batch_size_test, shuffle=True)

In [3]:
class HNet(nn.Module):    
    def __init__(self):
        super(HNet, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 128)  # 6*6 from image dimension
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)


    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
#         x = F.softmax(x, dim = 1)
        return x     

In [4]:
class NHNet(nn.Module):    
    def __init__(self):
        super(NHNet, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 128)  # 6*6 from image dimension
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)


    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.softmax(x, dim = 1)
        return x   

In [5]:
gamma_exp = []
train_output = []
train_losses = []
train_counter = []
test_losses = []
# test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

In [6]:
initial_model = HNet()

optimizer = optim.Adam(initial_model.parameters(), lr=0.003)

criterion = nn.CrossEntropyLoss()

# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

if torch.cuda.is_available():
    initial_model = initial_model.cuda()
    criterion = criterion.cuda()

In [7]:
def train(epoch):
    initial_model.train()
#     exp_lr_scheduler.step()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        optimizer.zero_grad()
        output = initial_model(data)
#         print(output.shape)
#         train_output.append(output)
#         if batch_idx == 937:      
#             train_output.append(output)
#         print("before:",batch_idx,output[0])
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
#         print(optimizer.state_dict())
#         gamma_exp.append(optimizer.state_dict()['exp_avg'])
#         gamma_exp_sq.append(optimizer.state_dict()['exp_avg_sq'])
#         print(gamma[''])
#         print("after:",output[0])
#         train_output.append(output.data.max(1, keepdim=True))
#         if batch_idx == 0:
#             print(output.data.max(1, keepdim=True)[1].shape)
        train_losses.append(loss.item())
        train_counter.append(
                (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
        torch.save(initial_model.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/model.pth')
        torch.save(optimizer.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/optimizer.pth')
        if (batch_idx + 1)% 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
#             train_losses.append(loss.item())
#             train_counter.append(
#                 (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))

In [8]:
def evaluate(data_loader):
    initial_model.eval()
    loss = 0
    correct = 0
    
    with torch.no_grad():
        for data, target in data_loader:
#             data, target = Variable(data, volatile=True), Variable(target)
            if torch.cuda.is_available():
                data = data.cuda()
                target = target.cuda()
        
            output = initial_model(data)
        
            loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
    loss /= len(data_loader.dataset)
    test_losses.append(loss)    
    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))

In [9]:
n_epochs = 65

for epoch in range(n_epochs):
    train(epoch)
    evaluate(train_loader)



KeyboardInterrupt: 

In [None]:
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/results/initial_model.pth')

In [18]:
initial_model = torch.load('C:/Users/cozyn/Desktop/Research/results/initial_model.pth')
initial_model.eval()

HNet(
  (flatten): Flatten()
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

In [8]:
def mseresidual(y, F):
    return y - F

In [9]:
Hoptimizer = optim.Adam(initial_model.parameters(), lr=0.003)

Hcriterion = nn.MSELoss()

# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

if torch.cuda.is_available():
    Hcriterion = Hcriterion.cuda()

In [10]:
evaluate(train_loader)

NameError: name 'evaluate' is not defined

In [11]:
def Htrain(Hmodel, epoch):
    Hmodel.train()
    for m in range(num_of_models):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = Variable(data), Variable(target)
            nb_digits = 10
            target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
            if torch.cuda.is_available():
                data = data.cuda()
                target = target.cuda()
                target_onehot = target_onehot.cuda()
            Hoptimizer.zero_grad()
            output = initial_model(data)
            for i in range(m):
                model = models[i]
                if torch.cuda.is_available():
                    output = output.cuda()
                    model = model.cuda()
                output = output + gamma_exp[i] * model(data)
            target = target.view(-1,1)
            target_onehot.zero_()
            target_onehot.scatter_(1, target, 1)
            residual = mseresidual(target_onehot, output)
            houtput = Hmodel(data)
            houtput = houtput.type(torch.cuda.FloatTensor)
            residual = residual.type(torch.cuda.FloatTensor)
#             residual_list.append(residual)
    #             print("residual is:", residual)
    #             print("predicted is:", houtput)
            loss = Hcriterion(houtput, residual)
            loss.backward(retain_graph=True)
            Hoptimizer.step()
            if (batch_idx + 1)% 100 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                    100. * (batch_idx + 1) / len(train_loader), loss.item()))

In [12]:
def GradientBoosting(initial_model, M):
    gamma_exp = torch.ones([M], dtype = torch.float64)
    models = []
    residual_list = []
    for m in range(M):
        # Create new model for training residuals
        Hmodel = NHNet()
        if torch.cuda.is_available():
            Hmodel = Hmodel.cuda()
            gamma_exp = gamma_exp.cuda()
#         Htrain(Hmodel, 100)
#         for i in range(5):
#             print(i)
        Hmodel.train()
        epoch = 2
        for i in range(epoch):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data), Variable(target)
                nb_digits = 10
                target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()
                    target_onehot = target_onehot.cuda()
                Hoptimizer.zero_grad()
                output = initial_model(data)
                for i in range(m):
                    model = models[i]
                    if torch.cuda.is_available():
                        output = output.cuda()
                        model = model.cuda()
                    output = output + gamma_exp[i] * model(data)
                target = target.view(-1,1)
                target_onehot.zero_()
                target_onehot.scatter_(1, target, 1)
                residual = mseresidual(target_onehot, output)
                houtput = Hmodel(data)
                houtput = houtput.type(torch.cuda.FloatTensor)
                residual = residual.type(torch.cuda.FloatTensor)
                residual_list.append(residual)
    #             print("residual is:", residual)
    #             print("predicted is:", houtput)
                loss = Hcriterion(houtput, residual)
                loss.backward(retain_graph=True)
                Hoptimizer.step()
                if (batch_idx + 1)% 100 == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                        100. * (batch_idx + 1) / len(train_loader), loss.item()))
#             print('batch_idx', batch_idx)
#             train_losses.append(loss.item())
#             torch.save(model.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/model.pth')
#             torch.save(optimizer.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/optimizer.pth')
        models.append(Hmodel)
#         print("Hmodel is:", Hmodel)
#         print("Appended model is:", models[1])
#         print("Length of models:", len(models))
        
        
#         loss = 0
#         correct = 0
    
#         with torch.no_grad():
#             for batch_idx, (data, target) in enumerate(train_loader):
# #             data, target = Variable(data, volatile=True), Variable(target)
#                 residual = residual_list[batch_idx]
#                 if torch.cuda.is_available():
#                     data = data.cuda()
#                     residual = residual.cuda()
#                 residual = residual.type(torch.cuda.LongTensor)
#                 output = Hmodel(data)
#                 loss += F.cross_entropy(output, residual, reduction='sum').item()
#                 pred = output.data.max(1, keepdim=True)[1]
#                 correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
#         loss /= len(residual_list)
#     # test_losses.append(loss)    
#         print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
#             loss, correct, len(residual_list),
#             100. * correct / len(residual_list)))
        
        
        
        
        gamma = torch.rand(1, requires_grad=True, device="cuda")
#         gamma[0] = 0.1
        print("Initialized gamma:", gamma)
#         Variable(gamma)
        Goptimizer = optim.Adam([gamma], lr=0.003)
        for i in range(10):
            print(i)
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data), Variable(target)
                nb_digits = 10
                target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()
                    target_onehot = target_onehot.cuda()
                    Hmodel = Hmodel.cuda()
                    gamma = gamma.cuda()
                Goptimizer.zero_grad()  
                output = initial_model(data)
                for i in range(m):
                    model = models[i]
                    if torch.cuda.is_available():
                        model = model.cuda()
                        output = output.cuda()
                        gamma_temp = gamma_exp[i]
                        gamma_temp = gamma_temp.cuda()
                    output = output + gamma_temp * model(data)

                target = target.view(-1,1)
                target_onehot.zero_()
                target_onehot.scatter_(1, target, 1)
                temp = Hmodel(data)
    #             print('output is:', output)
    #             print('gamma is:', gamma.shape)
    #             print('Hmodel(data) is:', temp)
                predicted = output + gamma * temp
    #             print("predicted is:", predicted)
    #             predicted.double()
    #             target_onehot.double()
                loss = Hcriterion(predicted, target_onehot)
    #             print("target_onehot is:", target_onehot)
    #             train_losses.append(loss.item())
    #             torch.save(model.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/model.pth')
    #             torch.save(optimizer.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/optimizer.pth')
    #             print("loss is:", loss)
                loss.backward(retain_graph=True)
#                 print("gamma is before:", gamma)
    #             print("gamma's gradient is:", gamma.retain_grad())
                Goptimizer.step()
#                 print("gamma is after:", gamma)
        gamma_exp[m] = gamma
        print(gamma_exp)
    return models, gamma_exp

In [13]:
num_of_models = 6
models, gamma_exp = GradientBoosting(initial_model, num_of_models)

Initialized gamma: tensor([0.3014], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0943, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000], device='cuda:0',
       dtype=torch.float64, grad_fn=<CopySlices>)
Initialized gamma: tensor([0.7089], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0943, 1.0067, 1.0000, 1.0000, 1.0000, 1.0000], device='cuda:0',
       dtype=torch.float64, grad_fn=<CopySlices>)
Initialized gamma: tensor([0.0156], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0943, 1.0067, 0.9911, 1.0000, 1.0000, 1.0000], device='cuda:0',
       dtype=torch.float64, grad_fn=<CopySlices>)
Initialized gamma: tensor([0.5003], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0943, 1.0067, 0.9911, 0.9985, 1.0000, 1.0000], device='cuda:0',
       dtype=torch.float64, grad_fn=<CopySlices>)
Initialized gamma: tensor([0.1176], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0943, 1.0067, 0.9911, 0.9985,

In [28]:
loss = 0
correct = 0
    
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(train_loader):
#             data, target = Variable(data, volatile=True), Variable(target)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        output = initial_model(data)
#         if batch_idx == 937:
#             print("Output before is:",output)
        for i in range(num_of_models):
            model = models[i]
            if torch.cuda.is_available():
                model = model.cuda()
                output = output.cuda()
                gamma_temp = gamma_exp[i]
                gamma_temp = gamma_temp.cuda()
            output = output + gamma_temp * model(data) * 20
#             if batch_idx == 937:
#                 print("Hmodel output is:", model(data))
#                 print("Output after is:",output)
        loss += F.cross_entropy(output, target, reduction='sum').item()
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
loss /= len(train_loader.dataset)
# test_losses.append(loss)    
print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
    loss, correct, len(train_loader.dataset),
    100. * correct / len(train_loader.dataset)))


Average loss: 0.0325, Accuracy: 59504/60000 (99.173%)



In [16]:
model1 = models[0]
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/results/model1.pth')
model2 = models[1]
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/results/model2.pth')
model3 = models[2]
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/results/model3.pth')
model4 = models[3]
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/results/model4.pth')
model5 = models[4]
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/results/model5.pth')
model6 = models[5]
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/results/model6.pth')