In [1]:
import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.utils import make_grid

import math
import random

from PIL import Image, ImageOps, ImageEnhance
import numbers

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Mini-batch sizes for the training and evaluation loaders.
batch_size_train = 64
batch_size_test = 1000

# ToTensor() yields floats in [0, 1]; Normalize((0.5,), (0.5,)) then maps
# them to [-1, 1] via (x - 0.5) / 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=transform),
  batch_size=batch_size_train, shuffle=True)

# The test split is iterated in its stored order (shuffle=False) so that
# predictions gathered batch by batch line up with the dataset indices.
# Aggregate loss/accuracy do not depend on the iteration order.
test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=transform),
  batch_size=batch_size_test, shuffle=False)

In [3]:
class Net(nn.Module):
    """Convolutional classifier with a fully connected head producing 10 logits.

    ``features`` stacks four 3x3 convolutions (padding 1, each followed by
    batch-norm and ReLU) with a 2x2 max-pool after every second convolution.
    ``classifier`` is a three-layer MLP with dropout and batch-norm.

    The first convolution takes 1 input channel and the head expects
    ``64 * 7 * 7`` features after two halving pools, i.e. inputs of shape
    ``(N, 1, 28, 28)``.
    """

    def __init__(self):
        super().__init__()

        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(64 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(512, 10),
        )

        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise conv, linear and batch-norm parameters in place.

        Uses the in-place ``nn.init.*_`` functions; the un-suffixed
        ``nn.init.xavier_uniform`` is a deprecated alias.
        """
        for m in self.features.children():
            if isinstance(m, nn.Conv2d):
                # Zero-mean normal with std sqrt(2 / (k_h * k_w * out_channels)).
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0.0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

        for m in self.classifier.children():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """Return raw class scores of shape ``(N, 10)`` for a batch ``x``."""
        x = self.features(x)
        # Collapse (C, H, W) into one feature axis for the linear head.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)

        return x


In [4]:
model = Net()

# Move the model to the GPU *before* building the optimizer: PyTorch's docs
# state that parameters after .cuda() may be different objects from those
# before the call, so the optimizer must be constructed from the moved model.
if torch.cuda.is_available():
    model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=0.003)

criterion = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    criterion = criterion.cuda()

# Multiplies the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)



In [5]:
# Notebook-level bookkeeping shared by the training / evaluation cells.
n_epochs = 1

# Containers for values captured during experiments.
gamma_exp, gamma_exp_sq, train_output = [], [], []

# Per-batch training loss and its x-coordinate (samples seen so far).
train_losses, train_counter = [], []

# Average loss per evaluation, and the sample counts at epoch boundaries
# (0, N, 2N, ... for a dataset of N samples).
test_losses = []
test_counter = [
    epoch_idx * len(train_loader.dataset)
    for epoch_idx in range(n_epochs + 1)
]

In [6]:
def train(epoch, checkpoint_dir='C:/Users/cozyn/Desktop/Research/results'):
    """Train the global ``model`` for one pass over ``train_loader``.

    Uses the notebook globals ``model``, ``optimizer``, ``criterion``,
    ``exp_lr_scheduler`` and ``train_loader``. Appends one entry per batch to
    ``train_losses`` / ``train_counter``.

    Args:
        epoch: 1-based epoch number; the ``train_counter`` x-coordinates are
            offset by ``(epoch - 1) * len(train_loader.dataset)``.
        checkpoint_dir: directory receiving ``model.pth`` and
            ``optimizer.pth`` every 100 batches. It must already exist.
    """
    model.train()

    # Run on whatever device the model's parameters live on.
    device = next(model.parameters()).device
    samples_per_batch = train_loader.batch_size
    dataset_size = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())
        train_counter.append(
            batch_idx * samples_per_batch + (epoch - 1) * dataset_size)

        if (batch_idx + 1) % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), dataset_size,
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
            torch.save(model.state_dict(), checkpoint_dir + '/model.pth')
            torch.save(optimizer.state_dict(), checkpoint_dir + '/optimizer.pth')

    # Advance the LR schedule once per epoch, after this epoch's optimizer
    # steps. Stepping the scheduler before optimizer.step() skips the first
    # value of the schedule (PyTorch >= 1.1 warns about that ordering).
    exp_lr_scheduler.step()

In [7]:
def evaluate(data_loader):
    """Report the global ``model``'s mean loss and accuracy over ``data_loader``.

    The mean cross-entropy loss is appended to the global ``test_losses``
    regardless of which loader is passed in, and a summary line is printed.

    Args:
        data_loader: loader yielding ``(data, target)`` batches and exposing
            ``.dataset`` (its length is used as the sample count).

    Returns:
        ``(average_loss, accuracy_percent)`` as Python floats.
    """
    model.eval()
    device = next(model.parameters()).device
    total_loss = 0.0
    correct = 0

    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)

            output = model(data)

            # Sum (not mean) per batch so the final division by the dataset
            # size is exact even when the last batch is smaller.
            total_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            # Accumulate a plain int so the percentage below is ordinary
            # float arithmetic rather than tensor type promotion.
            correct += (pred == target).sum().item()

    n_samples = len(data_loader.dataset)
    avg_loss = total_loss / n_samples
    accuracy = 100. * correct / n_samples
    test_losses.append(avg_loss)
    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        avg_loss, correct, n_samples, accuracy))
    return avg_loss, accuracy

In [8]:
n_epochs = 2

# Epochs are numbered from 1: train() offsets its train_counter entries by
# (epoch - 1) * dataset size, so a 0-based epoch would give the first epoch
# negative x-coordinates.
for epoch in range(1, n_epochs + 1):
    train(epoch)
    # NOTE: metrics are computed on the training split here; evaluate()
    # still records the resulting loss in `test_losses`.
    evaluate(train_loader)




Average loss: 0.0348, Accuracy: 59341/60000 (98.902%)



In [None]:
def prediciton(data_loader, net=None):
    """Predict a class index for every sample yielded by ``data_loader``.

    Args:
        data_loader: iterable of ``(data, target)`` batches; the targets are
            ignored.
        net: module to run. Defaults to the notebook-global ``model``.

    Returns:
        CPU ``LongTensor`` of shape ``(num_samples, 1)`` holding the argmax
        over dimension 1 of the network output, in iteration order. An empty
        ``LongTensor`` is returned when the loader yields no batches.
    """
    if net is None:
        net = model
    net.eval()

    # Run on the device the network lives on (CPU for parameter-free modules).
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    batch_preds = []
    # `Variable(..., volatile=True)` no longer disables autograd; no_grad()
    # is the supported way to run inference without building graphs.
    with torch.no_grad():
        for data, _ in data_loader:
            output = net(data.to(device))
            batch_preds.append(output.argmax(dim=1, keepdim=True).cpu())

    if not batch_preds:
        return torch.LongTensor()
    # Concatenate once instead of re-allocating the result on every batch.
    return torch.cat(batch_preds, dim=0)

In [None]:
# Collect the predicted class index for every sample yielded by the test loader.
test_pred = prediciton(test_loader)

In [None]:
# Sanity-check the sizes of the collected predictions and training histories.
print(test_pred.shape)
print(torch.FloatTensor(train_losses).shape)
print(torch.FloatTensor(train_counter).shape)
# Pull the first (index, (data, targets)) item from the training loader to
# inspect the shape of a single batch.
examples = enumerate(train_loader)
batch_idx, (example_data, example_targets) = next(examples)
print(batch_idx)
print(example_targets.shape)
print(example_data.shape)

In [4]:
class HNet(nn.Module):
    """Small fully connected network: flatten -> 784 -> 128 -> 64 -> 10.

    ReLU follows the first two linear layers; the output of the last layer
    is returned as-is (no softmax).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # 784 input features per sample, e.g. a flattened 1x28x28 image.
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch ``x`` to a ``(batch, 10)`` tensor of raw scores."""
        hidden = F.relu(self.fc1(self.flatten(x)))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [36]:
# Reset the notebook-level bookkeeping before training the next model.
n_epochs = 1

# Containers for values captured during experiments.
gamma_exp = list()
gamma_exp_sq = list()
train_output = list()

# Training history: one loss and one "samples seen" coordinate per batch.
train_losses = list()
train_counter = list()

# Evaluation history and the sample counts at epoch boundaries (0, N, 2N, ...).
test_losses = list()
test_counter = [
    boundary * len(train_loader.dataset)
    for boundary in range(n_epochs + 1)
]

In [37]:
model = HNet()

# Move the model to the GPU *before* building the optimizer: PyTorch's docs
# state that parameters after .cuda() may be different objects from those
# before the call, so the optimizer must be constructed from the moved model.
if torch.cuda.is_available():
    model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=0.003)

criterion = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    criterion = criterion.cuda()

# No learning-rate scheduler is attached to this optimizer.

In [41]:
def train(epoch, checkpoint_dir='C:/Users/cozyn/Desktop/Research/results'):
    """Train the global ``model`` for one pass over ``train_loader``.

    Uses the notebook globals ``model``, ``optimizer``, ``criterion`` and
    ``train_loader``. Appends one entry per batch to ``train_losses`` /
    ``train_counter``. No learning-rate scheduler is stepped here.

    Args:
        epoch: 1-based epoch number; the ``train_counter`` x-coordinates are
            offset by ``(epoch - 1) * len(train_loader.dataset)``.
        checkpoint_dir: directory receiving ``model.pth`` and
            ``optimizer.pth``. It must already exist.
    """
    model.train()

    # Run on whatever device the model's parameters live on.
    device = next(model.parameters()).device
    samples_per_batch = train_loader.batch_size
    dataset_size = len(train_loader.dataset)

    def save_checkpoint():
        torch.save(model.state_dict(), checkpoint_dir + '/model.pth')
        torch.save(optimizer.state_dict(), checkpoint_dir + '/optimizer.pth')

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())
        train_counter.append(
            batch_idx * samples_per_batch + (epoch - 1) * dataset_size)

        if (batch_idx + 1) % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), dataset_size,
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
            # Checkpoint at the logging interval rather than on every batch:
            # serialising both state dicts each step makes disk I/O dominate
            # the training loop.
            save_checkpoint()

    # Persist the weights reached at the end of the epoch as well, since the
    # last batches may fall after the final logging interval.
    save_checkpoint()

In [39]:
def evaluate(data_loader):
    """Report the global ``model``'s mean loss and accuracy over ``data_loader``.

    The mean cross-entropy loss is appended to the global ``test_losses``
    regardless of which loader is passed in, and a summary line is printed.

    Args:
        data_loader: loader yielding ``(data, target)`` batches and exposing
            ``.dataset`` (its length is used as the sample count).

    Returns:
        ``(average_loss, accuracy_percent)`` as Python floats.
    """
    model.eval()
    device = next(model.parameters()).device
    total_loss = 0.0
    correct = 0

    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)

            output = model(data)

            # Sum (not mean) per batch so the final division by the dataset
            # size is exact even when the last batch is smaller.
            total_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            # Accumulate a plain int so the percentage below is ordinary
            # float arithmetic rather than tensor type promotion.
            correct += (pred == target).sum().item()

    n_samples = len(data_loader.dataset)
    avg_loss = total_loss / n_samples
    accuracy = 100. * correct / n_samples
    test_losses.append(avg_loss)
    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        avg_loss, correct, n_samples, accuracy))
    return avg_loss, accuracy

In [42]:
n_epochs = 1

# Epochs are numbered from 1: train() offsets its train_counter entries by
# (epoch - 1) * dataset size, so a 0-based epoch would give the first epoch
# negative x-coordinates.
for epoch in range(1, n_epochs + 1):
    train(epoch)
    # NOTE: metrics are computed on the training split here; evaluate()
    # still records the resulting loss in `test_losses`.
    evaluate(train_loader)



KeyboardInterrupt: 

In [None]:
def mseresidual(y, F):
    """Return the residual ``y - F`` (target minus current prediction).

    Note: the parameter ``F`` shadows any module-level ``F`` inside this
    function.
    """
    residual = y - F
    return residual

In [None]:
Hmodel = HNet()

# Move the model to the GPU *before* building the optimizer: PyTorch's docs
# state that parameters after .cuda() may be different objects from those
# before the call, so the optimizer must be constructed from the moved model.
if torch.cuda.is_available():
    Hmodel = Hmodel.cuda()

Hoptimizer = optim.Adam(Hmodel.parameters(), lr=0.003)

# Mean-squared-error objective (the classifier cells above use cross-entropy).
Hcriterion = nn.MSELoss()
if torch.cuda.is_available():
    Hcriterion = Hcriterion.cuda()

# Ensemble size for the boosting experiment.
ensemble_size = 3

In [None]:
def _boosted_output(models, gammas, data, like):
    """Return sum_i gammas[i] * models[i](data), computed without gradients.

    ``like`` supplies the shape/dtype/device of the result; an empty
    ensemble yields zeros.
    """
    total = torch.zeros_like(like)
    with torch.no_grad():
        for step_size, member in zip(gammas, models):
            total = total + step_size * member(data)
    return total


def GradientBoosting(F, data_loader, M, epoch=1, model_factory=None,
                     num_classes=10, lr=0.003):
    """Fit an additive ensemble of ``M`` networks by stage-wise MSE boosting.

    Each stage (1) trains a fresh network to regress the residual between the
    one-hot targets and the current ensemble output, then (2) fits a scalar
    step size ``gamma`` for that network by minimising the MSE of
    ``ensemble + gamma * network`` against the one-hot targets.

    Args:
        F: ignored. Kept for interface compatibility; boosting always starts
            from an empty ensemble.
        data_loader: iterable of ``(data, target)`` batches, where ``target``
            holds integer class indices in ``[0, num_classes)``. It is
            iterated several times, so it must be re-iterable.
        M: number of boosting stages (networks) to fit.
        epoch: number of passes over ``data_loader`` used to train each
            stage's network.
        model_factory: zero-argument callable returning a new ``nn.Module``
            whose output has ``num_classes`` columns. Defaults to ``HNet``.
        num_classes: width of the one-hot target encoding.
        lr: Adam learning rate for both the networks and the step sizes.

    Returns:
        ``(models, gammas)``: the fitted networks (in eval mode) and their
        step sizes as Python floats, both in stage order.
    """
    if model_factory is None:
        model_factory = HNet
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    criterion = nn.MSELoss()

    def one_hot_targets(target):
        # The parameter `F` shadows torch.nn.functional in this scope, so the
        # functional API is reached through its full path.
        encoded = torch.nn.functional.one_hot(target.long(), num_classes)
        return encoded.to(torch.float32)

    models = []
    gammas = []
    for _stage in range(M):
        learner = model_factory().to(device)
        learner_opt = optim.Adam(learner.parameters(), lr=lr)

        # Stage 1: regress the residual of the ensemble built so far
        # (the same quantity as mseresidual(y, F): y - F).
        learner.train()
        for _pass in range(epoch):
            for data, target in data_loader:
                data, target = data.to(device), target.to(device)
                onehot = one_hot_targets(target)
                residual = onehot - _boosted_output(models, gammas, data, onehot)

                learner_opt.zero_grad()
                loss = criterion(learner(data), residual)
                loss.backward()
                learner_opt.step()

        # Stage 2: with the network frozen, fit its scalar step size. gamma
        # is created once per stage so its updates accumulate across batches.
        learner.eval()
        gamma = torch.ones(1, device=device, requires_grad=True)
        gamma_opt = optim.Adam([gamma], lr=lr)
        for data, target in data_loader:
            data, target = data.to(device), target.to(device)
            onehot = one_hot_targets(target)
            base = _boosted_output(models, gammas, data, onehot)
            with torch.no_grad():
                step = learner(data)

            gamma_opt.zero_grad()
            # MSELoss takes (input, target): prediction first.
            loss = criterion(base + gamma * step, onehot)
            loss.backward()
            gamma_opt.step()

        models.append(learner)
        gammas.append(gamma.item())

    return models, gammas

In [None]:
# Fit `ensemble_size` boosting stages on the training data. The first
# argument is overwritten inside GradientBoosting, so an empty list is passed
# (the previous `pred` is not defined at notebook scope), and the fourth
# argument (`epoch`) is supplied explicitly. The result is bound to names so
# the cell does not dump the full model reprs as output.
boosted_models, boosted_gammas = GradientBoosting([], train_loader, ensemble_size, 1)