In [0]:
import numpy as np
from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optimizers

from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

## Data preparation

In [2]:
# Load Iris as (features, labels) arrays. `return_X_y` is passed by keyword:
# recent scikit-learn releases no longer accept it as a positional argument.
features, labels = load_iris(return_X_y=True)
# Store the labels as a column vector of shape (n_samples, 1).
labels = labels.reshape(-1, 1)
features[0], labels[0]

(array([5.1, 3.5, 1.4, 0.2]), array([0]))

In [0]:
# Pin the shuffle seed so the train/test partition is the same on every run;
# without it every metric below changes each time the notebook is executed.
x_train, x_test, y_train, y_test = train_test_split(features, labels, random_state=42)

## Network definition

In [0]:
class IrisClassifier(nn.Module):
    """Fully connected network with two ReLU hidden layers producing class logits.

    Args:
        in_features: Length of each input feature vector.
        classes: Number of outputs of the final linear layer (one per class).
        hidden_features: Width of both hidden layers. Defaults to 10, the
            value that was previously hard-coded.
    """

    def __init__(self, in_features=4, classes=3, hidden_features=10):
        super().__init__()
        # The layers stay wrapped in an nn.Sequential stored as `self.model`
        # because other cells reach into `.model` directly.
        self.model = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, classes),
        )

    def forward(self, x):
        """Return the output of the final linear layer (no softmax applied) for `x`."""
        return self.model(x)

## Optimizer implementation

In [0]:
class SimulatedAnnealing:
    """Gradient-free optimizer that searches a model's weight space by simulated annealing.

    A candidate weight vector is drawn from a normal distribution centred on
    the current weights. Candidates with lower loss are always accepted; worse
    candidates are accepted with probability ``exp((cur_loss - new_loss) / temp)``.
    The temperature starts at the initial loss and is multiplied by
    ``annealing_rate`` every ``annealing_schedule`` iterations.

    Args:
        model: Module whose parameters are optimized in place.
        annealing_rate: Multiplicative cooling factor; must lie strictly
            between 0 and 1, otherwise the temperature would never fall.
        annealing_schedule: Number of iterations between cooling steps; must
            be positive.
        step_std: Standard deviation of the normal proposal distribution
            (defaults to the previously hard-coded 0.2).

    Raises:
        ValueError: If any hyper-parameter is outside its valid range.
    """

    def __init__(self, model, annealing_rate=0.999,  annealing_schedule=5, step_std=0.2):
        if not 0.0 < annealing_rate < 1.0:
            raise ValueError(f'annealing_rate must be in (0, 1), got {annealing_rate}')
        if annealing_schedule <= 0:
            raise ValueError(f'annealing_schedule must be positive, got {annealing_schedule}')
        if step_std <= 0:
            raise ValueError(f'step_std must be positive, got {step_std}')
        self.model = model
        self.annealing_rate = annealing_rate
        self.annealing_schedule = annealing_schedule
        self.step_std = step_std

    def get_weights(self):
        """Return every model parameter concatenated into one detached 1-D tensor."""
        # detach(): the flat vector is only a search-space coordinate and must
        # not carry autograd history.
        return torch.cat([ps.detach().flatten() for ps in self.model.parameters()])

    def load_weights(self, new_params):
        """Copy a flat weight vector back into the model's parameters.

        Args:
            new_params: Tensor holding exactly as many elements as the model
                has parameters, laid out in ``model.parameters()`` order.

        Raises:
            ValueError: If the element count does not match the model.
        """
        params = list(self.model.parameters())
        expected = sum(p.numel() for p in params)
        if new_params.numel() != expected:
            raise ValueError(
                f'Expected a vector of {expected} weights, got {new_params.numel()}'
            )

        flat = new_params.reshape(-1)
        offset = 0
        with torch.no_grad():
            for p in params:
                # numel() is always an int, including for 0-d parameters,
                # whereas np.prod of an empty shape yields the float 1.0.
                count = p.numel()
                # copy_ writes into the existing storage, so the parameter
                # keeps its own dtype/device and never aliases `new_params`.
                p.copy_(flat[offset: offset + count].reshape(p.shape))
                offset += count

    def optimize(self, data, target, criterion, min_temp=1e-2):
        """Run annealing until the temperature falls to ``min_temp`` or below.

        The model is left holding the last accepted weights. The temperature
        and current loss are printed at every cooling step.

        Args:
            data: Input batch passed to the model.
            target: Targets passed to ``criterion`` together with the model output.
            criterion: Callable ``criterion(output, target)`` returning a scalar loss.
            min_temp: Temperature threshold at which the search stops.

        Raises:
            ValueError: If the initial loss is not finite (an infinite start
                temperature could never cool below ``min_temp``).
        """
        # No gradients are ever needed, so skip building autograd graphs for
        # the many forward passes performed here.
        with torch.no_grad():
            cur_params = self.get_weights()
            cur_loss = criterion(self.model(data), target).item()
            if not np.isfinite(cur_loss):
                raise ValueError(f'Initial loss must be finite, got {cur_loss}')
            temp, t = cur_loss, 0

            while temp > min_temp:
                # `cur_loss` always belongs to `cur_params`, so the current
                # weights do not need to be reloaded and re-evaluated here.
                new_params = torch.distributions.Normal(cur_params, self.step_std).sample()
                self.load_weights(new_params)
                new_loss = criterion(self.model(data), target).item()

                ap = self.acceptance_prob(cur_loss, new_loss, temp)
                if ap >= np.random.rand():
                    cur_params = new_params
                    cur_loss = new_loss

                if t > 0 and t % self.annealing_schedule == 0:
                    print(f'Temperature = {temp} | Current loss = {cur_loss}')
                    temp *= self.annealing_rate

                t += 1

            # The model currently holds the last *proposed* weights; restore
            # the last *accepted* ones.
            self.load_weights(cur_params)

    def acceptance_prob(self, cur_loss, new_loss, temp):
        """Return the probability (a Python float) of accepting the candidate.

        Accepts plain numbers or 0-d tensors for all three arguments.
        """
        cur_loss, new_loss, temp = float(cur_loss), float(new_loss), float(temp)
        if new_loss < cur_loss:
            return 1.0
        if temp <= 0:
            # A non-positive temperature never accepts a non-improving move
            # (and this avoids dividing by zero).
            return 0.0
        return float(np.exp((cur_loss - new_loss) / temp))

## Network training and evaluation

In [0]:
# Seed both random sources used during annealing (torch draws the weight
# proposals, numpy draws the acceptance threshold) and the weight
# initialisation, so that "Restart & Run All" reproduces the same results.
torch.manual_seed(42)
np.random.seed(42)

device = torch.device('cpu')
# The network is cast to double precision; the training helpers feed it tensors
# built with torch.from_numpy, which keep the numpy array's dtype.
iris_clf = IrisClassifier().double()
iris_clf.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = SimulatedAnnealing(iris_clf, 0.999, 5)

In [0]:
def train_classifier(model, x, y, criterion, optimizer):
    """Train `model` on the whole of (x, y) with an optimizer exposing `.optimize`.

    Args:
        model: Module to train; switched to training mode.
        x: Numpy array of input features.
        y: Numpy array of labels, either 1-D or with a trailing axis of size 1.
        criterion: Loss callable forwarded to the optimizer.
        optimizer: Object with an ``optimize(data, target, criterion)`` method.
    """
    model.train()

    # Place the tensors wherever the model lives instead of relying on a
    # notebook-global `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    data = torch.from_numpy(x).to(model_device)
    target = torch.from_numpy(y).to(model_device)
    # Drop only the trailing singleton axis: a bare squeeze() would also
    # collapse the batch axis when there is a single sample.
    if target.dim() > 1:
        target = target.squeeze(-1)
    # Integer labels are cast to int64 (numpy may hand over int32 labels).
    if not target.is_floating_point():
        target = target.long()

    optimizer.optimize(data, target, criterion)

def test_classifier(model, x, y):
    model.eval()
    data = torch.from_numpy(x).to(device)
    target = torch.from_numpy(y).to(device)
    
    correct = 0
    total = y.shape[0]

    for i, (feat, label) in enumerate(zip(data, target)):
        pred = model(feat).detach().cpu()
        if pred.argmax().item() == label.item():
            correct += 1
    
    print(f'Total test accuracy = {correct / total}')

In [9]:
%%time
# Fit the network on the training split using the simulated-annealing optimizer.
train_classifier(
    model=iris_clf,
    x=x_train,
    y=y_train,
    criterion=criterion,
    optimizer=optimizer,
)

Temperature = 1.1363612435139534 | Current loss = 1.4517333853209868
Temperature = 1.1352248822704394 | Current loss = 2.145326322822996
Temperature = 1.134089657388169 | Current loss = 2.57295799523333
Temperature = 1.1329555677307808 | Current loss = 1.4147153150447822
Temperature = 1.13182261216305 | Current loss = 1.8113582205912802
Temperature = 1.1306907895508869 | Current loss = 3.4780409168142365
Temperature = 1.129560098761336 | Current loss = 5.483130804997413
Temperature = 1.1284305386625746 | Current loss = 5.483130804997413
Temperature = 1.127302108123912 | Current loss = 4.553676308074144
Temperature = 1.1261748060157881 | Current loss = 4.553676308074144
Temperature = 1.1250486312097723 | Current loss = 3.538906133919455
Temperature = 1.1239235825785625 | Current loss = 3.538906133919455
Temperature = 1.1227996589959839 | Current loss = 3.538906133919455
Temperature = 1.121676859336988 | Current loss = 3.45975930405922
Temperature = 1.120555182477651 | Current loss = 2.6

In [10]:
# Report accuracy of the annealing-trained network on the held-out split.
test_classifier(model=iris_clf, x=x_test, y=y_test)

Total test accuracy = 0.9736842105263158


## Different annealing rates


##### Annealing rate = 0.75

In [37]:
# Fresh network, cooled by a factor of 0.75 at every schedule step.
iris_clf_test = IrisClassifier().double().to(device)
optimizer_test = SimulatedAnnealing(
    iris_clf_test,
    annealing_rate=0.75,
    annealing_schedule=5,
)
train_classifier(iris_clf_test, x_train, y_train, criterion, optimizer_test)

Temperature = 1.1151675367631595 | Current loss = 1.6132167539311337
Temperature = 0.8363756525723696 | Current loss = 0.75381937374323
Temperature = 0.6272817394292771 | Current loss = 1.5701639137229022
Temperature = 0.47046130457195784 | Current loss = 1.7097657083701585
Temperature = 0.35284597842896837 | Current loss = 0.7595965410839726
Temperature = 0.2646344838217263 | Current loss = 0.7595965410839726
Temperature = 0.1984758628662947 | Current loss = 0.5961811357779652
Temperature = 0.14885689714972103 | Current loss = 0.5961811357779652
Temperature = 0.11164267286229076 | Current loss = 0.5961811357779652
Temperature = 0.08373200464671807 | Current loss = 0.5961811357779652
Temperature = 0.06279900348503856 | Current loss = 0.6448753114681178
Temperature = 0.04709925261377892 | Current loss = 0.6767740551985998
Temperature = 0.03532443946033419 | Current loss = 0.370969574480029
Temperature = 0.02649332959525064 | Current loss = 0.370969574480029
Temperature = 0.0198699971964

In [38]:
# Held-out accuracy for the annealing_rate = 0.75 run.
test_classifier(model=iris_clf_test, x=x_test, y=y_test)

Total test accuracy = 0.6052631578947368


##### Annealing rate = 0.9

In [41]:
# Fresh network, cooled by a factor of 0.9 at every schedule step.
iris_clf_test = IrisClassifier().double().to(device)
optimizer_test = SimulatedAnnealing(
    iris_clf_test,
    annealing_rate=0.9,
    annealing_schedule=5,
)
train_classifier(iris_clf_test, x_train, y_train, criterion, optimizer_test)

Temperature = 1.0999394422059479 | Current loss = 2.098992545334748
Temperature = 0.9899454979853531 | Current loss = 2.3582118594548263
Temperature = 0.8909509481868179 | Current loss = 2.1958547971181983
Temperature = 0.8018558533681361 | Current loss = 3.492012626215466
Temperature = 0.7216702680313225 | Current loss = 3.492012626215466
Temperature = 0.6495032412281903 | Current loss = 1.43079835034573
Temperature = 0.5845529171053713 | Current loss = 0.8882392487283911
Temperature = 0.5260976253948342 | Current loss = 0.8882392487283911
Temperature = 0.47348786285535077 | Current loss = 0.8882392487283911
Temperature = 0.4261390765698157 | Current loss = 0.9713960905484856
Temperature = 0.3835251689128341 | Current loss = 0.9713960905484856
Temperature = 0.34517265202155073 | Current loss = 0.9716454877242449
Temperature = 0.31065538681939564 | Current loss = 1.5000560131522536
Temperature = 0.2795898481374561 | Current loss = 1.0543560209733625
Temperature = 0.2516308633237105 | C

In [42]:
# Held-out accuracy for the annealing_rate = 0.9 run.
test_classifier(model=iris_clf_test, x=x_test, y=y_test)

Total test accuracy = 0.7368421052631579


## Gradient optimization testing

In [0]:
def train_classifier_grad(model, x, y, criterion, optimizer, epochs=1000, log_every=50):
    """Train `model` with a gradient optimizer, taking one step per sample.

    Args:
        model: Module to train; switched to training mode.
        x: Numpy array of input features, one sample per row.
        y: Numpy array of labels, either 1-D or a column of shape (n, 1).
        criterion: Loss callable ``criterion(prediction, label)``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        epochs: Number of full passes over the data.
        log_every: Print the summed epoch loss every this many epochs (the
            final epoch is always printed). Defaults to the previously
            hard-coded 50.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f'log_every must be at least 1, got {log_every}')

    model.train()

    # Place the tensors wherever the model lives instead of relying on a
    # notebook-global `device`.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    data = torch.from_numpy(x).to(model_device)
    target = torch.from_numpy(y).to(model_device)
    # Integer labels are cast to int64 (numpy may hand over int32 labels).
    if not target.is_floating_point():
        target = target.long()
    # Each per-sample label must have shape (1,) to pair with the (1, classes)
    # prediction built below, so view flat label arrays as a column.
    if target.dim() == 1:
        target = target.unsqueeze(1)

    for epoch in range(epochs):
        total_loss = 0
        for feat, label in zip(data, target):
            # The model sees a single feature vector; add a batch axis of 1.
            pred = model(feat).unsqueeze(0)
            loss = criterion(pred, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        if epoch % log_every == 0 or epoch == epochs - 1:
            print(f'Epoch {epoch+1}/{epochs}, Loss = {total_loss}')

In [0]:
# Second, independently initialised network trained with Adam for comparison.
iris_clf2 = IrisClassifier().double().to(device)
optimizer2 = optimizers.Adam(params=iris_clf2.model.parameters(), lr=1e-3)

In [25]:
%%time
# Train the comparison network with Adam for 125 epochs.
train_classifier_grad(
    model=iris_clf2,
    x=x_train,
    y=y_train,
    criterion=criterion,
    optimizer=optimizer2,
    epochs=125,
)

Epoch 1/125, Loss = 122.12794614091588
Epoch 51/125, Loss = 10.53629731252227
Epoch 101/125, Loss = 8.935006193433413
Epoch 125/125, Loss = 8.581025592744519
CPU times: user 12.1 s, sys: 624 ms, total: 12.7 s
Wall time: 12.9 s


In [26]:
# Held-out accuracy of the Adam-trained network.
test_classifier(model=iris_clf2, x=x_test, y=y_test)

Total test accuracy = 1.0
