# Prepare

In [1]:
# Show which GPU (if any) is attached to this runtime, with driver/CUDA version and memory use.
!nvidia-smi

Mon Jan  2 16:43:55 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    26W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Switch the working directory to the project folder and list its contents.
# NOTE(review): this is a hard-coded absolute Drive path and no drive.mount(...) call is
# visible in this notebook — presumably Drive is mounted beforehand. TODO confirm.
%cd "/content/drive/MyDrive/CE Project"
!ls

/content/drive/MyDrive/CE Project
 data		 softmax_temperature.ipynb   svm3.ipynb   test.ipynb
 softmax.ipynb	 svm2.ipynb		     SVM.ipynb	 'Use Library.ipynb'


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import time
from sklearn.metrics import accuracy_score, f1_score
from keras.datasets import mnist, cifar10
from sklearn.model_selection import train_test_split
import torch
from sklearn.preprocessing import OneHotEncoder
import random

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Calls, in order, ``random.seed``, ``np.random.seed``, ``torch.manual_seed``
    and ``torch.cuda.manual_seed`` with the same ``seed``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)


seed_everything(22)

In [5]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Define model

In [6]:
class SoftmaxTemperature:
    """Linear softmax classifier with a temperature-scaled softmax, trained by mini-batch SGD.

    Class probabilities are ``softmax(X @ W / T)`` taken row-wise. The model has
    no bias term; the only parameter is the weight matrix ``W``.

    Attributes (all set by ``fit``):
        X, y: the full training inputs and one-hot targets, kept so ``loss`` can
            evaluate the objective on the whole training set.
        W: the weight matrix being trained (a clone of the initial weights).
        T: the softmax temperature.
    """

    def _shifted_logits(self, Z):
        """Return ``Z / T`` with each row's maximum subtracted.

        Softmax is invariant to adding a constant to a row, so the shift does
        not change the probabilities; it only guarantees every exponent is
        <= 0 so ``exp`` cannot overflow to ``inf`` (which produced NaN before).
        """
        scaled = Z / self.T
        return scaled - scaled.max(dim=1, keepdim=True).values

    def softmax(self, Z):
        """Return the row-wise temperature softmax of the 2-D logits ``Z``."""
        e_z = torch.exp(self._shifted_logits(Z))
        return e_z / e_z.sum(dim=1, keepdim=True)

    def loss(self):
        """Return the mean cross-entropy over the training set stored by ``fit``.

        Works in log space (log-sum-exp) instead of ``log(softmax(...))`` so a
        probability that underflows to 0 cannot turn the loss into NaN/-inf.
        """
        shifted = self._shifted_logits(torch.mm(self.X, self.W))
        log_A = shifted - torch.log(torch.exp(shifted).sum(dim=1, keepdim=True))
        return -torch.mean(torch.sum(self.y * log_A, dim=1))

    def grad(self, X, y):
        """Return the update direction ``X^T (softmax(XW / T) - y) / n`` for one batch.

        NOTE(review): the exact derivative of the temperature-scaled
        cross-entropy with respect to ``W`` carries an additional factor
        ``1 / T``. It is omitted here (as in the original code), which is
        equivalent to running SGD on the true gradient with an effective
        learning rate of ``lr * T``.
        """
        A = self.softmax(torch.mm(X, self.W))
        A = A - y
        # Match X's dtype (rather than hard-coding float64) so torch.mm accepts
        # the operands whatever floating type the caller works in.
        return torch.mm(X.T, A.to(X.dtype)) / X.shape[0]

    def fit(self, X, y, W, T, lr=0.05, epochs=200, tol=1e-5, batch_size=32):
        """Train with mini-batch SGD, reshuffling the samples every epoch.

        Args:
            X: 2-D tensor of training inputs, one sample per row.
            y: 2-D tensor of one-hot targets, one row per sample.
            W: initial weights; cloned, so the caller's tensor is not modified.
            T: softmax temperature; must be positive.
            lr: learning rate.
            epochs: number of passes over the data.
            tol: currently unused; kept for interface compatibility.
            batch_size: samples per mini-batch (the last batch may be smaller).

        Raises:
            ValueError: if ``T`` is not positive or ``batch_size`` is below 1.

        Prints the full-training-set loss every 10 epochs.
        """
        if T <= 0:
            raise ValueError('temperature T must be positive, got {}'.format(T))
        if batch_size < 1:
            raise ValueError('batch_size must be at least 1, got {}'.format(batch_size))

        self.X = X
        self.y = y
        self.W = torch.clone(W)
        self.T = T

        N = X.shape[0]
        for ep in range(1, epochs + 1):
            # The permutation is drawn from the default (CPU) generator, so it
            # is controlled by torch.manual_seed.
            mix_ids = torch.randperm(N)

            for lo in range(0, N, batch_size):
                # Slicing clamps at N, so the final batch is simply shorter.
                batch_ids = mix_ids[lo:lo + batch_size]
                self.W -= lr * self.grad(X[batch_ids], y[batch_ids])

            if ep % 10 == 0:
                print('Epoch:', ep, 'loss:', self.loss().cpu().numpy())

    def parameters(self):
        """Return the current weight matrix ``W``."""
        return self.W

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        A = self.softmax(torch.mm(X, self.W))
        return torch.argmax(A, dim=1)

# 1. MNIST

In [7]:
# Load MNIST; the labels arrive as integer class ids.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# One-hot encode the training labels only, printing the first label before and after.
# y_test is left as integer ids.
print(y_train[0])
le = OneHotEncoder()
y_train = le.fit_transform(y_train.reshape(-1, 1)).toarray()
print(y_train[0])

# Flatten every image to a 784-long float32 vector and divide by 255.
X_train = X_train.reshape(-1, 784).astype('float32') / 255
X_test = X_test.reshape(-1, 784).astype('float32') / 255

print('Train samples:', X_train.shape)
print('Test samples:', X_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
5
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
Train samples: (60000, 784)
Test samples: (10000, 784)


In [8]:
# Turn the training inputs/targets and the test inputs into float64 tensors on `device`.
X_train, y_train, X_test = (
    torch.from_numpy(array).to(device).to(torch.float64)
    for array in (X_train, y_train, X_test)
)
# y_test is not converted: the accuracy cells pass it to accuracy_score as-is.

In [9]:
# Random starting weights of shape (n_features, n_classes), handed to fit() by the runs below.
# The class count is read from the one-hot targets instead of being hard-coded as 10.
# The values are still drawn on the CPU and then moved, so the seeded numbers do not change.
n_classes = y_train.shape[1]
W_init = torch.randn(X_train.shape[1], n_classes).to(device).to(torch.float64)

### 1.5

In [None]:
# Train at temperature T = 1.5 and report the wall-clock time.
# `lr` is now actually passed to fit(); before, it was assigned but ignored, so changing
# it here had no effect (fit() silently used its own default of 0.05).
lr = 0.05
T = 1.5
start = time.time()
model = SoftmaxTemperature()
model.fit(X_train, y_train, W_init, T, lr=lr)
print('Train completed in {}s'.format(time.time() - start))

Epoch: 10 loss: 0.37848175493278613
Epoch: 20 loss: 0.32794696158467945
Epoch: 30 loss: 0.30456451219732106
Epoch: 40 loss: 0.29169036609171006
Epoch: 50 loss: 0.28226624238043546
Epoch: 60 loss: 0.27579345812201744
Epoch: 70 loss: 0.2697551260602253
Epoch: 80 loss: 0.2672357201224741
Epoch: 90 loss: 0.2626967353229213
Epoch: 100 loss: 0.2591680316199559
Epoch: 110 loss: 0.2567020108772778
Epoch: 120 loss: 0.25542291837891634
Epoch: 130 loss: 0.25286485592049396
Epoch: 140 loss: 0.2523706238914793
Epoch: 150 loss: 0.2505077417606611
Epoch: 160 loss: 0.24922722138139847
Epoch: 170 loss: 0.24756123758710227
Epoch: 180 loss: 0.24615438845509352
Epoch: 190 loss: 0.24650107444036432
Epoch: 200 loss: 0.24581068399239722
Train completed in 83.35889482498169s


In [None]:
# Test-set accuracy in percent. sklearn's signature is accuracy_score(y_true, y_pred),
# so the ground-truth labels go first.
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred.cpu().numpy())*100

92.33

### 1000

In [None]:
# Train at temperature T = 1000 and report the wall-clock time.
# NOTE(review): the saved output of this cell logs every 5 epochs, while fit() as defined
# in this notebook prints every 10 — the output appears to predate the current code.
T = 1000
start = time.time()
model = SoftmaxTemperature()
model.fit(X_train, y_train, W=W_init, T=T)
elapsed = time.time() - start
print('Train completed in {}s'.format(elapsed))

Epoch: 5 loss: 1.8807726835628351
Epoch: 10 loss: 1.5867530303309363
Epoch: 15 loss: 1.3785649268552662
Epoch: 20 loss: 1.2280219563198587
Epoch: 25 loss: 1.1158879554931076
Epoch: 30 loss: 1.0297873952642083
Epoch: 35 loss: 0.9618237614115079
Epoch: 40 loss: 0.9068740078600744
Epoch: 45 loss: 0.8615230748217554
Epoch: 50 loss: 0.8234358729486665
Epoch: 55 loss: 0.7909663897349034
Epoch: 60 loss: 0.762929677682432
Epoch: 65 loss: 0.7384506008479279
Epoch: 70 loss: 0.7168709259980902
Epoch: 75 loss: 0.6976856414338497
Epoch: 80 loss: 0.6805016684888552
Epoch: 85 loss: 0.6650083546788471
Epoch: 90 loss: 0.6509565997947085
Epoch: 95 loss: 0.638144821268553
Epoch: 100 loss: 0.6264077968010873
Epoch: 105 loss: 0.6156086838240018
Epoch: 110 loss: 0.6056333508502265
Epoch: 115 loss: 0.5963859591127141
Epoch: 120 loss: 0.5877849580183082
Epoch: 125 loss: 0.5797609451374001
Epoch: 130 loss: 0.5722542080176898
Epoch: 135 loss: 0.5652131101440319
Epoch: 140 loss: 0.5585929582950688
Epoch: 145 los

In [None]:
# Test-set accuracy in percent. sklearn's signature is accuracy_score(y_true, y_pred),
# so the ground-truth labels go first.
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred.cpu().numpy())*100

88.29

In [None]:
# Largest entry of the initial weights, for comparison with the trained weights.
W_init.max()

tensor(4.2385, device='cuda:0')

In [None]:
# Largest entry of the trained weights.
print(model.parameters().max())

tensor(3.9304, device='cuda:0')


### 0.1

In [10]:
# Train at temperature T = 0.1 and report the wall-clock time.
T = 0.1
start = time.time()
model = SoftmaxTemperature()
model.fit(X_train, y_train, W=W_init, T=T)
elapsed = time.time() - start
print('Train completed in {}s'.format(elapsed))

Epoch: 10 loss: 3.262106246261842
Epoch: 20 loss: 2.2935851926595596
Epoch: 30 loss: 1.7986150249415107
Epoch: 40 loss: 1.4804182485506705
Epoch: 50 loss: 1.2569550061905097
Epoch: 60 loss: 1.085007219297241
Epoch: 70 loss: 0.9784138551166808
Epoch: 80 loss: 0.8552157544967748
Epoch: 90 loss: 0.7882183121404759
Epoch: 100 loss: 0.7388347247215129
Epoch: 110 loss: 0.6764695850677641
Epoch: 120 loss: 0.6098997127154752
Epoch: 130 loss: 0.594536805602092
Epoch: 140 loss: 0.5348344938076185
Epoch: 150 loss: 0.5197074743840648
Epoch: 160 loss: 0.47956588421455065
Epoch: 170 loss: 0.46153949624423335
Epoch: 180 loss: 0.4384705470881292
Epoch: 190 loss: 0.4206483546338336
Epoch: 200 loss: 0.4190389568701153
Train completed in 98.81109309196472s


In [11]:
# Test-set accuracy in percent. sklearn's signature is accuracy_score(y_true, y_pred),
# so the ground-truth labels go first.
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred.cpu().numpy())*100

90.10000000000001

### 10

In [12]:
# Train at temperature T = 10 and report the wall-clock time.
T = 10
start = time.time()
model = SoftmaxTemperature()
model.fit(X_train, y_train, W=W_init, T=T)
elapsed = time.time() - start
print('Train completed in {}s'.format(elapsed))

Epoch: 10 loss: 0.3578596508363862
Epoch: 20 loss: 0.324621617014032
Epoch: 30 loss: 0.30965875393872055
Epoch: 40 loss: 0.30071683951924943
Epoch: 50 loss: 0.2944632009738895
Epoch: 60 loss: 0.28986471472849906
Epoch: 70 loss: 0.2859568280248341
Epoch: 80 loss: 0.2827491719071499
Epoch: 90 loss: 0.2801655931042327
Epoch: 100 loss: 0.27780893196827033
Epoch: 110 loss: 0.27599322082084193
Epoch: 120 loss: 0.2740960759343406
Epoch: 130 loss: 0.2725371776044312
Epoch: 140 loss: 0.2711222174953553
Epoch: 150 loss: 0.26993676782413056
Epoch: 160 loss: 0.26861200303915306
Epoch: 170 loss: 0.2675846239746869
Epoch: 180 loss: 0.26663574626627684
Epoch: 190 loss: 0.26549206954140503
Epoch: 200 loss: 0.26469761626657967
Train completed in 94.43031072616577s


In [13]:
# Test-set accuracy in percent. sklearn's signature is accuracy_score(y_true, y_pred),
# so the ground-truth labels go first.
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred.cpu().numpy())*100

92.35