In [2]:
import torch

import numpy as np
import pandas as pd

# Tensors

In [2]:
torch.empty(1)

tensor([0.])

In [3]:
# torch.empty allocates without initializing, so the printed values are arbitrary
torch.empty(2, 3, 2)

tensor([[[-7.9347e-03,  1.8315e-42],
         [ 0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00]],

        [[ 0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00]]])

In [5]:
torch.rand(2, 2)

tensor([[0.4116, 0.2256],
        [0.0662, 0.7380]])

In [6]:
torch.zeros(2, 2) # Also torch.ones

tensor([[0., 0.],
        [0., 0.]])

In [7]:
x = torch.tensor([2.5, 0.1])
y = torch.tensor([2, 3])
# Adding a float tensor and an integer tensor yields a float result (see the output below)
z = x + y
z

tensor([4.5000, 3.1000])

In [8]:
x = torch.rand(5, 3)
x

tensor([[0.1456, 0.5775, 0.3520],
        [0.0699, 0.4624, 0.3684],
        [0.1273, 0.9187, 0.4958],
        [0.5938, 0.3482, 0.4351],
        [0.3846, 0.3223, 0.0436]])

In [9]:
x[:, 1]

tensor([0.5775, 0.4624, 0.9187, 0.3482, 0.3223])

In [10]:
# -1 lets view infer the length: the 5x3 tensor becomes a flat tensor of 15 elements
y = x.view(-1)
y

tensor([0.1456, 0.5775, 0.3520, 0.0699, 0.4624, 0.3684, 0.1273, 0.9187, 0.4958,
        0.5938, 0.3482, 0.4351, 0.3846, 0.3223, 0.0436])

In [10]:
y = x.view(3, 5)
y

tensor([[0.4272, 0.0073, 0.4219, 0.7843, 0.3332],
        [0.0859, 0.8629, 0.5066, 0.8344, 0.8108],
        [0.5462, 0.3082, 0.1835, 0.3551, 0.9963]])

In [11]:
# The NumPy array is float64, and the resulting tensor keeps that dtype (see the output)
torch.from_numpy(np.random.rand(2, 2))

tensor([[0.7478, 0.1118],
        [0.6823, 0.1719]], dtype=torch.float64)

# GPU

In [12]:
torch.cuda.is_available()

True

In [13]:
# Use the GPU when one is available and fall back to the CPU otherwise, so that
# `z` is always a tensor. Previously `z` stayed None on CPU-only machines and the
# later `z.to("cpu")` call raised AttributeError.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# A tensor can be created directly on the chosen device ...
x = torch.ones(5, device=device)

# ... or created on the CPU first and moved afterwards.
y = torch.ones(5)
y = y.to(device)

# Both operands live on the same device, and so does the result.
z = x + y

z

tensor([2., 2., 2., 2., 2.], device='cuda:0')

In [14]:
z = z.to("cpu")
z

tensor([2., 2., 2., 2., 2.])

# Gradient calculation

In [16]:
x = torch.tensor(np.random.randn(10), requires_grad=True)
x

tensor([-1.2357, -2.0963,  0.5606, -0.2135, -0.9652,  0.9018, -0.7355,  0.8865,
         0.7695, -0.2547], dtype=torch.float64, requires_grad=True)

In [17]:
y = x * 2
y

tensor([-2.4714, -4.1927,  1.1212, -0.4270, -1.9305,  1.8036, -1.4711,  1.7730,
         1.5389, -0.5095], dtype=torch.float64, grad_fn=<MulBackward0>)

In [18]:
z = y.mean()
# Backpropagate from the scalar z; the gradient with respect to x ends up in x.grad
z.backward() # dz/dx
# z = mean(2 * x) over 10 elements, so every entry of the gradient is 2 / 10 = 0.2
x.grad

tensor([0.2000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000,
        0.2000], dtype=torch.float64)

# Gradient descent training using numpy

$f = w \cdot x$

In [67]:
# Data for f = 2 * x
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

Loss: $MSE = \frac{1}{N} \sum_{i=1}^{N} (y_{pred,i} - y_i)^2$

In [68]:
# Single trainable weight of the model, starting at zero
w = 0.0


def forward(x):
    """Model prediction f(x) = w * x, using the module-level weight ``w``."""
    prediction = w * x
    return prediction


def loss(y, y_predicted):
    """Mean squared error between the targets ``y`` and ``y_predicted``."""
    residual = y_predicted - y
    return (residual ** 2).mean()

Gradient:  
$MSE = \frac{1}{N} \sum_{i=1}^{N} (w \cdot x_i - y_i)^2$  
$\frac{dL}{dw} = \frac{1}{N} \sum_{i=1}^{N} 2x_i \cdot (w \cdot x_i - y_i) = \frac{1}{N} \sum_{i=1}^{N} 2x_i \cdot (y_{pred,i} - y_i)$

In [69]:
def gradient(x, y, y_predicted):
    """Return dL/dw of the MSE loss for the model f = w * x.

    The dot product sums 2 * x_i * (y_pred_i - y_i) over all samples and the
    division by len(x) turns that sum into a mean, so this is mathematically
    the same quantity as (2 * x * (y_predicted - y)).mean().
    """
    residual = y_predicted - y
    n_samples = len(x)
    return np.dot(2 * x, residual) / n_samples

In [70]:
for i in range(10):
    print(f'Prediction before training: f({i}) = {forward(i):.3f}')

Prediction before training: f(0) = 0.000
Prediction before training: f(1) = 0.000
Prediction before training: f(2) = 0.000
Prediction before training: f(3) = 0.000
Prediction before training: f(4) = 0.000
Prediction before training: f(5) = 0.000
Prediction before training: f(6) = 0.000
Prediction before training: f(7) = 0.000
Prediction before training: f(8) = 0.000
Prediction before training: f(9) = 0.000


In [73]:
# Training

# Hyperparameters
learning_rate = 0.02
n_iters = 10

# Training loop: plain gradient descent on the single weight w
for epoch in range(n_iters):
    # forward pass with the current weight
    y_pred = forward(X)
    # MSE of those predictions (measured before the update below)
    l = loss(Y, y_pred)
    # analytic gradient dL/dw
    dw = gradient(X, Y, y_pred)
    # step against the gradient
    w = w - learning_rate * dw

    # report after every epoch
    print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.6f}')

epoch 1: w = 1.724, loss = 1.162786
epoch 2: w = 1.807, loss = 0.569765
epoch 3: w = 1.865, loss = 0.279185
epoch 4: w = 1.905, loss = 0.136801
epoch 5: w = 1.934, loss = 0.067032
epoch 6: w = 1.954, loss = 0.032846
epoch 7: w = 1.968, loss = 0.016094
epoch 8: w = 1.977, loss = 0.007886
epoch 9: w = 1.984, loss = 0.003864
epoch 10: w = 1.989, loss = 0.001893


In [74]:
for i in range(10):
    print(f'Prediction after training: f({i}) = {forward(i):.3f}')

Prediction after training: f(0) = 0.000
Prediction after training: f(1) = 1.989
Prediction after training: f(2) = 3.978
Prediction after training: f(3) = 5.967
Prediction after training: f(4) = 7.956
Prediction after training: f(5) = 9.944
Prediction after training: f(6) = 11.933
Prediction after training: f(7) = 13.922
Prediction after training: f(8) = 15.911
Prediction after training: f(9) = 17.900


# Gradient descent training using PyTorch

In [86]:
# Training data sampled from f(x) = 2 * x
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

# Trainable weight; requires_grad=True tells autograd to compute gradients for it
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


def forward(x):
    """Model prediction f(x) = w * x, using the module-level weight tensor ``w``."""
    prediction = w * x
    return prediction


def loss(y, y_predicted):
    """Mean squared error between the targets ``y`` and ``y_predicted``."""
    residual = y_predicted - y
    return (residual ** 2).mean()

In [87]:
for i in range(10):
    print(f'Prediction before training: f({i}) = {forward(i):.3f}')

# Training

# Hyperparameters
learning_rate = 0.02
n_iters = 50

# Training loop
for epoch in range(n_iters):
    # forward pass and loss for the current weight
    y_pred = forward(X)
    l = loss(Y, y_pred)

    # backward pass: autograd stores dL/dw in w.grad
    l.backward()

    # gradient step, done under no_grad so the update itself is not tracked
    with torch.no_grad():
        w.sub_(learning_rate * w.grad)

    # clear the stored gradient before the next backward pass
    w.grad.zero_()

    # report on the first epoch and every tenth one after it
    if not epoch % 10:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.6f}')

for i in range(10):
    print(f'Prediction after training: f({i}) = {forward(i):.3f}')

Prediction before training: f(0) = 0.000
Prediction before training: f(1) = 0.000
Prediction before training: f(2) = 0.000
Prediction before training: f(3) = 0.000
Prediction before training: f(4) = 0.000
Prediction before training: f(5) = 0.000
Prediction before training: f(6) = 0.000
Prediction before training: f(7) = 0.000
Prediction before training: f(8) = 0.000
Prediction before training: f(9) = 0.000
epoch 1: w = 0.600, loss = 30.000000
epoch 11: w = 1.960, loss = 0.023938
epoch 21: w = 1.999, loss = 0.000019
epoch 31: w = 2.000, loss = 0.000000
epoch 41: w = 2.000, loss = 0.000000
Prediction after training: f(0) = 0.000
Prediction after training: f(1) = 2.000
Prediction after training: f(2) = 4.000
Prediction after training: f(3) = 6.000
Prediction after training: f(4) = 8.000
Prediction after training: f(5) = 10.000
Prediction after training: f(6) = 12.000
Prediction after training: f(7) = 14.000
Prediction after training: f(8) = 16.000
Prediction after training: f(9) = 18.000


# PyTorch Model, Loss and Optimizer

Steps:
1) Design model (input, output size, forward pass)
2) Construct loss and optimizer
3) Training loop:
- forward pass: compute prediction
- backward pass: gradients
- update weights

In [3]:
import torch.nn as nn

In [4]:
# Data for f = 2 * x
# Shape for X: (n, m) where n is the number of samples and m is the number of features
# Shape for Y: (n, 1) where n is the number of samples
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

X_test = torch.tensor([[1], [2], [3], [4], [5], [6], [7]], dtype=torch.float32)

n_samples, n_features = X.shape
print(f"n_samples: {n_samples}, n_features: {n_features}")

input_size = n_features
output_size = 1

# Can be done like this as well for a simple linear regression:
# model = nn.Linear(input_size, output_size)

class LinearRegression(nn.Module):
    """Linear regression model made of a single nn.Linear layer.

    input_dim and output_dim are passed to nn.Linear as the input and output
    feature counts.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # the only layer of the model
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.lin(x)
        return prediction
    
model = LinearRegression(input_size, output_size)

# Use model(...) to predict
for i in range(X_test.shape[0]):
    print(f'Prediction before training: f({X_test[i]}) = {model(X_test[i]).item():.3f}')

learning_rate = 0.02
n_iters = 100

loss = nn.MSELoss()
# model.parameters() instead of manually specifying the parameters / weights
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(n_iters):
    # forward pass: calling the model runs LinearRegression.forward
    y_pred = model(X)

    # loss: nn.MSELoss takes (input, target), i.e. the prediction comes first
    l = loss(y_pred, Y)

    # backward pass
    l.backward() # dL/dw

    # update weights
    optimizer.step()

    # zero gradients so they do not carry over into the next iteration
    optimizer.zero_grad()

    if epoch % 10 == 0:
        # the model's parameters are the weight matrix and the bias of its linear layer
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l:.6f}')

for i in range(X_test.shape[0]):
    print(f'Prediction after training: f({X_test[i]}) = {model(X_test[i]).item():.3f}')

n_samples: 4, n_features: 1
Prediction before training: f(tensor([1.])) = -0.292
Prediction before training: f(tensor([2.])) = 0.333
Prediction before training: f(tensor([3.])) = 0.957
Prediction before training: f(tensor([4.])) = 1.582
Prediction before training: f(tensor([5.])) = 2.206
Prediction before training: f(tensor([6.])) = 2.831
Prediction before training: f(tensor([7.])) = 3.455
epoch 1: w = 1.129, loss = 21.332088
epoch 11: w = 2.111, loss = 0.030238
epoch 21: w = 2.120, loss = 0.021250
epoch 31: w = 2.114, loss = 0.018844
epoch 41: w = 2.107, loss = 0.016711
epoch 51: w = 2.101, loss = 0.014819
epoch 61: w = 2.095, loss = 0.013142
epoch 71: w = 2.089, loss = 0.011655
epoch 81: w = 2.084, loss = 0.010336
epoch 91: w = 2.079, loss = 0.009166
Prediction after training: f(tensor([1.])) = 1.854
Prediction after training: f(tensor([2.])) = 3.929
Prediction after training: f(tensor([3.])) = 6.004
Prediction after training: f(tensor([4.])) = 8.080
Prediction after training: f(tens

# Linear Regression and Logistic Regression

See .py files

# Dataset and DataLoader - Batch Training

- Epoch = 1 forward and backward pass of **all** training samples
- batch_size = number of training samples in one forward & backward pass
- number of iterations = number of passes, each pass using batch_size samples
- e.g. 100 samples, batch_size = 20 --> 100/20 = 5 iterations per 1 epoch
- Training: In one epoch loop through all batches

PyTorch and Torchvision have a lot of built-in datasets and dataloaders  
e.g. torchvision.datasets.MNIST, FashionMNIST, CIFAR10, ImageNet, COCO

In [37]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

# Goal: Predict the type of a Pokemon based on its stats

class PokemonDataset(Dataset):
    """Pokemon stats (features) and encoded primary type (label) from 'pokemon.csv'.

    Features are stored as a float32 tensor in ``self.x`` and labels as an
    int64 tensor of shape (n_samples, 1) in ``self.y``.
    """

    # Initialize your data, download, etc.
    def __init__(self):
        # data loading: drop the identifier / secondary columns, keep "Type 1" and the stats
        raw = pd.read_csv('pokemon.csv')
        xy = raw.drop(columns=["#", "Name", "Type 2", "Legendary", "Generation"])
        xy["Type 1"] = pd.Categorical(xy["Type 1"]).codes # Convert to numerical
        # The former `xy.convert_dtypes(infer_objects=True)` call was removed: it
        # returns a new DataFrame and its result was discarded, so it had no effect.
        self.x = torch.tensor(xy.drop(columns=["Type 1"]).values, dtype=torch.float32) # Numerical data
        self.y = torch.tensor(xy["Type 1"].values.reshape(-1, 1), dtype=torch.int64) # Categorical data
        self.n_samples = xy.shape[0]

    # return dataset[index]
    def __getitem__(self, index):
        """Return the (features, label) pair stored at ``index``."""
        return self.x[index], self.y[index]

    # len(dataset)
    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return self.n_samples

In [40]:
dataset = PokemonDataset()

first_data = dataset[0]
features, labels = first_data
print(f"Features: {features}\nLabel: {labels}")

Features: tensor([318.,  45.,  49.,  49.,  65.,  65.,  45.])
Label: tensor([9])


In [43]:
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True) # Also: num_workers

# Convert to an iterator over the batches
dataiter = iter(dataloader)

data = next(dataiter)
features, labels = data
print(f"Features: {features}\nLabels: {labels}")

Features: tensor([[445., 103.,  60.,  86.,  60.,  86.,  50.],
        [340.,  45.,  85.,  70.,  40.,  40.,  60.],
        [494.,  75.,  95., 122.,  58.,  75.,  69.],
        [525.,  60.,  55., 145.,  75., 150.,  40.]])
Labels: tensor([[12],
        [ 1],
        [ 8],
        [15]])


In [48]:
# Training loop
num_epochs = 2
total_samples = len(dataset)
# Take the batch size from the DataLoader itself instead of repeating the literal 4,
# so the reported step count stays correct if the loader's batch_size changes.
n_iterations = math.ceil(total_samples / dataloader.batch_size)
print(f"total_samples: {total_samples}, n_iterations: {n_iterations}")

for epoch in range(num_epochs):
    # Use enumerate to get the index and the data in the format (index, (inputs, labels))
    for i, (inputs, labels) in enumerate(dataloader):
        # forward, backward, update would go here; this loop only reports progress
        if (i + 1) % 5 == 0:
            print(f'epoch: {epoch + 1}/{num_epochs}, step: {i + 1}/{n_iterations}, inputs: {inputs.shape}')

total_samples: 800, n_iterations: 200
epoch: 1/2, step: 5/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 10/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 15/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 20/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 25/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 30/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 35/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 40/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 45/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 50/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 55/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 60/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 65/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 70/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 75/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 80/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 85/200, inputs: torch.Size([4, 7])
epoch: 1/2, step: 90/200, inputs: torch.Size([4, 7])
epoch: 1/

# Dataset Transforms

Complete list of built-in transforms: https://pytorch.org/vision/stable/transforms.html

On Images: CenterCrop, Grayscale, Pad, RandomAffine, RandomCrop, RandomHorizontalFlip, RandomRotation, Resize, Scale  
On Tensors: LinearTransformation, Normalize, RandomErasing  
Conversion: ToPILImage, ToTensor  
Generic using lambdas or custom  

Composed transforms:  
```py
composed = transforms.Compose([
    Rescale(256),
    RandomCrop(224)
])
```

In [20]:
from torch.utils.data import Dataset, DataLoader
import torchvision

In [9]:
class PokemonDataset(Dataset):
    """Pokemon stats (features) and encoded primary type (label) from 'pokemon.csv'.

    The data is kept as NumPy arrays; an optional ``transform`` callable is
    applied to every (features, label) sample returned by indexing.
    """

    # Initialize your data, download, etc.
    # transform: Optional transform to be applied on a sample.
    def __init__(self, transform=None):
        # data loading: drop the identifier / secondary columns, keep "Type 1" and the stats
        raw = pd.read_csv('pokemon.csv')
        xy = raw.drop(columns=["#", "Name", "Type 2", "Legendary", "Generation"])
        xy["Type 1"] = pd.Categorical(xy["Type 1"]).codes # Convert to numerical
        # The former `xy.convert_dtypes(infer_objects=True)` call was removed: it
        # returns a new DataFrame and its result was discarded, so it had no effect.

        # Not tensors
        self.x = xy.drop(columns=["Type 1"]).values # Numerical data
        self.y = xy["Type 1"].values.reshape(-1, 1) # Categorical data
        self.n_samples = xy.shape[0]

        self.transform = transform

    def __getitem__(self, index):
        """Return the (features, label) sample at ``index``, transformed if a transform is set."""
        sample = self.x[index], self.y[index]

        if self.transform:
            sample = self.transform(sample)

        return sample

    # len(dataset)
    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return self.n_samples

In [10]:
class ToTensor:
    """Callable transform that converts a (features, label) pair of NumPy arrays to tensors."""

    # __call__ makes instances usable like a function: ToTensor()(sample)
    def __call__(self, sample):
        """Return both elements of ``sample`` wrapped with torch.from_numpy."""
        features, label = sample
        as_tensors = (torch.from_numpy(features), torch.from_numpy(label))
        return as_tensors

In [17]:
class MulTransform:
    """Callable transform that scales a sample's inputs by a constant factor.

    The sample is an (inputs, targets) pair; targets are passed through unchanged.
    """

    def __init__(self, factor):
        # multiplier applied to the inputs of every sample
        self.factor = factor

    def __call__(self, sample):
        """Return ``(inputs * factor, targets)`` without modifying ``inputs``."""
        inputs, targets = sample
        # Multiply out of place. The previous `inputs *= self.factor` wrote into the
        # object it was handed; when that is a row of the dataset's array (or a
        # tensor sharing its memory), every access scaled the stored data again.
        scaled = inputs * self.factor
        return scaled, targets

In [16]:
dataset = PokemonDataset(transform=ToTensor())

first_data = dataset[0]
features, labels = first_data
print(f"Features: {features}\nLabel: {labels}")

Features: tensor([318,  45,  49,  49,  65,  65,  45])
Label: tensor([9], dtype=torch.int8)


In [18]:
dataset = PokemonDataset(transform=MulTransform(3))

first_data = dataset[0]
features, labels = first_data
print(f"Features: {features}\nLabel: {labels}")

Features: [954 135 147 147 195 195 135]
Label: [9]


In [21]:
composed_transform = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])

dataset = PokemonDataset(transform=composed_transform)

first_data = dataset[0]
features, labels = first_data
print(f"Features: {features}\nLabel: {labels}")

Features: tensor([636,  90,  98,  98, 130, 130,  90])
Label: tensor([9], dtype=torch.int8)


# Pokemon model

In [135]:
import torch.nn as nn

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [111]:
# Load the table and keep only the primary type plus the stat columns
data = pd.read_csv('pokemon.csv').drop(
    columns=["#", "Name", "Type 2", "Legendary", "Generation"]
)
data["Type 1"] = pd.Categorical(data["Type 1"]).codes # Convert to numerical
# The former `data.convert_dtypes(infer_objects=True)` call was removed: it returns
# a new DataFrame and its result was discarded, so it never changed `data`.

data

Unnamed: 0,Type 1,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,9,318,45,49,49,65,65,45
1,9,405,60,62,63,80,80,60
2,9,525,80,82,83,100,100,80
3,9,625,80,100,123,122,120,80
4,6,309,39,52,43,60,50,65
...,...,...,...,...,...,...,...,...
795,15,600,50,100,150,100,150,50
796,15,700,50,160,110,160,110,110
797,14,600,80,110,60,150,130,70
798,14,680,80,160,60,170,130,80


In [112]:
data.describe()

Unnamed: 0,Type 1,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,9.4675,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775
std,5.580356,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474
min,0.0,180.0,1.0,5.0,5.0,10.0,20.0,5.0
25%,5.0,330.0,50.0,55.0,50.0,49.75,50.0,45.0
50%,10.0,450.0,65.0,75.0,70.0,65.0,70.0,65.0
75%,14.0,515.0,80.0,100.0,90.0,95.0,90.0,90.0
max,17.0,780.0,255.0,190.0,230.0,194.0,230.0,180.0


In [113]:
data["Type 1"].value_counts()

Type 1
17    112
12     98
9      70
0      69
14     57
6      52
3      44
15     44
2      32
10     32
8      32
1      31
13     28
16     27
5      27
11     24
4      17
7       4
Name: count, dtype: int64

In [127]:
# Separate the stat columns (features) from the encoded primary type (target)
X, y = data.drop(columns=["Type 1"]), data["Type 1"]
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Save train and test datasets separately
# Each file holds the feature columns followed by the "Type 1" target column
train = pd.concat([X_train, y_train], axis=1)
test = pd.concat([X_test, y_test], axis=1)

train.to_csv("pokemon_train.csv", index=False)
test.to_csv("pokemon_test.csv", index=False)

In [129]:
class PokemonDataset(Dataset):
    """Pokemon train or test split read from 'pokemon_train.csv' / 'pokemon_test.csv'.

    test:      truthy selects the test file, falsy the training file.
    transform: optional callable applied to every (features, label) sample.
    """

    def __init__(self, test, transform=None):
        # pick the CSV file for the requested split
        csv_name = 'pokemon_test.csv' if test else 'pokemon_train.csv'
        frame = pd.read_csv(csv_name)

        # kept as NumPy arrays, not tensors
        self.x = frame.drop(columns=["Type 1"]).values  # stat columns
        self.y = frame["Type 1"].values.reshape(-1, 1)  # encoded type, one column
        self.n_samples = frame.shape[0]

        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at ``index``, passed through the transform when one is set."""
        sample = (self.x[index], self.y[index])
        return self.transform(sample) if self.transform else sample

    def __len__(self):
        """Return the number of rows in the loaded split."""
        return self.n_samples

In [130]:
train_dataset = PokemonDataset(test=False, transform=ToTensor())
train_dataloader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True) # Also: num_workers

print(f"Batches: {len(train_dataloader)}")

Batches: 64


In [139]:
class NeuralNet(nn.Module):
    """Feed-forward classifier: three 20-unit hidden layers with ReLU, then a linear output.

    forward returns one raw score per class; no softmax is applied inside the model.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_size = 20
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.relu2 = nn.ReLU()
        self.l3 = nn.Linear(hidden_size, hidden_size)
        self.relu3 = nn.ReLU()
        self.l4 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Run ``x`` through the three hidden layers and return the output layer's scores."""
        hidden = self.relu1(self.l1(x))
        hidden = self.relu2(self.l2(hidden))
        hidden = self.relu3(self.l3(hidden))
        return self.l4(hidden)
    
# Hyperparameters
input_size = train_dataset.x.shape[1]
# Labels are integer category codes starting at 0, and the loss needs one output
# score per possible code. Size the output by the largest code rather than by the
# number of distinct codes: if a class were missing from the training split, the
# distinct count would make the output layer too small for the higher codes.
num_classes = int(train_dataset.y.max()) + 1
learning_rate = 0.002
num_epochs = 500

model = NeuralNet(input_size, num_classes)

# Loss and optimizer
# Loss: CrossEntropyLoss for classification
# Optimizer: Adam
criterion = nn.CrossEntropyLoss() # Takes the raw scores; softmax is applied inside the loss
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [140]:
for epoch in range(num_epochs):
    for i, (X, y) in enumerate(train_dataloader):
        # forward pass; the stats are cast to float for the linear layers
        outputs = model(X.float())
        # Targets arrive as (batch, 1); drop only that trailing axis. A bare
        # squeeze() would also remove the batch axis of a single-sample batch,
        # leaving a 0-dim target that no longer matches the (1, num_classes) output.
        loss = criterion(outputs, y.squeeze(1).long())

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 5 == 0:
            print(f'epoch: {epoch + 1}/{num_epochs}, step: {i + 1}/{len(train_dataloader)}, loss = {loss.item()}')

epoch: 1/500, step: 5/64, loss = 17.06651496887207
epoch: 1/500, step: 10/64, loss = 10.806642532348633
epoch: 1/500, step: 15/64, loss = 4.593147277832031
epoch: 1/500, step: 20/64, loss = 6.615126609802246
epoch: 1/500, step: 25/64, loss = 3.487967014312744
epoch: 1/500, step: 30/64, loss = 4.78005313873291
epoch: 1/500, step: 35/64, loss = 3.0857672691345215
epoch: 1/500, step: 40/64, loss = 3.761988401412964
epoch: 1/500, step: 45/64, loss = 2.882544755935669
epoch: 1/500, step: 50/64, loss = 3.0650882720947266
epoch: 1/500, step: 55/64, loss = 2.5994412899017334
epoch: 1/500, step: 60/64, loss = 2.560173511505127
epoch: 2/500, step: 5/64, loss = 2.781372308731079
epoch: 2/500, step: 10/64, loss = 2.924121618270874
epoch: 2/500, step: 15/64, loss = 2.5568013191223145
epoch: 2/500, step: 20/64, loss = 2.8431968688964844
epoch: 2/500, step: 25/64, loss = 2.7612600326538086
epoch: 2/500, step: 30/64, loss = 2.3974032402038574
epoch: 2/500, step: 35/64, loss = 2.555790424346924
epoch: 

In [141]:
# Test

test_dataset = PokemonDataset(test=True, transform=ToTensor())

# No gradients are needed while evaluating
with torch.no_grad():
    y_true, y_pred = [], []
    for X, y in test_dataset:
        outputs = model(X.float())
        # view the single sample as a batch of one and take the index of the largest score
        _, predictions = torch.max(outputs.reshape(1, -1), 1)
        y_pred.append(predictions.item())
        y_true.append(y.squeeze().long().item())

    print(accuracy_score(y_true, y_pred))

0.21875


In [148]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# SVC for comparison

clf = SVC()

# Reuse the NumPy arrays held by the datasets; the (n, 1) labels are flattened
X_train, y_train = train_dataset.x, train_dataset.y.squeeze()
X_test, y_test = test_dataset.x, test_dataset.y.squeeze()

# Grid search over C and gamma for the RBF kernel
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf'],
}
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True)

grid.fit(X_train, y_train)

print(grid.best_params_)

grid_predictions = grid.predict(X_test)

print(accuracy_score(y_test, grid_predictions))



{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.2375
