# Machine Learning with PyTorch

## Tasks with Networks

* An image classifier
* A regression prediction
* Clustering with NNs (note: https://github.com/MarcTLaw/DeepSpectralClusteringToy)
* Generative Adversarial Networks (GAN)
* Reinforcement Learning

## Next Lesson

**Intermediate Machine Learning with PyTorch**: This last lesson of the Beginning material covered several real world neural networks.  For a deeper dive into some specialize areas, come back for a future webinar session on Intermidiate material.

In [3]:
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import pandas as pd

df = pd.read_csv('data/garments.csv.gz')

print(len(df))
df.head()

243570


Unnamed: 0,age,bra_size_chest,bra_size_cup,height,shoe_size,weight,TARGET
0,30.0,34.0,5.0,62.0,7.0,128.0,6 SHORT
1,20.0,34.0,3.0,64.0,8.0,145.0,6 SHORT
2,26.0,32.0,4.0,64.0,8.0,148.0,6 SHORT
3,43.0,34.0,5.0,61.0,7.0,145.0,6 SHORT
4,63.0,38.0,3.0,63.0,8.0,130.0,6 SHORT


In [5]:
X = df[['age', 'bra_size_chest', 'bra_size_cup', 'height', 'shoe_size', 'weight']]
df_one_hot = pd.get_dummies(df)
Y = df_one_hot[[col for col in df_one_hot.columns if col.startswith('TARGET')]]
labels = [col.replace('TARGET_', '') for col in Y.columns]

In [None]:
# The number of input features
in_dim = X.shape[1]

# The number of "polynomial features" of order 2
hidden1 = int(in_dim * 2 + (in_dim * (in_dim-1) / 2) + 1)
out_dim = Y.shape[1]

# The sizes of the "inference layers"/
hidden2 = hidden3 = 2 * out_dim   

In [80]:
in_dim, hidden1, hidden2, hidden3, out_dim

(6, 28, 66, 66, 33)

In [82]:
def do_training(model, X_train, Y_train, optimizer, batch_size=1000, show_every=250):
    for t in range(5000):
        # Sample a few training rows
        indices = np.random.randint(0, X_train.shape[0], batch_size)
        X = X_train[indices]
        Y = Y_train[indices]
        
        # Forward pass: compute predicted y by passing x to the model.
        Y_pred = model(X)

        # Compute and print loss.
        loss = loss_fn(Y_pred, Y)
        if not t % show_every:
            print(t, loss)

        # Before the backward pass, use the optimizer object to zero all of the
        # gradients for the variables it will update (which are the learnable
        # weights of the model). This is because by default, gradients are
        # accumulated in buffers( i.e, not overwritten) whenever .backward()
        # is called. Checkout docs of torch.autograd.backward for more details.
        optimizer.zero_grad()

        # Backward pass: compute gradient of the loss with respect to model
        # parameters
        loss.backward()

        # Calling the step function on an Optimizer makes an update to its
        # parameters
        optimizer.step()

In [85]:
# Create a sequential NN
model = torch.nn.Sequential(
    # This layer allows "polynomial features"
    torch.nn.Linear(in_dim, hidden1),
    # The activation is treated as a separate layer
    torch.nn.ReLU(),

    # This layer is "inference"
    torch.nn.Linear(hidden1, hidden2),
    # Often Leaky ReLU eliminates the "dead neuron" danger
    torch.nn.LeakyReLU(), 
    
    # A Dropout layer sometimes reduces co-adaptation of neurons
    torch.nn.Dropout(p=0.1),

    # This layer is "inference"
    torch.nn.Linear(hidden2, hidden3),
    torch.nn.LeakyReLU(), 

    # A sigmoid activation is used for a binary decision
    # Since we use one-hot encoding, we essentially make a 
    torch.nn.Linear(hidden3, out_dim),  
    torch.nn.Sigmoid()
)

In [98]:
# Split the original data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

# Convert arrays to tensors
X_train = torch.from_numpy(X_train.values).float()
X_test  = torch.from_numpy(X_test.values).float()
Y_train = torch.from_numpy(Y_train.values)[:, np.newaxis].float()
Y_test  = torch.from_numpy(Y_test.values)[:, np.newaxis].float()

In [86]:
## Now run model
learning_rate = 1e-5
loss_fn = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
do_training(model, X_train, Y_train, optimizer)

0 tensor(0.3172, grad_fn=<MseLossBackward>)
250 tensor(0.1912, grad_fn=<MseLossBackward>)
500 tensor(0.1092, grad_fn=<MseLossBackward>)
750 tensor(0.0561, grad_fn=<MseLossBackward>)
1000 tensor(0.0397, grad_fn=<MseLossBackward>)
1250 tensor(0.0340, grad_fn=<MseLossBackward>)
1500 tensor(0.0320, grad_fn=<MseLossBackward>)
1750 tensor(0.0306, grad_fn=<MseLossBackward>)
2000 tensor(0.0303, grad_fn=<MseLossBackward>)
2250 tensor(0.0300, grad_fn=<MseLossBackward>)
2500 tensor(0.0298, grad_fn=<MseLossBackward>)
2750 tensor(0.0296, grad_fn=<MseLossBackward>)
3000 tensor(0.0294, grad_fn=<MseLossBackward>)
3250 tensor(0.0292, grad_fn=<MseLossBackward>)
3500 tensor(0.0292, grad_fn=<MseLossBackward>)
3750 tensor(0.0293, grad_fn=<MseLossBackward>)
4000 tensor(0.0293, grad_fn=<MseLossBackward>)
4250 tensor(0.0291, grad_fn=<MseLossBackward>)
4500 tensor(0.0291, grad_fn=<MseLossBackward>)
4750 tensor(0.0289, grad_fn=<MseLossBackward>)


In [23]:
from torchsummary import summary
summary(model, input_size=(1, X_train.shape[1]))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 28]             196
              ReLU-2                [-1, 1, 28]               0
            Linear-3                [-1, 1, 66]           1,914
         LeakyReLU-4                [-1, 1, 66]               0
           Dropout-5                [-1, 1, 66]               0
            Linear-6                [-1, 1, 33]           2,211
         LeakyReLU-7                [-1, 1, 33]               0
            Linear-8                [-1, 1, 33]           1,122
           Sigmoid-9                [-1, 1, 33]               0
Total params: 5,443
Trainable params: 5,443
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.02
Estimated Total Size (MB): 0.02
-----------------------------------------------

In [119]:
y_pred = model(X_test).detach().numpy().argsort(axis=1)[:,:-4:-1]
y_pred

array([[27, 21, 30],
       [27, 30, 24],
       [30,  6, 27],
       ...,
       [21, 24, 30],
       [24, 21, 30],
       [24, 30, 26]])

In [121]:
y = np.argmax(Y_test.detach().numpy(), axis=2).flatten()

In [124]:
n = 0
for true, claims in zip(y, y_pred):
    predictions = ", ".join(labels[n] for n in claims[:3])
    print(labels[true], "->", predictions)
    if n > 100:
        break
    n += 1

00 -> 6, 2, 8
4 -> 6, 8, 4
6 -> 8, 10, 6
6 SHORT -> 8, 4, 2
12 -> 2, 10, 8
6 -> 4, 6, 8
6 -> 2, 4, 8
6 -> 4, 8, 6
0 -> 4, 6, 2
4 -> 4, 8, 6
12 LONG -> 6, 10, 8
8 -> 10, 4, 8
0 -> 6, 4, 2
8 -> 6, 4, 2
6 -> 6, 8, 4
2 -> 4, 6, 2
6 -> 8, 10, 6
4 -> 4, 6, 2
6 -> 2, 4, 8
4 -> 6, 4, 2
4 SHORT -> 4, 8, 2
4 -> 4, 6, 2
0 SHORT -> 4, 6, 2
6 -> 6, 8, 4
4 -> 2, 8, 6
10 -> 2, 4, 8
10 -> 8, 2, 4
2 -> 6, 2, 8
6 -> 2, 4, 8
12 -> 6, 4, 8
6 -> 4, 6, 2
2 LONG -> 2, 8, 4
6 -> 2, 4, 8
0 SHORT -> 4, 2, 6
4 -> 6, 8, 4
0 -> 8, 4, 2
8 -> 2, 8, 4
8 -> 4, 2, 8
0 SHORT -> 4, 8, 6
6 -> 2, 4, 8
8 -> 6, 4, 2
18 -> 8, 6, 10
6 SHORT -> 4, 8, 2
4 -> 4, 6, 8
0 -> 8, 4, 10
4 -> 6, 4, 2
4 -> 2, 4, 10
10 -> 8, 4, 2
6 -> 2, 8, 4
4 SHORT -> 6, 8, 4
8 -> 2, 4, 8
4 -> 2, 4, 8
4 SHORT -> 6, 4, 8
8 SHORT -> 4, 8, 10
4 -> 4, 8, 2 SHORT
10 -> 8, 2, 6
8 -> 8, 2, 10
2 -> 4, 6, 8
12 -> 2, 10, 8
6 -> 6, 2, 8
12 -> 10, 2, 8
4 -> 4, 8, 6
2 -> 8, 6, 12
00 SHORT -> 8, 4, 2
8 SHORT -> 2, 4, 8
6 -> 4, 8, 2
2 SHORT -> 0, 4, 6
2 -> 4, 2, 8
10 