# Machine Learning with PyTorch

## Tasks with Networks

* An image classifier
* A regression prediction
* Clustering with NNs (note: https://github.com/MarcTLaw/DeepSpectralClusteringToy)
* Generative Adversarial Networks (GAN)
* Reinforcement Learning

## Next Lesson

**Intermediate Machine Learning with PyTorch**: This last lesson of the Beginning material covered several real world neural networks.  For a deeper dive into some specialize areas, come back for a future webinar session on Intermidiate material.

In [3]:
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import pandas as pd

df = pd.read_csv('data/garments.csv.gz')

print(len(df))
df.head()

243570


Unnamed: 0,age,bra_size_chest,bra_size_cup,height,shoe_size,weight,TARGET
0,30.0,34.0,5.0,62.0,7.0,128.0,6 SHORT
1,20.0,34.0,3.0,64.0,8.0,145.0,6 SHORT
2,26.0,32.0,4.0,64.0,8.0,148.0,6 SHORT
3,43.0,34.0,5.0,61.0,7.0,145.0,6 SHORT
4,63.0,38.0,3.0,63.0,8.0,130.0,6 SHORT


In [5]:
X = df[['age', 'bra_size_chest', 'bra_size_cup', 'height', 'shoe_size', 'weight']]
df_one_hot = pd.get_dummies(df)
Y = df_one_hot[[col for col in df_one_hot.columns if col.startswith('TARGET')]]
labels = [col.replace('TARGET_', '') for col in Y.columns]

In [6]:
learning_rate = 1e-4

# The number of input features
in_dim = X.shape[1]

# The number of "polynomial features" of order 2
hidden1 = int(in_dim * 2 + (in_dim * (in_dim-1) / 2) + 1)
out_dim = Y.shape[1]

# The sizes of the "inference layers"
hidden2 = 2 * out_dim   
hidden3 = out_dim

In [7]:
in_dim, out_dim, hidden1, hidden2, hidden3

(6, 33, 28, 66, 33)

In [15]:
# Create a sequential NN
model = torch.nn.Sequential(
    # This layer allows "polynomial features"
    torch.nn.Linear(in_dim, hidden1),
    # The activation is treated as a separate layer
    torch.nn.ReLU(),

    # This layer is "inference"
    torch.nn.Linear(hidden1, hidden2),
    # Often Leaky ReLU eliminates the "dead neuron" danger
    torch.nn.LeakyReLU(), 
    
    # A Dropout layer sometimes reduces co-adaptation of neurons
    torch.nn.Dropout(p=0.25),

    # This layer is "inference"
    torch.nn.Linear(hidden2, hidden3),
    torch.nn.LeakyReLU(), 

    # A sigmoid activation is used for a binary decision
    # Since we use one-hot encoding, we essentially make a 
    torch.nn.Linear(hidden3, out_dim),  
    torch.nn.Sigmoid()
)

In [18]:
def do_training(model, X_train, Y_train, optimizer, batch_size=1000, show_every=250):
    for t in range(5000):
        # Sample a few training rows
        indices = np.random.randint(0, X_train.shape[0], batch_size)
        X = X_train[indices]
        Y = Y_train[indices]
        
        # Forward pass: compute predicted y by passing x to the model.
        Y_pred = model(X)

        # Compute and print loss.
        loss = loss_fn(Y_pred, Y)
        if not t % show_every:
            print(t, loss)

        # Before the backward pass, use the optimizer object to zero all of the
        # gradients for the variables it will update (which are the learnable
        # weights of the model). This is because by default, gradients are
        # accumulated in buffers( i.e, not overwritten) whenever .backward()
        # is called. Checkout docs of torch.autograd.backward for more details.
        optimizer.zero_grad()

        # Backward pass: compute gradient of the loss with respect to model
        # parameters
        loss.backward()

        # Calling the step function on an Optimizer makes an update to its
        # parameters
        optimizer.step()

In [19]:
# Split the original data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

# Convert arrays to tensors
X_train = torch.from_numpy(X_train.values).float()
Y_train = torch.from_numpy(Y_train.values[:, np.newaxis]).float()
X_test = torch.from_numpy(X_test.values).float()
Y_test = torch.from_numpy(Y_test.values[:, np.newaxis]).float()

## Now run model
loss_fn = torch.nn.MSELoss(reduction='elementwise_mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
do_training(model, X_train, Y_train, optimizer)

0 tensor(18260904., grad_fn=<MseLossBackward>)
100 tensor(6267635., grad_fn=<MseLossBackward>)
200 tensor(1705114.3750, grad_fn=<MseLossBackward>)
300 tensor(1091557., grad_fn=<MseLossBackward>)
400 tensor(1014259.4375, grad_fn=<MseLossBackward>)
500 tensor(1003724.3125, grad_fn=<MseLossBackward>)
600 tensor(987928.3750, grad_fn=<MseLossBackward>)
700 tensor(985802.6875, grad_fn=<MseLossBackward>)
800 tensor(982526.5000, grad_fn=<MseLossBackward>)
900 tensor(980437.9375, grad_fn=<MseLossBackward>)
1000 tensor(976926.2500, grad_fn=<MseLossBackward>)
1100 tensor(972897.8125, grad_fn=<MseLossBackward>)
1200 tensor(969080.6250, grad_fn=<MseLossBackward>)
1300 tensor(975465.1250, grad_fn=<MseLossBackward>)
1400 tensor(969783.0625, grad_fn=<MseLossBackward>)
1500 tensor(969482., grad_fn=<MseLossBackward>)
1600 tensor(968283.1250, grad_fn=<MseLossBackward>)
1700 tensor(963359.5625, grad_fn=<MseLossBackward>)
1800 tensor(965612.5000, grad_fn=<MseLossBackward>)
1900 tensor(955097.3125, grad_fn=

In [23]:
from torchsummary import summary
summary(model, input_size=(1, X_train.shape[1]))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 28]             196
              ReLU-2                [-1, 1, 28]               0
            Linear-3                [-1, 1, 66]           1,914
         LeakyReLU-4                [-1, 1, 66]               0
           Dropout-5                [-1, 1, 66]               0
            Linear-6                [-1, 1, 33]           2,211
         LeakyReLU-7                [-1, 1, 33]               0
            Linear-8                [-1, 1, 33]           1,122
           Sigmoid-9                [-1, 1, 33]               0
Total params: 5,443
Trainable params: 5,443
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.02
Estimated Total Size (MB): 0.02
-----------------------------------------------

In [55]:
y_pred = np.argmax(model(X_train).detach().numpy(), axis=1)

In [66]:
y = np.argmax(Y_train.detach().numpy(), axis=2).flatten()

In [70]:
n = 0
for true, claim in zip(y, y_pred):
    print(labels[true], "/", labels[claim])
    if n > 100:
        break
    n += 1


6 LONG / 6
4 / 6
6 SHORT / 4
2 / 6
10 / 6
14 / 6
14 / 6
10 / 4
0 / 4
2 / 4
2 / 6
8 / 4
0 / 6
12 / 6
8 SHORT / 6
8 SHORT / 6
12 / 4
4 / 6
6 / 6
4 / 4
8 / 4
6 / 4
4 / 6
12 / 6
4 / 6
6 / 6
2 LONG / 4
10 / 6
8 LONG / 6
14 / 4
00 SHORT / 4
4 / 6
8 / 6
10 / 6
00 / 6
6 / 6
2 / 6
6 / 4
6 / 6
2 / 6
4 / 6
4 SHORT / 4
12 / 6
8 SHORT / 4
4 / 6
0 SHORT / 4
0 / 6
6 / 6
4 / 6
16 LONG / 4
14 SHORT / 4
4 / 6
6 / 8
6 LONG / 4
8 / 4
4 / 6
2 / 4
4 / 6
12 LONG / 6
0 / 8
8 / 6
4 SHORT / 6
00 SHORT / 6
4 / 6
10 / 4
6 / 4
4 / 6
8 / 2
12 / 4
6 / 6
0 / 6
8 / 6
16 LONG / 4
2 / 6
14 SHORT / 4
2 / 6
6 / 6
4 / 6
2 / 4
2 SHORT / 6
8 / 6
6 / 4
2 / 4
4 / 4
4 / 4
2 SHORT / 6
14 / 6
12 / 6
12 / 4
4 / 6
2 / 6
12 / 6
6 / 6
0 SHORT / 6
8 / 6
4 / 6
8 / 6
8 / 4
6 / 6
2 / 4
00 / 4
2 / 6
