# Neural Networks


[Tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)

In [112]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

## Data

Load the Data using pandas/numpy:

In [113]:
df=pd.read_csv('./DataSets/winequality-red.csv', sep=';')
df=np.array(df.values)

The last column are the features so extract those:

In [114]:
y = df[:,-1]
X = df[:,range(df.shape[1]-1)]

In [115]:
y = y>5

In [116]:
prob_good = y
prob_bad = [ not i for i in y ]
prob_good_bad = np.c_[prob_good, prob_bad]

In [117]:
print(prob_good_bad)

[[False  True]
 [False  True]
 [False  True]
 ...
 [ True False]
 [False  True]
 [ True False]]


Finally make sure that these are tensors

In [118]:
X = torch.from_numpy(X.astype(np.float32))
prob_good_bad = torch.from_numpy(prob_good_bad.astype(np.float32))
y = prob_good_bad
print(y)


tensor([[0., 1.],
        [0., 1.],
        [0., 1.],
        ...,
        [1., 0.],
        [0., 1.],
        [1., 0.]])


## Define a Model

In [123]:
model = nn.Sequential(
    nn.Linear(in_features=11, out_features=4),
    nn.Sigmoid(),
    nn.Linear(4, 2),
    nn.Sigmoid()
    # nn.Flatten(start_dim=0, end_dim=1)
)

### Define a Loss Function

In [124]:
loss_fn = nn.MSELoss(reduction='sum')

In [145]:
lss_fn = torch.nn.NLLLoss()


### Define an Optimizer

In [146]:
optimizer = torch.optim.RMSprop(model.parameters(), lr = 1e-6)

### Train the Model

In [147]:
for t in range(5000):
    # Forward Pass: Compute predicted y value
    y_pred = model(X.float())

    # Measure the Loss
    loss = loss_fn(y_pred, y.float()) 
    if t % 100 == 99:
        print(t, '\t', loss.item())

    # Backward Pass; Compute the Partial Derivatives
    ## First Zero the Gradients, otherwise the can't be overwritten
    optimizer.zero_grad()
    ## Now calculate the gradients
    loss.backward()

    # Adjust the Weights
    optimizer.step()

99 	 865.0260009765625
199 	 864.8231811523438
299 	 864.6378173828125
399 	 864.4583129882812
499 	 864.280517578125
599 	 864.1033935546875
699 	 863.9268188476562
799 	 863.750244140625
899 	 863.5740966796875
999 	 863.3980102539062
1099 	 863.2222290039062
1199 	 863.046630859375
1299 	 862.8712768554688
1399 	 862.6961059570312
1499 	 862.521240234375
1599 	 862.3466796875
1699 	 862.1724853515625
1799 	 861.99853515625
1899 	 861.82470703125
1999 	 861.6512451171875
2099 	 861.4779663085938
2199 	 861.3048095703125
2299 	 861.1320190429688
2399 	 860.9593505859375
2499 	 860.7869873046875
2599 	 860.6148071289062
2699 	 860.4428100585938
2799 	 860.2710571289062
2899 	 860.099609375
2999 	 859.92822265625
3099 	 859.7572021484375
3199 	 859.5863647460938
3299 	 859.415771484375
3399 	 859.245361328125
3499 	 859.0751953125
3599 	 858.9052734375
3699 	 858.735595703125
3799 	 858.5660400390625
3899 	 858.3968505859375
3999 	 858.227783203125
4099 	 858.0590209960938
4199 	 857.89

In [148]:
yhat = model(X)
print(yhat)

tensor([[0.4140, 0.6279],
        [0.4074, 0.6440],
        [0.4089, 0.6409],
        ...,
        [0.3900, 0.6461],
        [0.3939, 0.6457],
        [0.4057, 0.6430]], grad_fn=<SigmoidBackward>)


In [149]:
yhat = model(X)
print("descriptive statistics of yhat-------")
print(yhat.detach().numpy().mean())
print(yhat.detach().numpy().std())

print("descriptive statistics of y-------")
print(y.mean())
print(y.std())

descriptive statistics of yhat-------
0.51529443
0.10740744
descriptive statistics of y-------
tensor(0.5000)
tensor(0.5001)


In [150]:
print(yhat.detach().numpy())

[[0.41402104 0.62786376]
 [0.40740186 0.6439618 ]
 [0.40887716 0.64087504]
 ...
 [0.39003873 0.6461355 ]
 [0.39392456 0.6457051 ]
 [0.405732   0.6430231 ]]


In [151]:
is_good_obs = y.detach().numpy()[:,0] > y.detach().numpy()[:,1]

In [152]:
is_good_pred = yhat.detach().numpy()[:,0] > yhat.detach().numpy()[:,1]

In [153]:
np.average([ a == b for a in is_good_obs for b in is_good_pred])

0.4652908067542214

In [135]:
[ (a, b) for a in y.detach().numpy()[:,0] for b in y.detach().numpy()[:,1]]

[(0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 0.0),
 (0.0, 0.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),
 (0.0, 1.0),

In [34]:
yhat = (yhat > 0.5).detach().numpy()
print(np.average(yhat))

1.0


In [35]:
print(y)

tensor([0., 0., 0.,  ..., 1., 0., 1.])


In [36]:
correctQ = [ i==j for i in yhat for j in y ]
print(np.average(correctQ))

0.0


In [39]:
print(correctQ)

 False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False

## Torch Class

Define a torch class, in this case we'll use an 11-4-1, where the output measures whether or not the wine is good.

In [40]:

class Network(nn.Module):
    def __init__(self):
        super().__init__()

        # Inputs to hidden layer linear transformation
        # Create the input layer and the hidden layer
        self.hidden1 = nn.Linear(11, 10)
        self.hidden2 = nn.Linear(10, 8)
        self.hidden3 = nn.Linear(8, 4)
        self.output = nn.Linear(4, 1)

        # Define the activation functions that will be used
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1) # dim=1 calculates softmax across cols

    def forward(self, x):
        # Take input
        x = self.hidden1(x)  # Linear Combination of input-> hidden
        x = self.hidden2(x)  # Linear Combination of input-> hidden
        x = self.hidden3(x)  # Linear Combination of input-> hidden
        x = self.sigmoid(x) # Activation Function
        x = self.output(x)  # Linear Combination of hidden -> output
        x = self.sigmoid(x) # Activation Function

        return x

# Assign the model object
net = Network()
print(net)

Network(
  (hidden1): Linear(in_features=11, out_features=10, bias=True)
  (hidden2): Linear(in_features=10, out_features=8, bias=True)
  (hidden3): Linear(in_features=8, out_features=4, bias=True)
  (output): Linear(in_features=4, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (softmax): Softmax(dim=1)
)


### Generate the Model Output

In [41]:

## Print the Model Output
out = net(X)
print(out)

tensor([[0.2479],
        [0.2181],
        [0.2249],
        ...,
        [0.2295],
        [0.2258],
        [0.2312]], grad_fn=<SigmoidBackward>)


## Measure the Loss Function

First define a loss function,  We'll use *MSE* here, but there are [many others](https://pytorch.org/docs/stable/nn.html#loss-functions). Also define the form of Gradient Descent implemented

In [42]:
eta = 1e-6

import torch.optim as optim

criterion = nn.MSELoss(reduction='sum')
optimizer = optim.RMSprop(net.parameters(), lr=1e-3)
# optimizer = optim.SGD(net.parameters(), lr=eta, momentum = 0.9)

## Train the Network

Now that gradient descent model, the structure of the network and the loss function are all defined we can begin training the network.

In [43]:
for t in range(1000):
    # Forward Pass; Calculate y_pred
    y_pred = net(X)

    # Calculate and print the loss
    loss = criterion(y_pred, y)
    if t % 100 == 0:
        print(loss.item())
    
    # Backward Pass
    optimizer.zero_grad()      # Zero out the Gradients
    loss.backward()            # Calculate Gradients and store in .grad

    # Update the Gradients
    optimizer.step()

840620.4375
636174.0
636137.125
636131.75
636127.4375
636124.375
636122.5
636121.3125
636120.75
636120.375


### Save the model

In [44]:
PATH = './wine_neural_network.pth'
torch.save(net.state_dict(), PATH)

## Print the Parameters of the Neural Network

In [45]:
for name, param in net.named_parameters():
    if param.requires_grad:
        print(name, param.data)


hidden1.weight tensor([[ 0.1659,  0.1932,  0.2630,  0.1430,  0.1354, -0.1183, -0.1731, -0.2000,
          0.2040, -0.2001,  0.0873],
        [ 0.2676,  0.1665,  0.0743, -0.0893, -0.0434,  0.1390,  0.0346,  0.0886,
         -0.0303, -0.2046,  0.1101],
        [ 0.3145,  0.1626,  0.3498,  0.1678, -0.0362,  0.3579,  0.0022, -0.1147,
          0.2304,  0.0761,  0.2774],
        [-0.3507, -0.4197, -0.0269,  0.0961,  0.1233, -0.2793,  0.0695, -0.4159,
         -0.2709, -0.2752, -0.4242],
        [ 0.2044,  0.3283,  0.3445, -0.1539,  0.2026,  0.2616,  0.0382,  0.3558,
         -0.1172,  0.1999,  0.1058],
        [-0.1351, -0.2686, -0.1736, -0.0905, -0.2516,  0.2392,  0.1405,  0.2328,
          0.0160, -0.0375, -0.0537],
        [ 0.3497,  0.2349, -0.0523,  0.2163, -0.1075, -0.1066,  0.3245,  0.0276,
          0.0748,  0.0826,  0.1978],
        [ 0.1737, -0.2390, -0.1542,  0.2967, -0.2272, -0.2125, -0.2348,  0.2773,
          0.2997, -0.0662, -0.0178],
        [ 0.0682, -0.0585,  0.0850,  0.14

## Print the Misclassification Rate

In [46]:
yhat = net(X)
yhat = yhat.detach().numpy().reshape(-1) > 0.3
print(yhat)

[ True  True  True ...  True  True  True]


In [47]:
y = df[:,-1]
y = y>5
np.array(y)

array([False, False, False, ...,  True, False,  True])

In [48]:
correctQ = [ i==j for i in yhat for j in y ]
print(np.average(correctQ))

0.5347091932457786


0.5347091932457786


Hmm, misclassificatoin rate is not very good.