# Casting Classification as Regression, Regressing to Probabilities
1. We can turn classification labels into a one-hot vector.
2. We can regress to the vector.
3. To produce output classes, we can take the element with highest weight.
4. The regressed value can be interpreted as an (approximate) probability.

Regressing to probabilities is a useful trick, especially when we start thinking about confidences and unsupervised data analysis.

[Link to Fish Dataset Details](https://www.kaggle.com/aungpyaeap/fish-market)

In [4]:
import numpy as np
import csv
from tqdm import tqdm_notebook

rows = []

with open('Fish.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        rows.append(row)

print(len(rows))
print(rows[0]) # first row is a header
print(rows[1])

rows = rows[1:]

labels = {} # Create a dictionary of label strings to numeric values
for row in rows:
    if row[0] not in labels:
        labels[row[0]]=len(labels)

print(labels)
        
inputs = np.array([[float(c) for c in (row[1:])] for row in rows])
outputs = np.array([labels[row[0]] for row in rows])
print(outputs)

160
['\ufeffSpecies', 'Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']
['Bream', '242', '23.2', '25.4', '30', '11.52', '4.02']
{'Roach': 1, 'Parkki': 3, 'Whitefish': 2, 'Pike': 5, 'Perch': 4, 'Smelt': 6, 'Bream': 0}
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6
 6 6 6 6 6 6 6 6 6 6 6]


In [5]:
def output_to_one_hot(categories, max_val):
    data = np.zeros((len(categories), max_val))
    data[np.arange(len(categories)), categories] = 1
    return data

encodings = output_to_one_hot(outputs, len(labels))
print(encodings[:10])
print(encodings[-10:])

[[1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1.]]


In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(inputs, encodings)

# Assignment:
1. Define a network class that regresses to the 7 outputs.
2. Train a sufficiently large network to perform the categorization.
3. Measure the test accuracy of the model by counting the number of accurate labels

# Stretch Goals:
- Test out different network architectures (depth, breadth) and examine training performance

In [7]:
from torch.autograd import Variable
from tqdm import tqdm_notebook, trange
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as utils
import matplotlib.pyplot as plt
import numpy as np
import time

In [8]:
class Net(nn.Module):

    def __init__(self, breadth=500, depth=3):
        super(Net, self).__init__()

        hidden_layers = depth - 2
        self.fcs = [nn.Linear(6, breadth)]
        self.fcs.extend([nn.Linear(breadth, breadth)]*hidden_layers)
        
        for i in np.arange(len(self.fcs)):
            self.add_module('fc{}'.format(i), self.fcs[i])
        
        self.final_fc = nn.Linear(breadth, 7)

    def forward(self, x):
        for fc in self.fcs:
            x = F.relu(fc(x))
        x = self.final_fc(x)
        
        return x
    
    def train(self, inputs, true_vals, num_epochs=1000):
        inputs = torch.from_numpy(inputs).float()
        true_vals = torch.from_numpy(true_vals).float()
        
        t0 = time.time()

        net = self
        net.float() # force float type
        net.zero_grad()
        
        outputs = net(Variable(torch.Tensor([0]*6)))
        outputs.backward(torch.randn(7)) # Use random gradients to break symmetry?

        learning_rate = 1 # Need to initialize carefully
        for f in net.parameters():
            f.data.sub_(f.grad.data * learning_rate)

        # create your optimizer
        optimizer = optim.Adam(net.parameters())
        criterion = nn.MSELoss()

        t = trange(num_epochs)
        for epoch in t:  # loop over the dataset multiple times

            running_loss = 0.0

            # wrap them in Variable
            reshaped_inputs = inputs #.view(-1, 1) # Structure with each input in its own row
            reshaped_outputs = true_vals #.view(-1, 1) # Neglecting to have outputs and true vals to match dimension is a common mistake.

            # forward + backward + optimize
            outputs = net(reshaped_inputs)
            #print(outputs)
            #print(reshaped_outputs)
            #loss = criterion(outputs, reshaped_outputs)
            error = reshaped_outputs - outputs
            #print("ERROR")
            #print(error)
            loss = (error ** 2).mean()

            loss.backward()
            optimizer.step()

            # zero the parameter gradients
            optimizer.zero_grad()

            t.set_description('ML: loss={}'.format(loss.item())) # Updates Loss information

        t1 = time.time() - t0

        return loss, t1
    
    def categorize(self, inputs):
        inputs = torch.from_numpy(inputs).float()
        outputs = net(inputs).detach().numpy()
        
        # make hard decisions
        hard_output = np.zeros_like(outputs)
        hard_output[np.arange(len(outputs)), outputs.argmax(1)] = 1
        return hard_output

In [9]:
net = Net()
print(net)

Net(
  (fc0): Linear(in_features=6, out_features=500, bias=True)
  (fc1): Linear(in_features=500, out_features=500, bias=True)
  (final_fc): Linear(in_features=500, out_features=7, bias=True)
)


In [10]:
net.train(X_train, y_train)

ML: loss=0.08434455841779709: 100%|██████████| 1000/1000 [00:05<00:00, 193.16it/s]


(tensor(0.0843, grad_fn=<MeanBackward0>), 5.19073748588562)

In [11]:
from tqdm import tqdm_notebook


In [12]:
y_categorized_test = net.categorize(X_test)

# The number of accurate labels
num_accurate_labels = np.sum(y_categorized_test == y_test) / y_test.shape[1]

frac_correct = num_accurate_labels / len(y_test)
print(frac_correct)

0.8571428571428571


In [13]:
num_accurate_labels

34.285714285714285

In [14]:
# Test different parameters
breadths = [250, 500, 1000]
depths = [3, 4, 5]

frac_corrects = np.zeros([len(breadths), len(depths)])

for b in np.arange(len(breadths)):
        breadth = breadths[b]

        for d in np.arange(len(depths)):
            depth = depths[d]
            
            net = Net(breadth=breadth, depth=depth)
            net.train(X_train, y_train)
            
            y_categorized_test = net.categorize(X_test)
            # The number of accurate labels
            num_accurate_labels = np.sum(y_categorized_test == y_test) / y_test.shape[1]

            frac_correct = num_accurate_labels / len(y_test)
            print("Breadth: {}, Depth: {}, {} correct".format(breadth, depth, frac_correct))

ML: loss=0.06734256446361542: 100%|██████████| 1000/1000 [00:04<00:00, 244.74it/s]
ML: loss=15.752217292785645:   2%|▏         | 21/1000 [00:00<00:09, 107.25it/s]

Breadth: 250, Depth: 3, 0.8571428571428571 correct


ML: loss=0.09997686743736267: 100%|██████████| 1000/1000 [00:04<00:00, 202.98it/s]
ML: loss=0.9487798810005188:   2%|▏         | 18/1000 [00:00<00:10, 91.88it/s]

Breadth: 250, Depth: 4, 0.8142857142857143 correct


ML: loss=0.05704564228653908: 100%|██████████| 1000/1000 [00:05<00:00, 174.90it/s]
ML: loss=3572.078369140625:   2%|▏         | 19/1000 [00:00<00:09, 99.16it/s] 

Breadth: 250, Depth: 5, 0.8785714285714287 correct


ML: loss=0.1714099794626236: 100%|██████████| 1000/1000 [00:05<00:00, 195.34it/s]
ML: loss=31.32442283630371:   2%|▏         | 15/1000 [00:00<00:12, 77.46it/s]

Breadth: 500, Depth: 3, 0.8428571428571429 correct


ML: loss=0.07283200323581696: 100%|██████████| 1000/1000 [00:07<00:00, 140.79it/s]
ML: loss=0.670676052570343:   1%|          | 11/1000 [00:00<00:17, 57.92it/s] 

Breadth: 500, Depth: 4, 0.8714285714285713 correct


ML: loss=0.1396862417459488: 100%|██████████| 1000/1000 [00:09<00:00, 110.12it/s]
ML: loss=520990.03125:   1%|          | 8/1000 [00:00<00:22, 43.60it/s]

Breadth: 500, Depth: 5, 0.8642857142857142 correct


ML: loss=0.3549141585826874: 100%|██████████| 1000/1000 [00:12<00:00, 82.69it/s]
ML: loss=364.1068115234375:   1%|          | 6/1000 [00:00<00:29, 33.83it/s] 

Breadth: 1000, Depth: 3, 0.8357142857142857 correct


ML: loss=0.09415100514888763: 100%|██████████| 1000/1000 [00:20<00:00, 49.03it/s]
ML: loss=7.513808250427246:   0%|          | 4/1000 [00:00<00:44, 22.31it/s]

Breadth: 1000, Depth: 4, 0.8571428571428571 correct


ML: loss=0.07752490043640137: 100%|██████████| 1000/1000 [00:25<00:00, 39.85it/s]


Breadth: 1000, Depth: 5, 0.85 correct
