In [200]:
%matplotlib widget

from sklearn.datasets import load_wine
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import numpy as np

In [201]:
# Load the scikit-learn wine dataset and pull out the pieces used below
# (feature matrix, integer class labels, and the feature names).
dataset = load_wine()
data, target = dataset.data, dataset.target
names = dataset.feature_names

In [202]:
# Feature-scale the data before fitting: fit_transform learns the scaling
# statistics from `data` and applies them in a single call.
scaler = StandardScaler()
data = scaler.fit_transform(data)

In [203]:
# Summarise the scaled feature matrix and the integer class labels.
print(names)
print("Data: ", data.shape, data.dtype)
print("Target:", target.shape, target.dtype)

# Raw values, for a quick visual sanity check
print(data)
print(target)

['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Data:  (178, 13) float64
Target: (178,) int32
[[ 1.51861254 -0.5622498   0.23205254 ...  0.36217728  1.84791957
   1.01300893]
 [ 0.24628963 -0.49941338 -0.82799632 ...  0.40605066  1.1134493
   0.96524152]
 [ 0.19687903  0.02123125  1.10933436 ...  0.31830389  0.78858745
   1.39514818]
 ...
 [ 0.33275817  1.74474449 -0.38935541 ... -1.61212515 -1.48544548
   0.28057537]
 [ 0.20923168  0.22769377  0.01273209 ... -1.56825176 -1.40069891
   0.29649784]
 [ 1.39508604  1.58316512  1.36520822 ... -1.52437837 -1.42894777
  -0.59516041]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1

We want to achieve multiclass classification. Logistic regression, as we have used it so far, classifies binary labels very well, so how can we extend it to more than two classes? The idea is to train multiple binary classification models, one per class, and combine their answers to simulate a multiclass classifier (this is known as the one-vs-rest strategy). So just by being able to do binary classification, we can easily extend to any number n of categories.

In [204]:
# Basic dimensions of the problem
examples, features = data.shape[0], data.shape[1]
categories = np.unique(target).shape[0]

# One independent binary classifier per category
models = [LogisticRegression(random_state=0) for _ in range(categories)]

# One binary target vector per category: XOR with the category index maps
# that category to 0 and every other label to a non-zero value, which the
# bool -> int32 casts then collapse to 1.
binarytargets = [
    (np.array(target) ^ label).astype(bool).astype(np.int32)
    for label in range(categories)
]

print("Targets:", binarytargets)

# Train each classifier against its own binary target vector
for clf, labels in zip(models, binarytargets):
    clf.fit(data, labels)

Targets: [array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0

This is how easy it is to transform the multiclass problem into multiple binary classification problems. XOR-ing the integer labels with i maps the i-th class to 0 while making every other class non-zero. The result is an array of arbitrary integers rather than just 0's and 1's, so we first cast it to a boolean array (any non-zero value becomes True) and then cast that boolean array to an integer array of 0's and 1's. Note that in this encoding 0 means "belongs to class i" and 1 means "belongs to some other class". With one such array per category, we can fit a logistic regression model to each category independently and combine them to solve the full multiclass classification problem.

In [205]:
def predict(data, models):
    """Combine one-vs-rest binary classifiers into one class index per sample.

    Every model is asked for a hard binary prediction in which 0 means
    "this sample belongs to my class" and 1 means "it belongs to another
    class". The predicted class of a sample is the index of the first model
    that voted 0 for it.

    Parameters
    ----------
    data : array-like
        Samples, passed unchanged to each model's ``predict`` method.
    models : sequence
        One binary classifier per class, ordered by class index. Each must
        provide ``predict(data)`` returning one 0/1 label per sample.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one class index per sample (empty when
        ``data`` contains no samples).

    Raises
    ------
    ValueError
        If ``models`` is empty.

    Notes
    -----
    Only hard votes are used, so ambiguous samples are resolved by position:
    if several models vote 0 the lowest class index wins, and if no model
    votes 0 the sample also falls back to class index 0.
    """
    if len(models) == 0:
        raise ValueError("predict() needs at least one model")

    # Stack the per-model votes as columns: one row per sample, one column
    # per class. Building the matrix directly keeps the sample axis even
    # when there are zero samples.
    votes = np.column_stack([np.asarray(model.predict(data)) for model in models])

    # argmin returns the first occurrence of the row minimum, i.e. the first
    # model that voted 0 (or column 0 when every vote is 1).
    return np.argmin(votes, axis=1)

In [206]:
# Run the one-vs-rest ensemble on the same data the models were fitted on
predictions = predict(data, models)
print("Predictions:", predictions)

Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [207]:
# Count the samples whose predicted class differs from the true label.
# A direct comparison states the intent and does not rely on integer-only XOR.
errors = np.sum(target != predictions)
print("Misclassifications:", errors)

Misclassifications: 1


As we can see, we only have 1 error. So the trick of using multiple models does work in practice, although it is a bit slow on the other hand. To verify our result, let's now use scikit-learn's built-in multiclass logistic regression instead of our trick. The model that scikit-learn uses is a proper multiclass formulation rather than the hack we just performed. Therefore, it should perform a bit better.

In [208]:
# Reference model: let scikit-learn handle every class with a single estimator
model = LogisticRegression(random_state=0).fit(data, target)
scikit_predictions = model.predict(data)
print(scikit_predictions)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


In [209]:
# Count the samples whose predicted class differs from the true label.
# A direct comparison states the intent more clearly than subtracting labels.
errors = np.sum(target != scikit_predictions)
print("Misclassifications:", errors)

Misclassifications: 0


Nice, no errors. Scikit-learn has predicted everything correctly with its more complex model (keep in mind that both approaches were evaluated on the same data they were trained on). This gives us a few insights. In theory, it's always better to design a proper model. This will lead to better accuracy and is much faster and more numerically stable than the hacks we perform. In reality, however, sometimes we have no choice but to use our time wisely. In these situations, it's better to be clever about our approach, and we have shown that multiclass classification is not hard and can easily be done using only binary classification techniques. Therefore, each side has pros and cons and we must make our decision based on them. Even though I showed the pros of this hack, there are many cons. Memory use grows with every additional model, so if there are many categories the approach is no longer viable. Additionally, it's also much slower because each model is trained separately. Finally, there will be accuracy issues from treating the problem as many independent binary classifications. Good day :)