# Multi-Output Multi-class Classification Problem

# Multi-Class Classification Problem

In [2]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
import pandas as pd

In [3]:
# Build a synthetic 3-class dataset: 1000 samples with 10 features
# (5 informative + 5 redundant); random_state fixes the draw so reruns match.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    n_classes=3,
    random_state=1,
)

In [4]:
# Sanity-check the dimensions: a 2-D feature matrix and a 1-D label vector
# with one label per sample.
X.shape, y.shape

((1000, 10), (1000,))

In [5]:
# Wrap the raw NumPy arrays in labelled pandas objects: the feature matrix
# becomes a DataFrame with columns feature_1 .. feature_N, and the labels
# become a Series named 'target'.
X_df = pd.DataFrame(
    data=X,
    columns=[f'feature_{col_no}' for col_no in range(1, X.shape[1] + 1)],
)
y_series = pd.Series(data=y, name='target')

In [6]:
# Place the features and the label side by side (column-wise) so that the
# whole dataset lives in a single DataFrame.
df_oomc = pd.concat(objs=[X_df, y_series], axis='columns')

In [7]:
# Preview the first five rows: the ten feature columns followed by 'target'
df_oomc.head()

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,target
0,1.891494,-0.398476,1.638569,0.016472,1.518924,-3.526512,1.809988,0.588109,-0.025422,-0.528354,1
1,1.869138,-0.563362,2.034118,0.386694,-0.09584,-2.92724,0.739217,-0.55633,-0.2734,0.721293,0
2,1.952597,-2.831446,0.190555,-0.663887,0.191591,-1.859121,-0.101502,-0.609577,-2.077502,-1.7547,1
3,-0.670425,-0.69082,0.593548,-0.062704,2.373577,-0.52106,0.428725,-0.268511,0.492262,-1.34384,2
4,-1.398901,-0.71523,-0.677451,0.391539,-0.458324,2.178984,-1.885502,-0.966866,-0.425888,-0.207826,0


In [8]:
# Distinct class labels present in the target column (three classes expected)
df_oomc['target'].unique()

array([1, 0, 2])

In [9]:
# Number of samples per class, listed from most to least frequent;
# used here to check that the classes are roughly balanced.
df_oomc['target'].value_counts()

target
1    334
2    334
0    332
Name: count, dtype: int64

In [10]:
# Same per-class counts obtained via groupby; the result is indexed and
# ordered by the class label rather than by frequency.
df_oomc.groupby('target').size()

target
0    332
1    334
2    334
dtype: int64

## One-vs-Rest and One-vs-One for Multi-Class Classification

In [11]:
# logistic regression for multi-class classification using built-in one-vs-rest
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
import pandas as pd

In [12]:
# Re-create the synthetic 3-class dataset as raw NumPy arrays
# (same arguments and seed as the first dataset cell).
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    n_classes=3,
    random_state=1,
)

In [13]:
# Confirm that features and labels are plain NumPy arrays
type(X), type(y)

(numpy.ndarray, numpy.ndarray)

In [14]:
# define model
# LogisticRegression's `multi_class='ovr'` switch is deprecated in recent
# scikit-learn releases (FutureWarning since 1.5, scheduled for removal), so
# the one-vs-rest strategy is requested through the OneVsRestClassifier
# wrapper, which the scikit-learn docs name as the replacement.
# `model` still exposes fit()/predict(), so the following cells run unchanged.
from sklearn.multiclass import OneVsRestClassifier

model = OneVsRestClassifier(LogisticRegression())

In [15]:
# fit model on the full dataset (no train/test split is made in this section)
model.fit(X, y)

In [16]:
# make predictions for the same samples the model was fitted on
# (in-sample predictions, not an estimate of generalisation performance)
yhat = model.predict(X)

In [17]:
# Display the predicted class label for every sample
yhat

array([1, 0, 1, 2, 0, 2, 1, 2, 0, 1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 0, 0, 1,
       0, 2, 0, 0, 2, 0, 1, 1, 2, 1, 0, 2, 0, 0, 0, 1, 1, 2, 2, 1, 0, 1,
       0, 2, 0, 0, 0, 2, 0, 1, 2, 0, 2, 2, 1, 2, 1, 0, 2, 2, 2, 1, 0, 1,
       1, 1, 0, 2, 0, 2, 2, 1, 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 2, 2, 2, 1,
       0, 1, 2, 1, 1, 1, 0, 2, 0, 2, 1, 1, 0, 1, 1, 0, 0, 0, 2, 2, 2, 2,
       1, 0, 0, 1, 2, 0, 0, 0, 0, 2, 2, 2, 1, 0, 1, 2, 2, 0, 1, 0, 2, 1,
       0, 0, 2, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 1, 1, 2, 1,
       0, 0, 2, 2, 1, 0, 2, 1, 2, 2, 1, 0, 2, 0, 2, 1, 0, 0, 0, 0, 0, 1,
       2, 0, 2, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 2, 1, 0, 0, 1, 1, 0,
       2, 1, 1, 0, 1, 1, 0, 0, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 2, 0, 0, 2,
       2, 1, 2, 2, 1, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 0, 2, 2,
       2, 2, 0, 0, 0, 1, 2, 2, 2, 0, 2, 2, 2, 2, 1, 2, 0, 1, 1, 0, 1, 2,
       1, 2, 0, 2, 0, 0, 0, 2, 1, 1, 0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 2, 0,
       0, 0, 1, 2, 0, 0, 1, 0, 0, 2, 0, 0, 2, 1, 2,

# One-vs-Rest

In [18]:
# logistic regression for multi-class classification using a one-vs-rest
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

In [19]:
# Synthetic 3-class dataset (same arguments and seed as the earlier cells)
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    n_classes=3,
    random_state=1,
)
# Base learner handed to the one-vs-rest wrapper
model = LogisticRegression()
# Explicit one-vs-rest strategy built around the base learner
ovr = OneVsRestClassifier(estimator=model)
# Train the wrapper, then predict labels for the training samples;
# fit() returns the fitted estimator, so the two calls can be chained.
yhat = ovr.fit(X, y).predict(X)

In [20]:
# Display the labels predicted by the explicit one-vs-rest wrapper
yhat

array([1, 0, 1, 2, 0, 2, 1, 2, 0, 1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 0, 0, 1,
       0, 2, 0, 0, 2, 0, 1, 1, 2, 1, 0, 2, 0, 0, 0, 1, 1, 2, 2, 1, 0, 1,
       0, 2, 0, 0, 0, 2, 0, 1, 2, 0, 2, 2, 1, 2, 1, 0, 2, 2, 2, 1, 0, 1,
       1, 1, 0, 2, 0, 2, 2, 1, 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 2, 2, 2, 1,
       0, 1, 2, 1, 1, 1, 0, 2, 0, 2, 1, 1, 0, 1, 1, 0, 0, 0, 2, 2, 2, 2,
       1, 0, 0, 1, 2, 0, 0, 0, 0, 2, 2, 2, 1, 0, 1, 2, 2, 0, 1, 0, 2, 1,
       0, 0, 2, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 1, 1, 2, 1,
       0, 0, 2, 2, 1, 0, 2, 1, 2, 2, 1, 0, 2, 0, 2, 1, 0, 0, 0, 0, 0, 1,
       2, 0, 2, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 2, 1, 0, 0, 1, 1, 0,
       2, 1, 1, 0, 1, 1, 0, 0, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 2, 0, 0, 2,
       2, 1, 2, 2, 1, 0, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 0, 2, 2,
       2, 2, 0, 0, 0, 1, 2, 2, 2, 0, 2, 2, 2, 2, 1, 2, 0, 1, 1, 0, 1, 2,
       1, 2, 0, 2, 0, 0, 0, 2, 1, 1, 0, 0, 1, 1, 2, 0, 1, 1, 1, 1, 2, 0,
       0, 0, 1, 2, 0, 0, 1, 0, 0, 2, 0, 0, 2, 1, 2,

In [21]:
# SVM for multi-class classification using one-vs-one
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier
# Synthetic 3-class dataset for the one-vs-one example
# (same arguments and seed as the earlier cells).
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=5,
    n_classes=3,
    random_state=1,
)

In [22]:
# Base learner: a support vector classifier with default settings
model = SVC()
# Explicit one-vs-one strategy built around the base learner
ovo = OneVsOneClassifier(estimator=model)
# Train the wrapper, then predict labels for the training samples;
# fit() returns the fitted estimator, so the two calls can be chained.
yhat = ovo.fit(X, y).predict(X)