# Training a Binary Classifier

In [13]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.preprocessing import StandardScaler

In [17]:
Xi, yi = load_iris().data[:100,:], load_iris().target[:100]

In [18]:
SXi = StandardScaler().fit_transform(Xi)

In [19]:
log=LogisticRegression(penalty = 'l2', C=10)

In [20]:
fitlog = log.fit(Xi,yi)

In [21]:
fitlog.predict(SXi[0].reshape(1,4))

array([0])

In [26]:
fitlog.predict_proba(SXi[0].reshape(1,4))

array([[9.99999756e-01, 2.43556596e-07]])

# Training a Multiclass Classifier

In [30]:
multilog = LogisticRegression(penalty='l2', multi_class = 'ovr', C=10) # solver:'liblinear' is limited to one-versus-rest schemes.

In [41]:
Xn, yn = load_iris().data, load_iris().target

In [42]:
SXn = StandardScaler().fit_transform(Xn)

In [43]:
ftmultilog = multilog.fit(SXn,yn)

In [44]:
ftmultilog.classes_

array([0, 1, 2])

In [45]:
ftmultilog.predict_proba(SXn[0].reshape(1,4))

array([[9.19423162e-01, 8.05768377e-02, 2.98696081e-11]])

In [46]:
ftmultilog.predict(SXn[6].reshape(1,4))

array([0])

# Reducing Variance Through Regularization

In [51]:
reglog = LogisticRegression(penalty = 'l2', C=5, solver='sag', multi_class='multinomial') # solver='sag', makes it faster

In [75]:
Logist = LogisticRegressionCV(Cs=100,penalty = 'l2', multi_class='multinomial')

In [76]:
best = Logist.fit(SXn,yn)

In [77]:
best.C_

array([5.85702082, 5.85702082, 5.85702082])

In [78]:
best.Cs

100

In [79]:
best.predict(SXn[6].reshape(1,4))

array([0])

# Handling Imbalanced Classes

In [82]:
import numpy as np

In [83]:
Ximbal,yimbal = load_iris().data[40:,:], load_iris().target[40:]

In [89]:
yi_new = np.where((yimbal==0), 0, 1)
yi_new

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [105]:
ml = LogisticRegression(penalty='l2', C=5, class_weight='balanced', solver='sag')

In [106]:
tfml = ml.fit(Ximbal,yi_new)



In [107]:
tfml.predict(Ximbal[0].reshape(1,4))

array([0])

In [108]:
tfml.predict_proba(Ximbal[40].reshape(1,4))

array([[0.01224957, 0.98775043]])