# Review: Logistic Regression Coefficients

In [1]:
import numpy as np
import pandas as pd

# Hand-copied coefficients for the review example, one per input column.
# In practice, obtain these by calling LogisticRegression.fit(...).
vec2 = np.array(
    [1.37239431, -1.16675093, -1.32467119, 6.59925245]
).reshape(-1, 1)  # column vector, so that `row @ vec2` yields a single score

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)), applied element-wise.

    Works on scalars, NumPy arrays and pandas objects; the container type of
    the input is preserved because only arithmetic and a NumPy ufunc are used.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

# In practice, prefer LogisticRegression.predict(...) over this hand-rolled version.
def classification_model(row):
    """Classify `row` using the fixed coefficient column `vec2`.

    The linear score ``row @ vec2`` is passed through ``sigmoid``, rounded to
    the nearest integer and cast to bool, so the result is True where the
    sigmoid output rounds to 1 and False where it rounds to 0.
    """
    probability = sigmoid(row @ vec2)
    return probability.round().astype(bool)

# Multi-Class Logistic Regression

In [2]:
# Build a synthetic data set: one normally distributed "weight" per animal,
# with a different mean/spread for each kind (50 dogs, 40 cats, 20 mice).
# A seeded Generator keeps the frame identical on every Restart & Run All.
SEED = 320
rng = np.random.default_rng(SEED)

animals = ["dog"] * 50 + ["cat"] * 40 + ["mouse"] * 20
weight = np.concatenate((
    rng.normal(20, 7, 50),    # dogs
    rng.normal(10, 3, 40),    # cats
    rng.normal(1, 0.3, 20),   # mice
))
# "one" is a constant column, so a row [weight, 1] can later be multiplied by
# stacked [slope, intercept] coefficients.
df = pd.DataFrame({"weight": weight, "one": 1, "kind": animals})
df

Unnamed: 0,weight,one,kind
0,10.835011,1,dog
1,21.304322,1,dog
2,17.284541,1,dog
3,18.528989,1,dog
4,17.386590,1,dog
...,...,...,...
105,0.532801,1,mouse
106,0.716917,1,mouse
107,1.436854,1,mouse
108,1.240639,1,mouse


In [3]:
from sklearn.model_selection import train_test_split

# Hold out half of the rows for evaluation. Stratifying on "kind" keeps the
# dog/cat/mouse proportions the same in both halves, and a fixed random_state
# makes the split (and every number derived from it) reproducible.
train, test = train_test_split(
    df, train_size=0.5, stratify=df["kind"], random_state=0
)
train.head()

Unnamed: 0,weight,one,kind
87,7.987801,1,cat
55,12.300825,1,cat
10,17.169456,1,dog
101,0.865147,1,mouse
98,0.927797,1,mouse


In [9]:
from sklearn.linear_model import LogisticRegression

# Fit a classifier that predicts "kind" from the single "weight" feature.
# The intercept is learned by the model itself, so the "one" column is not
# passed in here.
lr = LogisticRegression()
lr.fit(X=train[["weight"]], y=train["kind"])

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [11]:
# Attach the model's predictions as a new column. `assign` returns a new
# frame, so the slice produced by train_test_split is never written to
# (a bare `test.copy()` discards its result and does nothing), and the cell
# gives the same result however many times it is re-run.
test = test.assign(predicted=lr.predict(test[["weight"]]))
test.head()

Unnamed: 0,weight,one,kind,predicted
60,4.38195,1,cat,cat
45,16.61462,1,dog,dog
58,8.599523,1,cat,cat
2,17.284541,1,dog,dog
5,24.795626,1,dog,dog


In [13]:
# Fitted parameters: one slope row and one intercept per class.
(lr.coef_, lr.intercept_)

(array([[ 0.16804496],
        [ 1.00460136],
        [-1.17264632]]), array([ 2.1710018 , -9.10402612,  6.93302432]))

In [16]:
# Stack the per-class slopes (coef_ transposed to one column per class) on top
# of the per-class intercepts, so that [weight, 1] @ coef gives one linear
# score per class.
coef = np.concatenate((lr.coef_.T, lr.intercept_.reshape(1, -1)), axis=0)
coef

array([[ 0.16804496,  1.00460136, -1.17264632],
       [ 2.1710018 , -9.10402612,  6.93302432]])

In [24]:
# Columns 0..2 of coef; with three classes this is the whole array.
coef[:, 0:3]

array([[ 0.16804496,  1.00460136, -1.17264632],
       [ 2.1710018 , -9.10402612,  6.93302432]])

In [36]:
# Per-class score: sigmoid of the linear score weight * slope + 1 * intercept.
# The feature columns are selected by name (not by position) so the product
# stays lined up with the rows of `coef` even if columns are added to or
# reordered in `test`.
# NOTE(review): each class gets an independent sigmoid, so a row's scores need
# not sum to 1 and are presumably not the values lr.predict_proba would
# return - confirm that this is the intended illustration.
scores = sigmoid(test[["weight", "one"]] @ coef)
scores = scores.set_axis([c + "-score" for c in lr.classes_], axis=1)
scores.head()

Unnamed: 0,cat-score,dog-score,mouse-score
60,0.948209,0.008996,0.8574839
45,0.993057,0.999493,3.544717e-06
58,0.973817,0.385816,0.04104504
2,0.993791,0.999741,1.61588e-06
5,0.998235,1.0,2.416633e-10


In [39]:
# Show the held-out rows side by side with their per-class scores; the two
# frames are aligned on their row index.
pd.concat([test, scores], axis="columns")

Unnamed: 0,weight,one,kind,predicted,cat-score,dog-score,mouse-score
60,4.38195,1,cat,cat,0.948209,0.008996,0.8574839
45,16.61462,1,dog,dog,0.993057,0.999493,3.544717e-06
58,8.599523,1,cat,cat,0.973817,0.385816,0.04104504
2,17.284541,1,dog,dog,0.993791,0.999741,1.61588e-06
5,24.795626,1,dog,dog,0.998235,1.0,2.416633e-10
7,11.666024,1,dog,cat,0.984194,0.931864,0.001172908
97,0.945565,1,mouse,mouse,0.911323,0.000287,0.9970536
44,18.714524,1,dog,dog,0.995111,0.999939,3.021044e-07
35,15.391716,1,dog,dog,0.991486,0.998271,1.4872e-05
93,0.937155,1,mouse,mouse,0.911209,0.000285,0.9970824
