In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer


def sigmoid(z):
    a = 1/(1+np.exp(-z))
    return a


def logloss(y_true,y_hat):
    loss = np.mean(-1*(y_true*np.log(y_hat+0.0001)+(1-y_true)*np.log(1-y_hat+0.0001)))
    return loss


class LogisticRegression:
    def __init__(self):
        self.W = None
        self.b = None
        self.loss = []
        self.training_loss = None

    def fit(self, X, Y, alpha, round, normalize = False):
        # n features, m samples
        n, m = X.shape
        self.b = np.random.rand(1,1)
        self.W = np.random.rand(n,1)
        if normalize:
            mean = np.mean(X,axis=1)
            std = np.std(X,axis=1)
            for field in range(n):
                X[field] = (X[field] - mean[field])/std[field]
        for i in range(round):
            Z = np.matmul(np.transpose(self.W),X)+self.b
            A = sigmoid(Z)
            dZ = A-Y
            dW = np.matmul(X,np.transpose(dZ))/m
            db = np.sum(dZ)/m
            self.W = self.W - dW*alpha
            self.b = self.b - db*alpha
            round_n_loss = logloss(Y, A)
            self.loss.append(round_n_loss)
        self.training_loss = round_n_loss

data = load_breast_cancer()
X = np.transpose(data['data'])
Y = np.reshape(data['target'],(1,-1))

lr = LogisticRegression()
lr.fit(X,Y,0.01,2000,normalize=True)

print (lr.training_loss)

0.08794135717666299


In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)
clf = LogisticRegression().fit(X, y)
pred_sklearn = clf.predict_proba(X)



In [8]:
logloss(y,pred_sklearn[:,1])

0.09234737306912336

In [45]:
# from sklearn.datasets import load_breast_cancer
# import statsmodels.discrete.discrete_model as sm

# X, y = load_breast_cancer(return_X_y=True)
# logit = sm.Logit(y,X).fit()

In [1]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
# import statsmodels.genmod.generalized_linear_model as sm
import statsmodels.formula.api as smf
import statsmodels.api as sm
x, y = load_breast_cancer(return_X_y=True)
data = pd.DataFrame(columns = ['Response','Day','Class'])
data['Response'] = y
data['Day'] = x[:,0]
data['Class'] = x[:,1]

# logit = sm.GLM(y,X).fit()

In [2]:
formula = 'Response~Day+Class'
model = smf.glm(formula,data,family=sm.families.Binomial())
model_result = model.fit()

In [18]:
import pandas as pd
import numpy as np

In [19]:
df = pd.DataFrame({'cat':['high','medium','low','high',np.nan]})

In [20]:
df['cat'].dtype

dtype('O')

In [21]:
df['cat_cat']=df['cat'].astype('category')

In [22]:
a= df['cat_cat'].cat

In [23]:
df['cat_cat'].cat.codes

0    0
1    2
2    1
3    0
4   -1
dtype: int8