In [3]:
## ####################################################
import sys
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import normalize
from sklearn.linear_model import SGDClassifier
## ###################################################

# load the data
X, y = load_breast_cancer(return_X_y=True)  ## X input, y output
## to convert the {0,1} output into {-1,+1}
y = 2*y - 1

X = normalize(X, norm='l2')


print(X.shape, y.shape)
mdata,ndim = X.shape


## learning parameters
nitermax = 50  ## maximum iteration
eta = 0.1      ## learning speed

nfold = 5         ## number of folds
cselection = KFold(n_splits=nfold, random_state=None, shuffle=False)
## initialize the learning parameters for all folds
f1 = np.zeros(nfold)
maxmargin_train = np.zeros(nfold)

"""
To do ....

"""
pos = 0
for train_index, test_index in cselection.split(X):
    w = np.zeros(ndim)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    iteration = 0
    max_margin = -np.inf

    classifier = SGDClassifier(max_iter=nitermax, eta0=eta)
    #computing the score
    classifier.fit(X_train, y_train)

    #Apply the trained weight
    y_pred = np.zeros(y_test.shape[0])
    for i in range(y_test.shape[0]):
        y_hat = np.dot(w, X_test[i])
        if y_hat <= 0:
            y_pred[i] = -1
        else:
            y_pred[i] = 1

    #computing the score
    f1_loc = f1_score(y_test, y_pred)
    f1[pos] = f1_loc
    pos += 1


print('The average F1:',np.mean(f1))
print('The average maximum margin achieved in the training:',np.mean(maxmargin_train))

(569, 30) (569,)
The average F1: 0.0
The average maximum margin achieved in the training: 0.0


In [4]:
## ####################################################
import sys
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
## ###################################################

class logreg_sgd_cls:
    def __init__(self, eta, nitermax = 10):
        """
        Input: 
        eta: real learning speed
        nitermax: number of maximum iteration
        """

        self.eta = eta
        self.nitermax = nitermax
        self.w = None   ## learning weights
        self.margimax = 0 ## maximum marginin the training
        return
    
    ## --------------------------------------
    def fit(self, X, y):
        """
        Task: to solve the logistic regression problem
            by applying stochastic gradient algorithm
        Input: X  2d array of input examples in the rows 
               y  1d(vector) array of +1,-1 labels
        """
        m,n = X.shape
        ## initialize the weights 
        W = np.zeros(n)
        self.marginmax = 0
        
        ## iteration on the full data 
        for t in range (self.nitermax): 
            ## iteration on the examples 
            for i in range (m):
                xymargin = y[i]*np.dot (w,X[i]) ## functional margin
                if xymargin > self.marginmax: ## find the maximum of the margin
                    self.marginmax = xymargin
                ## compute the stochastic gradient
                philog = 1/(1+ np.exp(-(-xymargin))) ## to agree with the slide 
                deltaJ = -philog * y[i] * X[i]
                W = W - self.eta * deltaJ ## update the weights
            self.w = W

            return
        
        ## -----------------------------------
    
    def predict(self, X, y = None):
        """
        Task: to predict the labels for the given examples based on the self.w 
        Input:  X 2d array of input examples in the rows
        Output: y 1d array of predicted labels
        """
        xw= np.dot (X, self.w)
        ## predicting +1 probbaility
        Pyplus = 1/(1+np.exp(-xw))
        ## predicting -1 probability
        Pyminus = 1/(1+np.exp(xw))

        ## labels correspond to the greater probabilities 
        y = 2*(Pyplus > Pyminus)-1
        return y
    
    ## #######:
    def main(iworkmode):
        # load the data
        X, y = load_breast_cancer (return_X_y=True) ## X input, y output
        ## to convert the {0,1} output into {-1,+1}
        y = 2*y - 1

        print(X.shape, y.shape)
        mdata, ndim = X.shape
        nitermax = 50 ## maximum iteration
        eta = 0.1 ## learning speed
        nfold = 5 ## number of folds

        ## split the data into 5-folds
        cselection=KFold(n_splits=nfold,random_state=None, shuffle=False)
        iscale = 2 ## = 0 noscaling,
                   ## = 1 scaling by row wise L2 norm
                   ## = 2 scaling the rows by maximum absolute value, L infinite norm of columns
        if iscale == 1:
            xnorm = np.sqrt(np.sum(X**2, 1))
            xnorm = xnorm + (xnorm == 0)
            X /= np.outer(xnorm, np.ones(ndim))
        elif iscale == 2:
            X /= np.outer(np.ones(mdata), np.max(np.abs(X), 0))
        
        ## construct a learning object
        clogreg = logreg_sgd_cls(eta, nitermax=nitermax)

        ## initialize the learning results for all folds
        xf1 = np.zeros(nfold)
        xprecision = np.zeros(nfold)
        xrecall = np.zeros(nfold)
        xmargin = np.zeros(nfold)        

        ## run the cross-validation
        ifold = 0
        for index_train, index_test in cselection.split(X):
            Xtrain = X[index_train]
            ytrain = y[index_train]
            Xtest = X[index_test]
            ytest = y[index_test]
            mtrain = Xtrain.shape[0]
            mtest = Xtest.shape[0]
            print("Training size:",mtrain)
            print("Test size:", mtest)
            clogreg.fit(Xtrain, ytrain) ## training
            yprediction = clogreg.predict(Xtest) ## prediction

            true_positive = np.sum((ytest>0)* (yprediction>0))
            true_negative = np.sum((ytest<=0)*(yprediction<=0))
            false_positive = np.sum ((ytest<=0)*(yprediction>0))
            false_negative = np.sum ((ytest>0)*(yprediction<=0))
            precision = true_positive/(true_positive+false_positive)
            recall= true_positive/(true_positive+false_negative)
            f1 = 2 * precision * recall / (precision + recall)
            xprecision [ifold] = precision
            xrecall[ifold] = recall 
            xf1[ifold] = f1
            xmargin[ifold] = clogreg.marginmax

            ## print('tp, fp, fn, tn:', true_positive, false_positive, false negative, true_negative) 
            print('Fold, f1, precision, recall:', ifold, '%5.3f'%f1, \
                    '%5.3f'%precision, '%5.3f '%recall)
            print('Maximum margin:', '%7.4f'%xmargin [ifold])
            ifold += 1
        print ('The average F1:', '%5.4f '%np.mean(xf1))
        print('The average maximum margin:', '%7.4f '%np. mean (xmargin))
        return
# ## ######################
# ## ######################
# if __name__ == "__main__":
#     if len(sys.argv)==1:
#         iworkmode=0
#     elif len(sys.argv)>=2:
#         iworkmode=eval(sys.argv[1])
# main(iworkmode)

