In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# Load scikit-learn's bundled breast cancer dataset; its .data and .target
# attributes feed the train/test split.
cancer = load_breast_cancer()

In [4]:
# Split into train and test sets. Stratifying on the target keeps the class
# ratio the same in both splits; random_state pins the shuffle so the scores
# are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)

#### Increased max_iter to 5000 to avoid the warning "lbfgs failed to converge (status=1)" -- default C = 1

In [24]:
# Baseline: default regularization (C=1.0). max_iter is raised well above the
# default of 100 so that lbfgs can converge on the unscaled features.
logreg = LogisticRegression(max_iter=5000)
logreg.fit(X_train, y_train)  # fit() returns the estimator itself
print("Training set score: {:.8f}".format(logreg.score(X_train, y_train)))
print("Test set score: {:.8f}".format(logreg.score(X_test, y_test)))

Training set score: 0.95774648
Test set score: 0.95804196


#### Increasing C to 100 (weaker regularization) to improve accuracy

In [42]:
# C is the inverse of the regularization strength, so C=100 regularizes less
# than the default C=1.0 and lets the model fit the training data more closely.
logreg100 = LogisticRegression(C=100, max_iter=5000)
logreg100.fit(X_train, y_train)  # fit() returns the estimator itself
print("Training set score: {:.8f}".format(logreg100.score(X_train, y_train)))
print("Test set score: {:.8f}".format(logreg100.score(X_test, y_test)))

Training set score: 0.98122066
Test set score: 0.96503497


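#### Warning when max_iter = 5000 and C = 1000. Increasing max_iter works -- why? A larger C means weaker regularization, so on these unscaled features lbfgs needs more iterations to converge; scaling the data also helps.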

In [37]:
# Very weak regularization (C=1000). The model is bound to its own name so it
# does not overwrite the C=100 model held in `logreg100` when the notebook is
# run top to bottom.
# NOTE: with max_iter=5000 lbfgs stops at the iteration limit here and emits a
# convergence warning; the scores below are from that unconverged fit.
logreg1000 = LogisticRegression(max_iter=5000, C=1000).fit(X_train, y_train)
print("Training set score: {:.8f}".format(logreg1000.score(X_train, y_train)))
print("Test set score: {:.8f}".format(logreg1000.score(X_test, y_test)))

Training set score: 0.98591549
Test set score: 0.98601399


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


#### C = 0.01 gives slightly lower accuracy (not a dramatic drop)

In [43]:
# Strong regularization: C=0.01 is far below the default C=1.0, which pushes
# the model toward smaller coefficients.
logreg001 = LogisticRegression(C=0.01, max_iter=500)
logreg001.fit(X_train, y_train)  # fit() returns the estimator itself
print("Training set score: {:.8f}".format(logreg001.score(X_train, y_train)))
print("Test set score: {:.8f}".format(logreg001.score(X_test, y_test)))

Training set score: 0.95305164
Test set score: 0.95104895


In [39]:
# Print the built-in documentation for LogisticRegression (constructor
# signature with default values, followed by the class description).
help(LogisticRegression)

Help on class LogisticRegression in module sklearn.linear_model._logistic:

class LogisticRegression(sklearn.base.BaseEstimator, sklearn.linear_model._base.LinearClassifierMixin, sklearn.linear_model._base.SparseCoefMixin)
 |  LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='lbfgs', max_iter=100, multi_class='auto', verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
 |  
 |  Logistic Regression (aka logit, MaxEnt) classifier.
 |  
 |  In the multiclass case, the training algorithm uses the one-vs-rest (OvR)
 |  scheme if the 'multi_class' option is set to 'ovr', and uses the
 |  cross-entropy loss if the 'multi_class' option is set to 'multinomial'.
 |  (Currently the 'multinomial' option is supported only by the 'lbfgs',
 |  'sag', 'saga' and 'newton-cg' solvers.)
 |  
 |  This class implements regularized logistic regression using the
 |  'liblinear' library, 'newton-cg', 'sag'