In [1]:
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

In [2]:
cancer = datasets.load_breast_cancer()
clf = LogisticRegression()

In [3]:
model = clf.fit(cancer.data, cancer.target)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [4]:
print("Accuracy: {:.2f}".format(model.score(cancer.data, cancer.target)))

Accuracy: 0.94


In [5]:
preds = model.predict(cancer.data)

In [6]:
print("ROC AUC: {:.2f}".format(metrics.roc_auc_score(cancer.target, preds)))
print("Accuracy: {:.2f}".format(metrics.accuracy_score(cancer.target, preds)))
print("F1 score: {:.2f}".format(metrics.f1_score(cancer.target, preds)))

ROC AUC: 0.94
Accuracy: 0.94
F1 score: 0.96


In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(cancer.data, 
                                                    cancer.target,
                                                    test_size = 0.1,
                                                   random_state = 42)

In [9]:
model = clf.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [10]:
print("Train accuracy: {:.2f}".format(model.score(X_train, y_train)))
print("Test accuracy: {:.2f}".format(model.score(X_test, y_test)))

Train accuracy: 0.96
Test accuracy: 0.98


In [11]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet

In [12]:
boston = datasets.load_boston()

In [13]:
lasso = Lasso()
ridge = Ridge()
elastic = ElasticNet()

In [14]:
for model in [lasso, ridge, elastic]:    
    X_train, X_test, y_train, y_test = train_test_split(boston.data, 
                                                    boston.target,
                                                    test_size = 0.2,
                                                   random_state = 42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(model.__class__)
    print("MSE: {:.2f}\n".format(metrics.mean_squared_error(y_test, preds)))

<class 'sklearn.linear_model._coordinate_descent.Lasso'>
MSE: 24.41

<class 'sklearn.linear_model._ridge.Ridge'>
MSE: 24.48

<class 'sklearn.linear_model._coordinate_descent.ElasticNet'>
MSE: 23.97



In [15]:
print("R2: {:.2f}".format(metrics.r2_score(y_test, preds)))
print("R2: {:.2f}".format(model.score(X_test, y_test)))

R2: 0.67
R2: 0.67


# Cross validation

In [16]:
from sklearn.model_selection import KFold, cross_val_score
iris = datasets.load_iris()
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [17]:
print(iris.DESCR[:475])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Vi


In [22]:
clf = LogisticRegression()
cv = KFold(n_splits = 10) # Stratified KFold

In [23]:
for split_idx, (train_idx, test_idx) in enumerate(cv.split(iris.data)):
    X_train, X_test = iris.data[train_idx], iris.data[test_idx]
    y_train, y_test = iris.target[train_idx], iris.target[test_idx]
    
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print("Split {} score: {:.2f}".format(split_idx, score))

Split 0 score: 1.00
Split 1 score: 1.00
Split 2 score: 1.00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Split 3 score: 1.00
Split 4 score: 0.93
Split 5 score: 0.87
Split 6 score: 1.00
Split 7 score: 0.87
Split 8 score: 0.87
Split 9 score: 0.93


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [24]:
cv_score = cross_val_score(clf,
                          iris.data, iris.target,
                          scoring = 'accuracy', cv = cv)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [25]:
print("Cross val score: {}".format(cv_score))
print("Mean cross val score: {}".format(cv_score.mean()))

Cross val score: [1.         1.         1.         1.         0.93333333 0.86666667
 1.         0.86666667 0.86666667 0.93333333]
Mean cross val score: 0.9466666666666667
