In [21]:
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

cancer = datasets.load_breast_cancer()
logistic_regression = LogisticRegression(max_iter=3000)
model = logistic_regression.fit(cancer.data, cancer.target)
print(f'Accuracy {model.score(cancer.data, cancer.target)}')

Accuracy 0.9578207381370826


In [22]:
predictions = model.predict(cancer.data)
print(f'Accuracy {metrics.accuracy_score(cancer.target, predictions)}')
print(f'ROC AUC {metrics.roc_auc_score(cancer.target, predictions)}')
print(f'f1 score {metrics.f1_score(cancer.target, predictions)}')

Accuracy 0.9578207381370826
ROC AUC 0.9520175994926273
f1 score 0.9666666666666667


In [23]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2, random_state=12)

In [28]:
logistic_regression = LogisticRegression(max_iter=3000)
model = logistic_regression.fit(X_train, y_train)

print(f'Train accuracy: {model.score(X_train, y_train)}')
print(f'Test accuracy: {model.score(X_test, y_test)}')

Train accuracy: 0.9648351648351648
Test accuracy: 0.9385964912280702


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [30]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet

boston = datasets.load_boston()
lasso = Lasso()
ridge = Ridge()
elasticnet = ElasticNet()
for model in [lasso, ridge, elasticnet]:
    X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.2, random_state=12)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(model.__class__)
    print(f'MSE: {metrics.mean_squared_error(y_test, predictions)}')

<class 'sklearn.linear_model._coordinate_descent.Lasso'>
MSE: 29.131764829634708
<class 'sklearn.linear_model._ridge.Ridge'>
MSE: 20.71681582186216
<class 'sklearn.linear_model._coordinate_descent.ElasticNet'>
MSE: 27.897263904550158


# Кросс валидация

In [31]:
from sklearn.model_selection import KFold, cross_val_score
iris = datasets.load_iris()
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [32]:
logistic_regression = LogisticRegression()
cv = KFold(n_splits=5)

for split_idx, (train_idx, test_idx) in enumerate(cv.split(iris.data)):
    X_train, X_test = iris.data[train_idx], iris.data[test_idx]
    y_train, y_test = iris.target[train_idx], iris.target[test_idx]
    logistic_regression.fit(X_train, y_train)
    score = logistic_regression.score(X_test, y_test)
    print(f'Split {split_idx}   Score {score}')

Split 0   Score 1.0
Split 1   Score 1.0
Split 2   Score 0.8666666666666667
Split 3   Score 0.9333333333333333
Split 4   Score 0.8333333333333334


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [33]:
cv_score = cross_val_score(
    logistic_regression, iris.data, iris.target, scoring='accuracy', cv=cv
)
print(f'Cross val score: {cv_score}')
print(f'Mean cross val score: {cv_score.mean()}')

Cross val score: [1.         1.         0.86666667 0.93333333 0.83333333]
Mean cross val score: 0.9266666666666665


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [34]:
TN = 920
FP = 5
TP = 15
FN = 60

acc = (TP + TN)/(TN + FP + TP + FN)

precision = TP / (TP + FP)
recall = TP / (TP + FN)

f1 = (2 * precision * recall) / (precision + recall)

In [35]:
acc, precision, recall, f1

(0.935, 0.75, 0.2, 0.31578947368421056)

In [80]:
pred_1 = [0, 0, 1, 0, 1]
tar_1 = [0, 0, 1, 1, 1]
pred_2 = [1, 0, 1, 0, 1]

In [81]:
print(metrics.mean_squared_error(pred_1, tar_1, squared=False))
print(metrics.roc_auc_score(pred_1, tar_1))
print(metrics.mean_squared_error(pred_1, tar_1))
print(metrics.mean_absolute_error(pred_1, tar_1))
print(metrics.log_loss(pred_1, tar_1))

0.4472135954999579
0.8333333333333334
0.2
0.2
6.907915198468176


In [82]:
print(metrics.mean_squared_error(pred_2, tar_1, squared=False))
print(metrics.roc_auc_score(pred_2, tar_1))
print(metrics.mean_squared_error(pred_2, tar_1))
print(metrics.mean_absolute_error(pred_2, tar_1))
print(metrics.log_loss(pred_2, tar_1))

0.6324555320336759
0.5833333333333333
0.4
0.4
13.815670477450311
