In [None]:

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, cohen_kappa_score, matthews_corrcoef, precision_recall_curve
import joblib

# Load scikit-learn's bundled breast-cancer dataset and pull out the
# feature matrix and the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:
# Baseline: logistic regression with a raised iteration cap, trained on the
# raw training split and scored on the held-out split.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)
print('Accuracy:', accuracy_score(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# L1-penalised logistic regression, fitted with the liblinear solver.
model = LogisticRegression(solver='liblinear', penalty='l1')
model.fit(X=X_train, y=y_train)
print('Accuracy:', accuracy_score(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# L2-penalised logistic regression with a raised iteration cap.
model = LogisticRegression(max_iter=1000, penalty='l2')
model.fit(X=X_train, y=y_train)
print('Accuracy:', accuracy_score(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Elastic-net logistic regression: an even L1/L2 mix (l1_ratio=0.5) fitted
# with the saga solver.
model = LogisticRegression(
    penalty='elasticnet',
    l1_ratio=0.5,
    solver='saga',
    max_iter=1000,
)
model.fit(X=X_train, y=y_train)
print('Accuracy:', accuracy_score(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Exhaustive search over regularisation strength and penalty type, scored
# with 5-fold cross-validation on the training split.
params = {
    'C': [0.1, 1, 10],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear'],
}
grid = GridSearchCV(estimator=LogisticRegression(), param_grid=params, cv=5)
grid.fit(X_train, y_train)
print(grid.best_params_)

In [None]:
# Randomised search over the regularisation strength C: 3 of the 4 candidate
# values are sampled and each is scored by cross-validation on the training
# split.
params = {'C': [0.01, 0.1, 1, 10]}
rand = RandomizedSearchCV(
    LogisticRegression(max_iter=1000),
    params,
    n_iter=3,
    # Seed the candidate sampling: without it a different subset of C values
    # is tried on every run, so the reported best parameters are not
    # reproducible under Restart & Run All.
    random_state=42,
)
rand.fit(X_train, y_train)
print(rand.best_params_)

In [None]:
# 5-fold stratified cross-validation over the full dataset; report the mean
# of the per-fold scores.
cv = StratifiedKFold(n_splits=5)
scores = cross_val_score(
    estimator=LogisticRegression(max_iter=1000),
    X=X,
    y=y,
    cv=cv,
)
print(scores.mean())

In [None]:
# Train on standardised features.
#
# The scaler is fitted on the training split only and then applied to the
# test split. Fitting it on all of X before splitting would leak test-set
# statistics into training.
scaler = StandardScaler()
Xtr = scaler.fit_transform(X_train)
Xte = scaler.transform(X_test)
# Reuse the seeded split so the result is reproducible and is measured on the
# same held-out rows as the unscaled models.
ytr, yte = y_train, y_test
# Whole dataset in the training-fitted scale, kept under its original name.
X_scaled = scaler.transform(X)

# Fit a fresh estimator with the same hyper-parameters as `model` instead of
# refitting `model` in place. `model` is later used to predict on the unscaled
# X_test, so it must stay trained on unscaled features.
scaled_model = LogisticRegression(**model.get_params())
scaled_model.fit(Xtr, ytr)
print(accuracy_score(yte, scaled_model.predict(Xte)))

In [None]:
# Confusion matrix of the current `model` on the held-out split.
# NOTE(review): `model` is whatever estimator earlier cells left bound; confirm
# it was fitted on the same (unscaled) feature space as X_test.
print(confusion_matrix(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Precision, recall and F1 of the current `model` on the held-out split,
# printed on one line in that order.
# NOTE(review): `model` is whatever estimator earlier cells left bound; confirm
# it was fitted on the same (unscaled) feature space as X_test.
y_pred = model.predict(X_test)
print(*(metric(y_test, y_pred) for metric in (precision_score, recall_score, f1_score)))

In [None]:
# Logistic regression with class_weight='balanced', scored on the held-out
# split.
model = LogisticRegression(max_iter=1000, class_weight='balanced')
model.fit(X=X_train, y=y_train)
print(accuracy_score(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# ROC AUC from the predicted probability in column 1 of predict_proba.
test_proba = model.predict_proba(X_test)
y_prob = test_proba[:, 1]
print(roc_auc_score(y_true=y_test, y_score=y_prob))

In [None]:
# Cohen's kappa between the true labels and the model's test predictions.
kappa = cohen_kappa_score(y_test, model.predict(X_test))
print(kappa)

In [None]:
# Matthews correlation coefficient of the model's test predictions.
print(matthews_corrcoef(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Precision/recall pairs from the probabilities in `y_prob`; the third
# returned array is bound to `_` and not used here.
pr_points = precision_recall_curve(y_test, y_prob)
precision, recall, _ = pr_points
print('PR curve generated')

In [None]:
# Compare solvers: fit one model per solver on the training split and print
# the solver name with its test accuracy.
for s in ('liblinear', 'lbfgs', 'saga'):
    m = LogisticRegression(max_iter=1000, solver=s)
    m.fit(X=X_train, y=y_train)
    print(s, accuracy_score(y_true=y_test, y_pred=m.predict(X_test)))

In [None]:
# Logistic regression with C=0.5, scored on the held-out split.
model = LogisticRegression(max_iter=1000, C=0.5)
model.fit(X=X_train, y=y_train)
print(accuracy_score(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Absolute value of each coefficient in the first row of the fitted model's
# coefficient matrix.
print(np.absolute(model.coef_)[0])

In [None]:
# Compare accuracy on raw vs. standardised features, using the SAME train/test
# rows for both so the two numbers are directly comparable.
raw_acc = accuracy_score(y_test, model.predict(X_test))

# Fit the scaler on the training split only. Fitting on all of X would leak
# test-set statistics into training.
feature_scaler = StandardScaler().fit(X_train)
Xt = feature_scaler.transform(X_train)
Xe = feature_scaler.transform(X_test)
# Reuse the seeded split rather than drawing a new unseeded one.
yt, ye = y_train, y_test
# Whole dataset in the training-fitted scale, kept under its original name.
Xs = feature_scaler.transform(X)

# Fit a fresh estimator with the same hyper-parameters as `model` rather than
# refitting `model` in place, so `model` stays trained on raw features.
model_on_scaled = LogisticRegression(**model.get_params())
model_on_scaled.fit(Xt, yt)
print(raw_acc, accuracy_score(ye, model_on_scaled.predict(Xe)))

In [None]:
# Sweep the regularisation strength C and print each value with its mean
# 5-fold cross-validation score over the full dataset.
for c in (0.01, 0.1, 1, 10):
    m = LogisticRegression(max_iter=1000, C=c)
    fold_scores = cross_val_score(m, X, y, cv=5)
    print(c, fold_scores.mean())

In [None]:
# One-vs-rest logistic regression.
#
# The `multi_class` argument is deprecated since scikit-learn 1.5 and
# scheduled for removal, so it is no longer passed. For a binary target such
# as this dataset's, the default setting already resolves to the same
# one-vs-rest fit, so the fitted model is unchanged. For a multiclass target,
# wrap the estimator in sklearn.multiclass.OneVsRestClassifier instead.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
print(accuracy_score(y_test, model.predict(X_test)))

In [None]:
# Wrap the feature matrix in a DataFrame. `df` is not used by the remaining
# statements in this cell, which refit `model` on the training split and print
# its test accuracy.
df = pd.DataFrame(data=X)
model.fit(X=X_train, y=y_train)
print(accuracy_score(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Compare L1 and L2 penalties with the liblinear solver: print each penalty
# name with its test accuracy.
for p in ('l1', 'l2'):
    m = LogisticRegression(solver='liblinear', penalty=p)
    m.fit(X=X_train, y=y_train)
    print(p, accuracy_score(y_true=y_test, y_pred=m.predict(X_test)))

In [None]:
# Round-trip the fitted model through disk and check the reloaded copy still
# scores on the held-out split.
# joblib files are pickle-based: only load files from a trusted source.
joblib.dump(value=model, filename='log.pkl')
loaded = joblib.load(filename='log.pkl')
print(accuracy_score(y_true=y_test, y_pred=loaded.predict(X_test)))

In [None]:
# Predicted labels for the first five rows of the held-out split.
print(model.predict(X=X_test[0:5]))