# ML model examples: Logistic regression

### Acknowledgments & Credits

This lesson is adapted from the excellent curriculum materials by Cliburn Chan (2021) at https://github.com/cliburn/bios-823-2021/ under the MIT License.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

## Example dataset: Breast Cancer Wisconsin (Diagnostic) Data Set

See <https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)> for more information.

### Load and inspect the data

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
# Load the dataset bundled with scikit-learn. as_frame=True requests pandas
# objects, which the following cells rely on when calling .head().
bc = load_breast_cancer(as_frame=True)

In [None]:
# Preview the first rows of the feature table.
bc.data.head()

In [None]:
# Show the class names that go with the labels in bc.target.
bc.target_names

In [None]:
# Preview the first few target labels.
bc.target.head()

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP

In [None]:
# Dimension-reduction models, keyed by the label used on the plot axes.
# Only the first two output dimensions of each are plotted.
# t-SNE and UMAP are stochastic, so they are seeded to make the figure
# reproducible between runs (the train/test split in this notebook is
# seeded with random_state=0 as well).
dr_models = {
    'PCA': PCA(),
    't-SNE': TSNE(random_state=0),
    'UMAP': UMAP(random_state=0),
}

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Single scaler instance shared by the notebook: it is fitted on the full
# data for the visualisation and fitted again on the training split later.
scaler = StandardScaler()

In [None]:
# Standardise once up front: the scaled features and the class labels are the
# same for every model, so they do not need to be recomputed inside the loop.
X_scaled = scaler.fit_transform(bc.data)
labels = bc.target

fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# One panel per dimension-reduction model, points coloured by class label.
# The result is named `embedding` rather than `X` so it cannot be confused
# with the feature matrix `X` defined later in the notebook.
for ax, (name, model) in zip(axes.ravel(), dr_models.items()):
    embedding = model.fit_transform(X_scaled)
    # Plot the first two output dimensions against each other.
    ax.scatter(embedding[:, 0], embedding[:, 1], c=labels)
    ax.set_xlabel(f'{name}1')
    ax.set_ylabel(f'{name}2')
    # Hide the ticks on both axes.
    ax.set_xticks([])
    ax.set_yticks([])

### Split into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Feature table and target labels used by the supervised models below.
X, y = bc.data, bc.target

In [None]:
# Hold out a test set. stratify=y keeps the class proportions the same in
# both splits; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, stratify=y)

### Preprocess (standardize, etc.)

In [None]:
# Learn the scaling parameters from the training split only, then apply that
# same transformation to both splits so the test data never informs the fit.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Relative frequency of each class in the test split.
test_labels = pd.Series(y_test)
test_labels.value_counts(normalize=True)

### Create and train models

In [None]:
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
# from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
# Classifiers to compare, keyed by the short name printed next to each score.
# 'dummy' predicts from the class prior and serves as the baseline.
# The MLP is seeded so its score is reproducible from run to run, in line
# with the seeded train/test split.
# NOTE: the variable name `sl_modles` (sic) is kept as-is because the
# training loop refers to it by this name.
sl_modles = dict(
    dummy=DummyClassifier(strategy='prior'),
    knn=KNeighborsClassifier(),
    lr=LogisticRegression(),
    svc=SVC(),
    nn=MLPClassifier(max_iter=500, random_state=0),
)

In [None]:
# Fit each classifier on the training split and print its score on the
# held-out test split, formatted to three decimals.
for name, clf in sl_modles.items():
    # fit() returns the fitted estimator, so the scoring call can be chained.
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    print(f'{name}: {score:.3f}')