# ML model examples: Linear and logistic regression

### Acknowledgments & Credits

This lesson is adapted from the excellent curriculum materials by Cliburn Chan (2021) at https://github.com/cliburn/bios-823-2021/ under the MIT License.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

## Logistic regression example: Breast Cancer Wisconsin (Diagnostic) Data Set

See <https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)> for more information.

### Load and inspect the data

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
bc = load_breast_cancer(as_frame=True)

In [None]:
bc.data

In [None]:
bc.target_names

In [None]:
bc.target.head()

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from umap import UMAP

In [None]:
# Dimensionality-reduction estimators to compare, keyed by the label that is
# later used for the axis names of each panel. All three are created with
# their library defaults.
# NOTE(review): t-SNE and UMAP are presumably stochastic, so the figure may
# differ between runs unless a random_state is passed — confirm against the
# library documentation if reproducibility matters.
dr_models = {
    'PCA': PCA(),
    't-SNE': TSNE(),
    'UMAP': UMAP(),
}

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()

In [None]:
# Reduce the standardized features with each method and draw the first two
# components of every embedding side by side, colored by the target label.
fig, axes = plt.subplots(1,3,figsize=(12,4))
axes = axes.ravel()

# The standardized features and the labels do not depend on the reduction
# method, so compute them once instead of on every loop iteration.
X_scaled = scaler.fit_transform(bc.data)
target = bc.target

for i, (k, v) in enumerate(dr_models.items()):
    X = v.fit_transform(X_scaled)
    ax = axes[i]
    ax.scatter(X[:, 0], X[:, 1], c=target)
    ax.set_xlabel(f'{k}1')
    ax.set_ylabel(f'{k}2')
    # Remove the tick marks from both axes.
    ax.set_xticks([])
    ax.set_yticks([])

### Split into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X = bc.data
y = bc.target

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, stratify=y)

### Preprocess (standardize etc)

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to the test split, so the test data plays no part in
# estimating the scaling parameters.
# NOTE: both names are rebound to their scaled versions, so re-running this
# cell operates on already-scaled data.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
pd.Series(y_test).value_counts(normalize=True)

### Create and train models

In [None]:
from sklearn.dummy import DummyClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
# from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
# Candidate classifiers keyed by a short name. The dummy classifier (using the
# 'prior' strategy) is listed first as the baseline for the others.
sl_models = {
    'dummy': DummyClassifier(strategy='prior'),
    'knn': KNeighborsClassifier(),
    'lr': LogisticRegression(),
    'svc': SVC(),
    'nn': MLPClassifier(max_iter=500),
}

In [None]:
# Fit every candidate on the training split and print its score on the
# held-out test split, one line per model.
for name, clf in sl_models.items():
    # scikit-learn's fit() returns the estimator itself, so the two calls chain.
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    print(f'{name}: {score:.3f}')

## Linear Regression example


In [None]:
import numpy as np
import matplotlib.pyplot as plt

### Create data set

In [None]:
# Seed NumPy's global RNG so the synthetic data set is the same on every run.
np.random.seed(123)
# 25 standard-normal inputs, cast to float32.
xs = np.random.normal(size=(25,)).astype('float32')
# Targets follow the line y = 3x - 1 plus Gaussian noise with scale 0.3, so
# the fits below should recover a slope near 3 and an intercept near -1.
ys = 3*xs -1 + np.random.normal(scale=0.3, size=xs.shape).astype('float32')

In [None]:
plt.scatter(xs, ys)

#### Initial parameter guess

### Analytic solution

From linear algebra, we know that the analytic *least squares* solution can be found by projecting $y$ onto the column space of $X$, which gives the normal equations

$$
\hat{\theta} = (X^TX)^{-1}X^T y
$$

where $X$ is the data matrix augmented with a column of ones, $\mathbf{1}$, to represent the intercept (or bias).

In [None]:
X = np.c_[np.ones_like(xs), xs]
X

#### Solving normal equations directly

In [None]:
# Solve the normal equations (X^T X) theta = X^T y as a linear system rather
# than forming the inverse explicitly. The first column of X is the column of
# ones, so the first entry of the solution is the intercept b and the second
# is the slope w.
b, w = np.linalg.solve(X.T@X, X.T@ys)
print(f'w = {w:.2f}, b = {b:.2f}')

In [None]:
(X @ np.array([b, w])) - (w*xs + b)

#### Analytic solution using library function

In [None]:
# np.linalg.lstsq returns a 4-tuple; its first element is the least-squares
# solution, unpacked here as (intercept b, slope w) to match the column order
# of X. The other three values are bound but not used in the visible cells.
(b, w), resid, rank, s = np.linalg.lstsq(X, ys, rcond=None)
print(f'w = {w:.4f}, b = {b:.4f}')

In [None]:
plt.scatter(xs, ys)
plt.plot(xs, w*xs + b, c='red')

### Using `keras` (and `tensorflow`)

In [None]:
# import tensorflow as tf
from keras import Sequential
import keras.layers as kl
import keras.ops as kops 
import keras.optimizers as koptim

In [None]:
def custom_loss(yhat, y):
    """Return the mean of the squared element-wise difference of the inputs.

    NOTE(review): Keras calls a compiled loss positionally as
    ``loss(y_true, y_pred)``, so when this function is passed to ``compile``
    the argument named `yhat` presumably receives the targets and `y` the
    predictions. The squared difference is symmetric in its two arguments,
    so the returned value is the same either way.
    """
    residual = yhat - y
    return kops.mean(residual**2)

In [None]:
# One Dense unit with a linear activation on a single input feature, i.e. the
# model y = w*x + b.
model_keras = Sequential([
    kl.Input(shape=(1,)),
    kl.Dense(1, activation='linear'),
])
model_keras.summary()

In [None]:
# Configure training: SGD with a learning rate of 0.1, minimizing the
# hand-written custom_loss function. The commented-out line is the built-in
# string alternative that could be used in its place.
model_keras.compile(
    optimizer = koptim.SGD(learning_rate=0.1),
    # loss='mse'
    loss = custom_loss
)

In [None]:
history = model_keras.fit(xs, ys, epochs=100, verbose=0)

In [None]:
[w.squeeze().item() for w in model_keras.get_weights()]

In [None]:
plt.scatter(xs, ys)
plt.plot(xs, model_keras.predict(xs), c='red')

### Using PyTorch

In [None]:
import torch
from torch import optim, nn

In [None]:
model_torch = nn.Sequential(
    nn.Linear(in_features=1, out_features=1)
)

In [None]:
optimizer = optim.SGD(model_torch.parameters(), lr=0.1)

In [None]:
def loss_torch(yhat, y):
    """Return the mean squared error between `yhat` and `y` as a scalar tensor."""
    return (yhat - y).pow(2).mean()
# Built-in alternative: loss_torch = nn.MSELoss()

In [None]:
# Add a trailing dimension of size 1 so the 1-D arrays of 25 values become
# column tensors with one feature per row, matching nn.Linear(in_features=1).
xs_tensor = torch.as_tensor(xs).unsqueeze(1)
ys_tensor = torch.as_tensor(ys).unsqueeze(1)

Inputs have shape batch_size × num_features:

In [None]:
xs_tensor.shape, ys_tensor.shape

In [None]:
ys_tensor

In [None]:
yhat = model_torch(xs_tensor)
yhat

In [None]:
loss = loss_torch(yhat, ys_tensor)
loss

In [None]:
ys_tensor.requires_grad, yhat.requires_grad, loss.requires_grad

In [None]:
loss.backward()

In [None]:
model_torch[0].weight.grad, model_torch[0].bias.grad

In [None]:
optimizer.step()
optimizer.zero_grad()

In [None]:
# Full-batch gradient descent: repeat the forward / backward / update cycle
# that was shown one step at a time in the preceding cells, 1000 times.
for epoch in range(1000):
    # Forward pass and loss over the whole data set.
    yhat = model_torch(xs_tensor)
    loss = loss_torch(yhat, ys_tensor)

    # Backpropagate, take one optimizer step, then clear the gradients so they
    # do not accumulate into the next iteration.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()


In [None]:
model_torch.state_dict()

In [None]:
plt.scatter(xs, ys)
plt.plot(xs, model_torch(xs_tensor).detach(), c='red')

## Underfitting and overfitting

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import torch
from torch import optim, nn

In [None]:
bc = load_breast_cancer(as_frame=True)
bc.data

In [None]:
X = bc.data['worst radius']
y = bc.data['worst area']

In [None]:
plt.scatter(X, np.sqrt(y))

In [None]:
def build_model(hidden_units=1, activation=None):
    """Build a small regression network for a single input feature.

    Parameters
    ----------
    hidden_units : int
        Output width of the first linear layer. With the default of 1 the
        model is a single ``nn.Linear(1, 1)`` layer and `activation` is
        ignored.
    activation : nn.Module or None
        Module inserted after the first linear layer when `hidden_units` is
        greater than 1. ``None`` means no activation is inserted.

    Returns
    -------
    nn.Sequential
        ``Linear(1, hidden_units)`` on its own, or followed by the optional
        activation and a final ``Linear(hidden_units, 1)``.
    """
    model = nn.Sequential(
        nn.Linear(in_features=1, out_features=hidden_units),
    )
    if hidden_units > 1:
        # Compare against None explicitly: a module that defines __len__ (for
        # example an empty container) is falsy and would otherwise be
        # silently left out.
        if activation is not None:
            model.append(activation)
        model.append(nn.Linear(in_features=hidden_units, out_features=1))
    return model


In [None]:
# A network with 12 hidden units and a GELU activation, trained against mean
# squared error with Adam (lr=1e-2). The commented-out SGD line is kept as an
# alternative optimizer.
# NOTE: this rebinds `optimizer`, which earlier referred to the SGD optimizer
# of model_torch.
model = build_model(hidden_units=12, activation=nn.GELU())
loss_fn = nn.MSELoss()
# optimizer = optim.SGD(model.parameters(), lr=1e-3)
optimizer = optim.Adam(model.parameters(), lr=1e-2)

In [None]:
model

In [None]:
X_tensor = torch.as_tensor(X).unsqueeze(1).float()
y_tensor = torch.as_tensor(y).unsqueeze(1).float()


In [None]:
X_tensor.shape, y_tensor.shape

In [None]:
def train_model(model, X, y, loss_fn, optimizer, epochs=10_000):
    """Train `model` with full-batch optimizer steps, printing the loss.

    Parameters
    ----------
    model : callable
        Called as ``model(X)`` to produce predictions (e.g. an ``nn.Module``).
    X, y :
        Inputs and targets; anything ``torch.as_tensor`` accepts. Each gets a
        trailing dimension of size 1 and is cast with ``.float()``.
        NOTE(review): this assumes 1-D inputs (one value per sample) —
        confirm before passing data that is already 2-D.
    loss_fn : callable
        Called as ``loss_fn(yhat, y)``; must return a scalar tensor.
    optimizer : torch.optim.Optimizer
        Optimizer that updates the model's parameters.
    epochs : int
        Number of update steps. The loss is printed for the first epoch and
        for every epoch that is a multiple of 1000.

    Returns
    -------
    tuple
        ``(model, X, y)`` where `X` and `y` are the converted tensors.
    """
    X = torch.as_tensor(X).unsqueeze(1).float()
    y = torch.as_tensor(y).unsqueeze(1).float()

    # Discard gradients left over from any backward() call made before this
    # function was entered; otherwise they would be added into the first step.
    optimizer.zero_grad()

    for epoch in range(1, 1+epochs):
        yhat = model(X)
        loss = loss_fn(yhat, y)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if epoch == 1 or epoch % 1000 == 0:
            print(f'Epoch {epoch:6d}, Train loss {loss.item():f}')
    return model, X, y

In [None]:
model, X_tensor, y_tensor = train_model(model=model, X=X, y=y,
                                        loss_fn=loss_fn, optimizer=optimizer,
                                        epochs=20_000)

In [None]:
model.state_dict()

In [None]:
# Plot the observed data, then overlay the model's predictions evaluated on
# 500 evenly spaced inputs between the smallest and largest observed X.
plt.scatter(X_tensor, y_tensor)
X_seq = torch.linspace(X_tensor.min(), X_tensor.max(), 500).unsqueeze(1)
# detach() separates the predictions from the autograd graph before plotting.
plt.plot(X_seq, model(X_seq).detach(), c='red')