In [1]:
import numpy as np
from scipy.optimize import least_squares
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Fetch the 784-feature MNIST dataset (OpenML name "mnist_784", version 1).
# The returned object exposes the pixel matrix as `.data` and the digit
# labels as `.target`, which the following cell consumes.
mnist = fetch_openml(name="mnist_784", version=1)

In [3]:
# Everything below is derived directly from the freshly loaded `mnist` object
# instead of re-assigning `x` / `y` in terms of themselves. That keeps the cell
# idempotent: running it twice can no longer divide the pixels by 255 a second
# time or try to re-binarise labels that are already +1 / -1.

# The pixel intensities are scaled to lie between 0 and 1.
x = mnist.data / 255.0

# Binary one-vs-rest target: +1 when the digit is '0', -1 for every other digit.
# The comparison is against the *string* '0', so the raw labels must be strings.
y = np.where(mnist.target == '0', 1, -1)

# Guard against a silent all-negative target (e.g. if the labels were not
# strings, the comparison above would match nothing).
assert (y == 1).any() and (y == -1).any(), "expected both classes in the labels"

# (The dataset already comes shuffled — original author's note), so a plain
# positional split is used: the first N_TRAIN rows are the training set and
# the remaining rows are held out for testing.
N_TRAIN = 60000

x_train = np.array(x[:N_TRAIN])
y_train = np.array(y[:N_TRAIN])

x_test = np.array(x[N_TRAIN:])
y_test = np.array(y[N_TRAIN:])

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(60000, 784) (60000,) (10000, 784) (10000,)


In [4]:
# Standardize features.
# The scaler's statistics are estimated on the training split only and then
# re-used unchanged on the test split, so no test information leaks into the fit.
scaler = StandardScaler()
scaler.fit(x_train)
X_train = scaler.transform(x_train)
X_test = scaler.transform(x_test)

In [5]:
def sigmoid(z):
    """Squash ``z`` element-wise with the hyperbolic tangent.

    Despite the name, this is ``tanh`` and not the logistic function: the
    output lies in [-1, 1] rather than [0, 1], which lines up with the
    +1 / -1 labels used in this notebook.

    Parameters
    ----------
    z : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    Same shape as ``z``; ``np.tanh(z)``.
    """
    squashed = np.tanh(z)
    return squashed

# Residual function handed to the nonlinear least-squares solver
def residuals(params, X, y, lam):
    """Stacked residual vector for the regularised fit.

    Parameters
    ----------
    params : 1-D array
        All entries but the last are the feature weights (``beta``); the
        last entry is the intercept (``v``).
    X : 2-D array
        Design matrix; multiplied as ``X @ beta``.
    y : 1-D array
        Targets subtracted from the squashed predictions.
    lam : scalar
        Multiplier applied to the weights to form the penalty residuals.

    Returns
    -------
    1-D array
        The data residuals ``sigmoid(X @ beta + v) - y`` followed by the
        penalty residuals ``lam * beta``. The intercept is not penalised.
        Because a least-squares solver squares these entries, the effective
        penalty scales with ``lam**2``, not ``lam``.
    """
    weights, bias = params[:-1], params[-1]
    # Data-fit part: squashed linear score minus the target.
    fit_error = sigmoid(X @ weights + bias) - y
    # Ridge-style part: one extra residual per weight.
    penalty = lam * weights
    return np.concatenate((fit_error, penalty))

In [6]:
# Starting point for the optimiser: one weight per feature column plus a
# trailing intercept slot, all initialised to zero.
n_features = X_train.shape[-1]
initial_params = np.zeros(n_features + 1, dtype=float)

# Regularization parameter. `residuals` appends `lam * beta` to the residual
# vector, so after squaring the penalty grows with lam**2.
lam = 100

In [None]:
# Solve the regularised nonlinear least-squares problem for (beta, v).
# NOTE(review): no `method` or `jac` is passed, so SciPy's defaults apply —
# per the SciPy docs that is the 'trf' solver with a finite-difference
# Jacobian, not Levenberg-Marquardt. Confirm this is the intended solver.
result = least_squares(
    fun=residuals,
    x0=initial_params,
    args=(X_train, y_train, lam),
    verbose=2,  # print per-iteration progress
)

In [11]:
# Solver diagnostics, one per line: the first-order optimality measure at the
# returned point, then the final value of the cost function.
print(result.optimality, result.cost, sep="\n")

0.19689954964553635
1768.7499162228255


In [12]:
# Split the fitted parameter vector using the same layout `residuals` expects:
# every entry but the last is a feature weight, the last one is the intercept.
beta_opt, v_opt = result.x[:-1], result.x[-1]

In [13]:
# Predicted class for each test row: the sign of the squashed linear score
# (+1 -> digit '0', -1 -> any other digit).
# Note: np.sign returns 0 for a score of exactly 0, which matches neither label.
y_hat = np.sign(
    sigmoid(X_test @ beta_opt + v_opt)
)

In [14]:
# Fraction of test rows whose predicted label equals the true label.
accuracy = (y_hat == y_test).mean()

# The message is in Portuguese ("Precisão de" = "Precision of"); the number
# reported is plain accuracy, not the precision metric.
# NOTE(review): if digit '0' is a minority class, accuracy alone may look
# optimistic — consider reporting precision/recall as well.
print(f'Precisão de {accuracy*100:.2f}%')

Precisão de 99.04%
