In [1]:
import numpy as np
from scipy.optimize import least_squares
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the MNIST dataset
mnist = fetch_openml("mnist_784", version=1)

In [3]:
x = mnist.data
y = mnist.target

# (o dataset já vem embaralhado)

# The pixel intensities are scaled to lie between 0 and 1 
x = x / 255.0

# change y to be 1 when it's 0, and -1 when it's not 0
y = np.where(y == '0', 1, -1)

x_train = x[:60000]
y_train = y[:60000]

x_train = np.array(x_train)
y_train = np.array(y_train)

x_test = x[60000:]
y_test = y[60000:]

x_test = np.array(x_test)
y_test = np.array(y_test)

# add extra column of 1's to x_train and x_test (at the beginning)
x_train = np.insert(x_train, 0, 1, axis=1)
x_test = np.insert(x_test, 0, 1, axis=1)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(60000, 785) (60000,) (10000, 785) (10000,)


In [4]:
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(x_train)
X_test = scaler.transform(x_test)

In [5]:
def sigmoid(z):
    return np.tanh(z)

# Residual function for LM
def residuals(params, X, y, lam):
    beta = params[:-1]
    v = params[-1]
    preds = sigmoid(X @ beta + v)
    residual = preds - y
    # Add regularization
    regularization = lam * beta
    return np.concatenate([residual, regularization])

In [6]:
# Initial parameters
n_features = X_train.shape[1]
initial_params = np.zeros(n_features + 1)
lam = 100 # Regularization parameter

In [7]:
# Solve using Levenberg-Marquardt
result = least_squares(residuals, initial_params, args=(X_train, y_train, lam), verbose=2)

   Iteration     Total nfev        Cost      Cost reduction    Step norm     Optimality   
       0              1         3.0000e+04                                    4.82e+04    
       1              2         4.2237e+03      2.58e+04       8.17e-01       5.22e+03    
       2              3         2.0743e+03      2.15e+03       5.32e-01       8.34e+02    
       3              4         1.7752e+03      2.99e+02       4.86e-01       8.40e+01    
       4              5         1.7691e+03      6.09e+00       8.11e-02       2.38e+01    
       5              6         1.7688e+03      2.79e-01       2.37e-02       1.40e+01    
       6              7         1.7688e+03      4.67e-02       1.15e-02       8.20e+00    
       7              8         1.7688e+03      1.08e-02       6.22e-03       5.13e+00    
       8              9         1.7688e+03      3.49e-03       3.64e-03       3.18e+00    
       9             10         1.7688e+03      1.20e-03       2.19e-03       2.00e+00    

In [8]:
print(result.optimality)
print(result.cost)

0.19689954964553635
1768.7499162228255


In [9]:
# Extract optimized parameters
beta_opt = result.x[:-1]
v_opt = result.x[-1]

In [10]:
# Predictions
y_hat = np.sign(sigmoid(X_test @ beta_opt + v_opt))

In [11]:
# compare y_hat with y_test
accuracy = np.mean(y_hat == y_test)

print(f'Precisão de {accuracy*100:.2f}%')

Precisão de 99.04%
