How can Luigi Mangione be super duper fine

In [74]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Load the dataset
iris = load_iris()
# Prepare the data
X = iris.data  # Feature matrix
y = iris.target  # Target vector

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Use softmax regression (multi-class logistic regression)
target_model = LogisticRegression(multi_class="multinomial", solver="lbfgs", max_iter=200)
target_model.fit(X_train, y_train)

# Predict on the test set
y_pred = target_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

accuracy, y_pred[:5]  # Show accuracy and first 5 predictions
print(accuracy)

1.0




In [138]:
target_model.classes_

array([0, 1, 2])

In [75]:
# target_model.predict_log_proba(X_test)
encoder = OneHotEncoder(sparse_output=False)
y_test = encoder.fit_transform(pd.DataFrame(y_test))
print(y_test)

[[0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]]


In [79]:
X_test.shape, y_test.shape

((30, 4), (30, 3))

In [106]:
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)
def multinomial_loss(W, X, y, lambda_reg):
    W = W.reshape(X.shape[1], -1)
    epsilon = 1e-6
    p_hat = softmax(X @ W)
    loss = -np.mean(np.sum(np.log(p_hat + epsilon) * y,axis=1)) + .5 * lambda_reg * np.sum(W**2)
    return loss
def multinomial_grad(W, X, y, lambda_reg):
    W = W.reshape(X.shape[1], -1)
    cost = softmax(X @ W) - y
    gradient = 1/X.shape[0] * X.T @ cost + lambda_reg * W
    gradient = gradient.reshape(-1)
    return gradient

def find_score(W, X):
    W.reshape(X.shape[1], -1)
    p_hat = softmax(X @ W)
    return np.argmax(p_hat, axis=1)

    


In [98]:
W = np.random.randn(12)
print(multinomial_loss(W, X_test, y_test, 0.01))
multinomial_grad(W, X_test, y_test, 0.01)

4.640252825158451


array([ 4.26620038, -1.79629018, -2.45492655,  1.91577184, -0.84647421,
       -1.09461994,  3.35770367, -1.31010827, -2.04555733,  1.17620446,
       -0.38955644, -0.75872396])

In [135]:
import numpy as np
from scipy.optimize import minimize

def run_opti(loss, gradient, X, Y, w_dim):
    k = Y.shape[1] # Number of classes

    best_w = None
    best_acc = 0
    num_classes = 3
    acc = []
    alphas = [10**x for x in range(-20, 4)] # regularization terms
    fprimes = [gradient]

    for fprime in fprimes:
        for alpha in alphas:
            w0 = 1e-8 * np.random.randn(X.shape[1] * num_classes)

            num_unknows = len(w0.ravel())
            method = "BFGS"
            if num_unknows > 1000:
                method = "L-BFGS-B"
            # try:
            optimLogitBFGS = minimize(loss, x0=w0,
                                        method = method,
                                        args = (X, Y, alpha),
                                        jac = fprime,
                                        options={'gtol': 1e-6,
                                                'disp': True,
                                                'maxiter': 100})
            wopt = optimLogitBFGS.x
            # print(multinomial_loss(wopt, X, Y, alpha))
            # return wopt
            wopt_reshape = wopt.reshape(X.shape[1], 3)
            clonemodel_res = np.argmax(softmax(X @ wopt_reshape), axis = 1)
            true_y = target_model.predict(X)
            tempacc= (np.mean(true_y == clonemodel_res))
            acc.append(tempacc)
            # except ValueError:
            #     print(f"Failed to optimize with alpha={alpha} and method={method}")
            #     wopt = np.zeros(w0.shape)
    return acc
    


In [136]:
accs= run_opti(multinomial_loss, multinomial_grad, X_test, y_test, (X_test.shape[1], 3))

Optimization terminated successfully.
         Current function value: -0.000001
         Iterations: 31
         Function evaluations: 33
         Gradient evaluations: 33
Optimization terminated successfully.
         Current function value: -0.000001
         Iterations: 31
         Function evaluations: 33
         Gradient evaluations: 33
Optimization terminated successfully.
         Current function value: -0.000001
         Iterations: 31
         Function evaluations: 33
         Gradient evaluations: 33
Optimization terminated successfully.
         Current function value: -0.000001
         Iterations: 31
         Function evaluations: 33
         Gradient evaluations: 33
Optimization terminated successfully.
         Current function value: -0.000001
         Iterations: 31
         Function evaluations: 33
         Gradient evaluations: 33
Optimization terminated successfully.
         Current function value: -0.000001
         Iterations: 31
         Function evaluations:

  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)
  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)
  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)
  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)


In [137]:
accs

[1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 0.8666666666666667,
 0.7,
 0.36666666666666664,
 0.36666666666666664,
 0.36666666666666664]

[0 0 1 0 0 2 1 0 0 0 2 1 1 0 0 1 2 2 1 2 1 2 2 0 2 1 0 0 0 1 2 0 0 0 1 0 1
 2 0 1 2 0 2 2 1 2 2 1 0 1 2 0 0 1 1 0 2 0 0 2 1 2 2 2 2 1 0 0 2 2 0 0 0 1
 2 0 2 2 0 1 1 2 1 2 0 2 1 2 1 1 1 1 1 1 0 1 2 2 0 1 2 1 0 2 0 1 2 2 1 2 1
 1 2 2 0 1 2 0 1 2]


In [115]:
onehotypred = OneHotEncoder(sparse_output=False).fit_transform(pd.DataFrame(clone))

true_y = target_model.predict(X_train)


In [121]:
np.mean(true_y == clone)

0.9583333333333334

## Minimize weights (binary case)

In [None]:
def multinomial_loss(W, X, y, lambda_reg):
    epsilon = 1e-6
    p_hat = softmax(X @ W)
    loss = -np.mean(np.sum(np.log(p_hat + epsilon) * y,axis=1)) + .5 * lambda_reg * np.sum(W**2)
    return loss

In [41]:
def multinomial_loss2(W, X, y, lambda_reg):
    p_hat = softmax(X @ W_init)
    epsilon = 1e-6
    return np.mean(np.log(p_hat*epsilon)*y) + .5 * lambda_reg * np.sum(W**2)

def multinomial_grad2(W, X, y, lambda_reg):
    cost = softmax(X @ W) - y
    gradient = 1/X.shape[0] * X.T @ cost + lambda_reg * W
    return gradient


In [40]:
multinomial_loss2(W_init, X, y, lambda_reg)

-15.405764864688257

In [43]:
multinomial_grad(W_init, X, y, lambda_reg)

array([-0.37536781, -0.3225786 , -0.33816599, -0.37609352])

In [44]:
result = minimize(
    fun=multinomial_loss2,
    x0=W_init,
    args=(X, y, lambda_reg),
    method="L-BFGS-B",
    jac=multinomial_grad2,
    options={"disp": True}
)

In [48]:
import numpy as np
from scipy.optimize import minimize
from scipy.special import softmax

# Define the multinomial loss
def multinomial_loss(W, X, y, lambda_reg):
    epsilon = 1e-6
    W = W.reshape(X.shape[1], -1)  # Reshape W to correct dimensions
    p_hat = softmax(X @ W, axis=1)
    loss = -np.mean(np.sum(np.log(p_hat + epsilon) * y, axis=1)) + 0.5 * lambda_reg * np.sum(W**2)
    return loss

# Define the gradient of the multinomial loss
def multinomial_grad(W, X, y, lambda_reg):
    W = W.reshape(X.shape[1], -1)  # Reshape W to correct dimensions
    cost = softmax(X @ W, axis=1) - y
    grad = (1 / X.shape[0]) * X.T @ cost + lambda_reg * W
    return grad.flatten()  # Return as a flat array for optimizer compatibility

# Simulated data
np.random.seed(42)
n_samples = 100
n_features = 5
n_classes = 3
X = np.random.rand(n_samples, n_features)
y = np.eye(n_classes)[np.random.choice(n_classes, n_samples)]  # One-hot encoded labels

# Regularization parameter
lambda_reg = 0.1

# Initial guess for weights
W_init = np.random.rand(n_features, n_classes).flatten()

# Minimize the loss function
result = minimize(
    fun=multinomial_loss,
    x0=W_init,
    args=(X, y, lambda_reg),
    method="L-BFGS-B",
    jac=multinomial_grad,
    options={"disp": True}
)

# Extract the optimized weight matrix
W_opt = result.x.reshape(n_features, n_classes)

# Display results
print("Optimal Weight Matrix (W):")
print(W_opt)
print("\nFinal Loss:", result.fun)


Optimal Weight Matrix (W):
[[ 0.16171177 -0.13798981 -0.02372588]
 [ 0.23969347 -0.06567177 -0.17402611]
 [-0.15554053  0.08163731  0.07390005]
 [ 0.10807501 -0.07850352 -0.02957389]
 [-0.05274769 -0.01572479  0.06846955]]

Final Loss: 1.0781224178155708


In [49]:
multinomial_grad(W_init, X, y, lambda_reg)

array([ 0.0002774 ,  0.12586429,  0.09676574,  0.00332422,  0.10963249,
        0.13713105,  0.02793491,  0.09639048,  0.05562939, -0.07426768,
        0.14127733,  0.06947489, -0.00478114,  0.14959968,  0.02111559])

(100, 5)

In [10]:
import pandas as pd
import numpy as np
def all_pairs(Y): #generate all pairs with different labels
    
    classes = pd.Series(Y).unique().tolist()
    return [(i, j)
            for i in range(len(Y))              # go over all points
            for c in classes                    # and all other classes
            if c != Y[i]
            for j in np.where(Y == c)[0][0:1]   # and build a pair
            if i > j]

In [21]:
all_pairs(np.array([1,2,0,1,2,2,0,1]))

[(1, 0),
 (2, 0),
 (2, 1),
 (3, 1),
 (3, 2),
 (4, 0),
 (4, 2),
 (5, 0),
 (5, 2),
 (6, 0),
 (6, 1),
 (7, 1),
 (7, 2)]

In [27]:
from scipy.spatial.distance import cdist, pdist, squareform
Y = np.array([1,2,0,1,2,2,0,1])
classes = pd.Series(Y).unique().tolist()

np.where(Y == 2)
squareform(pdist([[1,2,3],[4,5,6],[10,11,12]], 'euclidean'))

array([[ 0.        ,  5.19615242, 15.58845727],
       [ 5.19615242,  0.        , 10.39230485],
       [15.58845727, 10.39230485,  0.        ]])

[1, 2, 0]