In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
import numpy as np
import pandas as pd
import cvxpy as cp

In [21]:
def generate_synthetic_fico_data(n=10000):
    np.random.seed(42)  # For reproducibility
    
    # Simulate Payment History
    # A mix of positive and negative factors
    payment_history = np.random.normal(700, 100, n) - np.random.choice([0, 20, 50, 100], n, p=[0.7, 0.15, 0.1, 0.05])
    
    # Simulate Amounts Owed
    # Influence of credit utilization and total balance
    amounts_owed = 850 - np.random.beta(2, 5, n) * 500  # Beta distribution for utilization skew
    
    # Simulate Length of Credit History
    # Older accounts generally mean higher scores
    length_of_credit_history = np.random.choice(range(300, 850), n, replace=True)
    
    # Simulate Credit Mix
    # More diverse credit types yield higher scores
    credit_mix = np.random.choice(range(600, 850), n, replace=True)
    
    # Simulate New Credit
    # Recent credit activities lower the score
    new_credit = 850 - np.random.gamma(2, 100, n)
    
    # Combine into DataFrame
    data = {
        'Payment History': payment_history,
        'Amounts Owed': amounts_owed,
        'Length of Credit History': length_of_credit_history,
        'Credit Mix': credit_mix,
        'New Credit': new_credit
    }
    df = pd.DataFrame(data) # Divide by 10 to scale down the values
    
    # Calculate FICO Score with given weights
    weights = {'Payment History': 0.45, 'Amounts Owed': 0.20, 'Length of Credit History': 0.15, 'Credit Mix': 0.10, 'New Credit': 0.10}
    df['FICO Score'] = df.apply(lambda row: sum(row[feature] * weight for feature, weight in weights.items()), axis=1)
    
    return df


def w(beta, gamma):
    """
    Applies the transformation to beta.
    
    Args:
    beta (float): The input weight.
    gamma (float): The gamma parameter for the transformation.

    Returns:
    float: The transformed weight.
    """
    return np.exp(-(-np.log(beta))**gamma)


def behavioral(weights, gamma):
    """
    Modifies the weights according to the specified formula.
    
    Args:
    weights (np.array): Original model weights.
    gamma (float): The gamma parameter for the transformation.

    Returns:
    np.array: Modified weights reflecting behavioral adjustments.
    """
    sorted_indices = np.argsort(weights)  # Sort indices of weights
    sorted_weights = weights[sorted_indices]  # Sort weights
    transformed_weights = [w(beta, gamma) for beta in sorted_weights]
    
    perceived_weights = np.zeros_like(weights)
    for i in range(len(weights)):
        sum_current = sum(transformed_weights[i:])
        sum_next = sum(transformed_weights[i+1:]) if i+1 < len(weights) else 0
        perceived_weights[sorted_indices[i]] = sum_current - sum_next

    return perceived_weights

In [22]:
# Generate the dataset
df = generate_synthetic_fico_data()
fico_df = df.copy()

In [23]:
# Sigmoid function to calculate probability
def sigmoid(x):
    return 1 / (1 + np.exp(-0.1 * (x - 650)))

# Calculate the approval probability for each individual
fico_df['Approval Probability'] = sigmoid(fico_df['FICO Score'])

# Assign "true" labels based on approval probability being above a random threshold
# This introduces randomness in the assignment, making some borderline cases get approved or denied
fico_df['True Label'] = (fico_df['Approval Probability'] > np.random.rand(fico_df.shape[0])).astype(int)

fico_df.head(10)

Unnamed: 0,Payment History,Amounts Owed,Length of Credit History,Credit Mix,New Credit,FICO Score,Approval Probability,True Label
0,749.671415,706.285266,690,670,741.096754,723.218866,0.99934,1
1,686.17357,568.537433,624,685,719.431848,656.528778,0.657659,1
2,764.768854,782.821399,559,609,535.475954,699.007859,0.992614,1
3,852.302986,779.838422,500,609,774.948796,752.898908,0.999966,1
4,676.584663,802.699392,848,638,781.528487,734.155825,0.999779,1
5,676.586304,703.907235,417,664,776.518005,651.847084,0.546046,0
6,857.921282,652.638288,813,712,750.692035,784.811438,0.999999,1
7,756.743473,794.64831,530,657,256.070511,670.271276,0.883616,1
8,653.052561,732.876715,316,843,742.269311,646.375927,0.410377,1
9,734.256004,744.433418,795,636,514.639393,713.615825,0.998276,1


In [40]:
import cvxpy as cp

def logistic_regression_with_sum_constraint(X, y):
    """
    Train logistic regression model with the constraint that coefficients sum to 1.
    Args:
    X (pd.DataFrame): Feature dataframe.
    y (pd.Series): Target vector.

    Returns:
    tuple: (coefficients, threshold)
    """
    n_samples, n_features = X.shape
    
    # Variables
    beta = cp.Variable(n_features)
    intercept = cp.Variable()

    # Logistic loss
    logits = X.values @ beta + intercept
    log_likelihood = cp.sum(
        cp.multiply(y.values, logits) - cp.logistic(logits)
    )

    # Objective and constraints
    objective = cp.Maximize(log_likelihood)
    constraints = [cp.sum(beta) == 1]

    # Problem
    problem = cp.Problem(objective, constraints)
    problem.solve()

    return beta.value, intercept.value

# # Example usage to get beta and threshold
# X = pd.DataFrame([[0.1, 0.2], [0.4, 0.5], [0.7, 0.8], [0.3, 0.6], [0.5, 0.9]], columns=['Feature1', 'Feature2'])
# y = pd.Series([0, 1, 1, 0, 1])

# beta, threshold = logistic_regression_with_sum_constraint(X, y)
# print("Beta coefficients:", beta)
# print("Threshold:", threshold)


In [41]:
from sklearn.model_selection import train_test_split

# Example dataset loading
# Assuming 'FICO Score' is not used as a feature directly in the model training
X = fico_df[['Payment History', 'Amounts Owed', 'Length of Credit History', 'Credit Mix', 'New Credit']]  # model features
y = fico_df['True Label']  # the target variable

# Splitting the dataset
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.3, random_state=42)

# Train the logistic regression model
beta, threshold = logistic_regression_with_sum_constraint(X_train, y_train)
print("Beta coefficients:", beta)
print("Threshold:", threshold)


    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


Beta coefficients: [0.43061013 0.2051757  0.14545218 0.11374165 0.10502033]
Threshold: -652.848471173721


In [42]:
# feature names
feature_names = X.columns

# feature weights
coefficients = beta

# Printing feature weights
print("Feature weights:")
for feature, coef in zip(feature_names, coefficients):
    print(f"{feature}: {coef:.4f}")

    # print perceived weights
perceived_weights = behavioral(coefficients, gamma=0.5)
print("\nPerceived Feature weights:")
for feature, coef in zip(feature_names, perceived_weights):
    print(f"{feature}: {coef:.4f}")

Feature weights:
Payment History: 0.4306
Amounts Owed: 0.2052
Length of Credit History: 0.1455
Credit Mix: 0.1137
New Credit: 0.1050

Perceived Feature weights:
Payment History: 0.3994
Amounts Owed: 0.2841
Length of Credit History: 0.2495
Credit Mix: 0.2289
New Credit: 0.2229


In [51]:
import pandas as pd
import numpy as np
import cvxpy as cp

def logistic_regression_classifier(x, beta, threshold):
    score = np.dot(beta, x) + threshold
    return 1 if score >= 0 else 0

def cost_function(z, x_0):
    return np.sum((z - x_0) ** 2)

def agent_optimization(x_0, beta, threshold, R):
    n = len(x_0)
    z = cp.Variable(n)  # manipulated feature vector
    t = cp.Variable(boolean=True)  # binary variable for classification

    # Define the constraints
    constraints = [t == (beta @ z + threshold >= 0)]

    # Define the objective function
    objective = cp.Maximize(R * t - cp.sum_squares(z - x_0))

    # Define the problem and solve it
    problem = cp.Problem(objective, constraints)
    problem.solve()

    return z.value

def logistic_regression_with_sum_constraint(X, y):
    n_samples, n_features = X.shape
    
    # Variables
    beta = cp.Variable(n_features)
    intercept = cp.Variable()

    # Logistic loss
    logits = X.values @ beta + intercept
    log_likelihood = cp.sum(
        cp.multiply(y.values, logits) - cp.logistic(logits)
    )

    # Objective and constraints
    objective = cp.Maximize(log_likelihood)
    constraints = [cp.sum(beta) == 1]

    # Problem
    problem = cp.Problem(objective, constraints)
    problem.solve()

    return beta.value, intercept.value

def firm_payoff(y_true, y_pred):
    true_positives = np.sum((y_true == 1) & (y_pred == 1))
    false_negatives = np.sum((y_true == 1) & (y_pred == 0))
    return true_positives - false_negatives

def print_firm_payoff(X, y, beta, threshold):
    y_pred = np.array([logistic_regression_classifier(x, beta, threshold) for x in X.values])
    payoff = firm_payoff(y.values, y_pred)
    print("Firm's payoff:", payoff)

def agents_decision_boundary_optimize(X, beta, threshold, D):
    optimized_X = X.copy()
    for i, x in X.iterrows():
        score = np.dot(beta, x) + threshold
        distance = np.abs(score) / np.linalg.norm(beta)
        if distance <= D:
            optimized_X.iloc[i] = agent_optimization(x, beta, threshold, R=10.0)
    return optimized_X

def total_optimization_cost(X, optimized_X):
    total_cost = 0
    for x, opt_x in zip(X.values, optimized_X.values):
        total_cost += cost_function(opt_x, x)
    return total_cost

def run_optimization_loop(X, y, D, max_iterations=20):
    beta, threshold = logistic_regression_with_sum_constraint(X, y)
    print("Initial beta:", beta)
    print("Initial threshold:", threshold)
    
    for iteration in range(max_iterations):
        print(f"\nIteration {iteration + 1}")
        
        # Print firm's payoff before optimization
        print("Firm's payoff before optimization:")
        print_firm_payoff(X, y, beta, threshold)
        
        # Agents optimize their features
        optimized_X = agents_decision_boundary_optimize(X, beta, threshold, D)
        
        # Print firm's payoff after optimization
        print("Firm's payoff after optimization:")
        print_firm_payoff(optimized_X, y, beta, threshold)
        
        # Print total cost users pay for trying to qualify
        total_cost = total_optimization_cost(X, optimized_X)
        print("Total optimization cost for users:", total_cost)
        
        # Print agents' features after optimization
        print("Agents' features after optimization:")
        print(optimized_X)
        
        # Create a new dataset with optimized features
        X = optimized_X
        
        # Train the firm on the new dataset
        new_beta, new_threshold = logistic_regression_with_sum_constraint(X, y)
        print("New beta:", new_beta)
        print("New threshold:", new_threshold)
        
        # Check for convergence
        if np.allclose(beta, new_beta):
            print("Converged.")
            break
        
        beta, threshold = new_beta, new_threshold

# Example usage with dataset X, y
X = pd.DataFrame([[0.1, 0.2], [0.4, 0.5], [0.7, 0.8], [0.3, 0.6], [0.5, 0.9]], columns=['Feature1', 'Feature2'])
y = pd.Series([0, 1, 1, 0, 1])
D = 0.3  # Define distance threshold
run_optimization_loop(X, y, D)


Initial beta: [ 1.14533869 -0.14533869]
Initial threshold: 0.038590477669974144

Iteration 1
Firm's payoff before optimization:
Firm's payoff: 3


    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


TypeError: float() argument must be a string or a number, not 'Inequality'

In [82]:
import cvxpy as cp
import numpy as np

def agent_optimization(x_0, beta, threshold, R):
    # Define the dimensions
    n = len(beta)
    
    # Define the decision variables
    z = cp.Variable(n)
    y = cp.Variable(boolean=True)
    
    # Define the objective function
    c = lambda z, x_0: cp.norm(z - x_0, 2)  # cost function: norm between z and x_0
    objective = cp.Maximize(R * y - c(z, x_0))
    
    # Define the big-M value
    M = 1e6  # A sufficiently large number
    # epsilon = 1e-6  # A small number to ensure strict inequality
    
    # Define the constraints
    constraints = [
        beta.T @ z >= threshold - M * (1 - y),
        beta.T @ z <= threshold + M * y,
        y >= 0,
        y <= 1
    ]
    
    # Formulate and solve the problem
    problem = cp.Problem(objective, constraints)
    problem.solve()
    
    # Retrieve and return the results
    z_opt = z.value
    y_opt = y.value
    
    return z_opt, y_opt, objective.value

# Example usage
x_0 = np.array([1, 2])
beta = np.array([0.5, 0.5])
threshold = 11
R = 10

z_opt, y_opt, obj = agent_optimization(x_0, beta, threshold, R)
print(f"Optimal z: {z_opt}")
print(f"Optimal y: {y_opt}")
print(f"Objective value: {obj}")

Optimal z: [1. 2.]
Optimal y: 0.9999904118106796
Objective value: 9.999904118106796
