In [21]:
import numpy as np
import pandas as pd

from pyqubo import Array
import neal
import matplotlib.pyplot as plt
import requests


In [28]:

# Load the CSV file
file_path = '../data_p/quantum_data.address_class4.csv'
df = pd.read_csv(file_path)

# Extracting each column as an array
columns = df.columns
features = df[columns[:-1]]  # All columns except the last one
result = df[columns[-1]]    # The last column
n_features = features.shape[1]

# Calculate the correlation matrix for features
feature_correlation = features.corr(method='spearman')

# Calculate the correlation of each feature with the result
result_correlation = features.apply(lambda x: x.corr(result))

feature_correlation, result_correlation, n_features



n_tx                      -0.253882
total_days                -0.212742
total_spent_btc            0.022361
total_received_btc         0.019558
total_spent_usd            0.025373
                             ...   
dist_receive_4th_moment   -0.322040
dist_payback_1st_moment   -0.273879
dist_payback_2nd_moment   -0.256793
dist_payback_3rd_moment   -0.125139
dist_payback_4th_moment   -0.322040
Length: 68, dtype: float64

In [20]:
# not used

# Assuming alpha is given and set to 0.5 for demonstration purposes
alpha = 0.5

# Define n_features as the number of features (excluding the last column which is the result)
n_features = df.shape[1] - 1

# Feature influence coefficients: Correlation of each feature with the result
influence_coefficients = np.abs(result_correlation.values)

# Independence coefficients: Absolute pairwise correlations among features
# We only need the unique pairwise coefficients, which are in the upper triangle of the correlation matrix
# excluding the diagonal. We use np.triu_indices to get the indices of the upper triangle.
independence_indices = np.triu_indices(n_features, k=1)
independence_coefficients = np.abs(feature_correlation.values)[independence_indices]

# Now we need to construct the QUBO matrix with the influence coefficients on the diagonal
# and the independence coefficients on the off-diagonal elements.
qubo_matrix = np.zeros((n_features, n_features))

# Set the linear coefficients on the diagonal for influence terms
np.fill_diagonal(qubo_matrix, alpha * influence_coefficients)

# Set the quadratic coefficients for independence terms
for i, (row, col) in enumerate(zip(*independence_indices)):
    qubo_matrix[row, col] = qubo_matrix[col, row] = (1 - alpha) * independence_coefficients[i]

# The QUBO matrix is now ready to be used with a solver
qubo_matrix


array([[0.12694124, 0.28579998, 0.02360067, ..., 0.47805923, 0.06132133,
        0.48055712],
       [0.28579998, 0.10637117, 0.06779948, ..., 0.32365731, 0.00496602,
        0.29095447],
       [0.02360067, 0.06779948, 0.01118054, ..., 0.01900371, 0.09374643,
        0.03184455],
       ...,
       [0.47805923, 0.32365731, 0.01900371, ..., 0.12839625, 0.04605288,
        0.49453116],
       [0.06132133, 0.00496602, 0.09374643, ..., 0.04605288, 0.06256962,
        0.07040788],
       [0.48055712, 0.29095447, 0.03184455, ..., 0.49453116, 0.07040788,
        0.16101985]])

In [29]:
# Project: feature selection for credit scoring
class FeatureSelection_v1(object):
    def __init__(self, num_features, dependence_coefficients, influence_coefficients, alpha):
        self.alpha = alpha
        
        # Number of features
        self.num_features = num_features
        self.dependence_coefficients = dependence_coefficients
        self.influence_coefficients = influence_coefficients
        
        # Create binary variables for the features
        self.array = Array.create('feature', shape=self.num_features, vartype='BINARY')

    def influence(self):
        # Objective: Maximize influence of the features
        H = sum(-self.influence_coefficients[i] * self.array[i] for i in range(self.num_features))
        return self.alpha * H
        
    def dependency(self):
        # Objective: Minimize dependency among the features
        H = sum(self.dependence_coefficients[i][j] * self.array[i] * self.array[j] 
                for i in range(self.num_features) for j in range(i + 1, self.num_features))
        return (1 - self.alpha) * H



In [32]:
# Define alpha for the QUBO problem
alpha = 0.5

feature_qubo = FeatureSelection_v1(n_features, feature_correlation.values, result_correlation.values, alpha)

# Feature influence coefficients: Correlation of each feature with the result
#feature_qubo = FeatureSelection_v1(features, independence_coefficients, influence_coefficients, alpha)
objective = feature_qubo.influence() + feature_qubo.dependency()
model = objective.compile()
qubo, offset = model.to_qubo()

# Solve QUBO using Simulated Annealing Sampler
sampler = neal.SimulatedAnnealingSampler()
response = sampler.sample_qubo(qubo)
print("qubo:", qubo)
    
# Print results
for sample, energy in response.data(['sample', 'energy']):
    print(sample, energy)

qubo: {('feature[57]', 'feature[45]'): 0.32804021333383376, ('feature[60]', 'feature[4]'): 0.018146906674689062, ('feature[26]', 'feature[4]'): 0.05515963096046043, ('feature[58]', 'feature[4]'): 0.09376495388856204, ('feature[66]', 'feature[64]'): 0.03733300178927672, ('feature[60]', 'feature[44]'): 0.3947342734326504, ('feature[58]', 'feature[50]'): 0.5, ('feature[13]', 'feature[8]'): -0.08507094251026512, ('feature[6]', 'feature[6]'): -0.0674669628812896, ('feature[50]', 'feature[20]'): 0.06040015801943973, ('feature[62]', 'feature[26]'): -0.0018182177093904225, ('feature[19]', 'feature[15]'): -0.03614984255007491, ('feature[67]', 'feature[5]'): 0.03215139414339955, ('feature[20]', 'feature[18]'): 0.034924899235929056, ('feature[59]', 'feature[13]'): 0.48055711956077213, ('feature[58]', 'feature[19]'): -0.06040015801943973, ('feature[45]', 'feature[7]'): 0.2690719554922819, ('feature[15]', 'feature[13]'): 0.5, ('feature[63]', 'feature[7]'): 0.44896187770535206, ('feature[15]', 'feat

In [17]:
independence_coefficients

array([0.57159996, 0.04720135, 0.04823961, ..., 0.09210575, 0.98906232,
       0.14081575])