In [1]:
import numpy as np
import pandas as pd

from pyqubo import Array
import neal
import matplotlib.pyplot as plt
import requests


In [2]:
# Project: feature selection for credit scoring
class FeatureSelection_v1(object):
    def __init__(self, num_features, dependence_coefficients, influence_coefficients, alpha):
        self.alpha = alpha
        
        # Number of features
        self.num_features = num_features
        self.dependence_coefficients = dependence_coefficients
        self.influence_coefficients = influence_coefficients
        
        # Create binary variables for the features
        self.array = Array.create('feature', shape=self.num_features, vartype='BINARY')

    def influence(self):
        # Objective: Maximize influence of the features
        H = sum(-self.influence_coefficients[i] * self.array[i] for i in range(self.num_features))
        return self.alpha * H
        
    def dependency(self):
        # Objective: Minimize dependency among the features
        H = sum(self.dependence_coefficients[i][j] * self.array[i] * self.array[j] 
                for i in range(self.num_features) for j in range(i + 1, self.num_features))
        return (1 - self.alpha) * H



In [3]:

# Load the CSV file
for i in range(0, 6):
    # Load the class 0~6 CSV file
    file_path = f'../data_p/quantum_data.address_class{i}.csv'
    df = pd.read_csv(file_path)

    # Extracting each column as an array
    columns = df.columns
    features = df[columns[:-1]]  # All columns except the last one
    result = df[columns[-1]]    # The last column
    n_features = features.shape[1]

    # Calculate the correlation matrix for features
    feature_correlation = features.corr(method='spearman')

    # Calculate the correlation of each feature with the result
    result_correlation = features.apply(lambda x: x.corr(result))
    
    # Define alpha for the QUBO problem
    alpha = 0.5

    feature_qubo = FeatureSelection_v1(n_features, feature_correlation.values, result_correlation.values, alpha)

    # Feature influence coefficients: Correlation of each feature with the result
    objective = feature_qubo.influence() + feature_qubo.dependency()
    model = objective.compile()
    qubo, offset = model.to_qubo()

    # Solve QUBO using Simulated Annealing Sampler
    sampler = neal.SimulatedAnnealingSampler()
    response = sampler.sample_qubo(qubo)
    # print("qubo:", qubo)
        
    # Print results
    for sample, energy in response.data(['sample', 'energy']):
        print(sample, energy)
        
    # Find the best sample (modify this as per your criteria)
    # For simplicity, we're taking the first sample as an example
    best_sample = list(response.samples())[0]

    # Identify selected features
    selected_features = [int(key.split('[')[1].split(']')[0]) for key, value in best_sample.items() if value == 1]

    # Filter the DataFrame to keep only the selected columns
    filtered_df = df.iloc[:, selected_features]

    # Add the index of the last column (class) to the selected features
    last_column = df[columns[-1]]
    filtered_df = pd.concat([filtered_df, last_column], axis=1)

    # Save the filtered DataFrame to a new CSV file
    filtered_df.to_csv(f'../data_p/quantum_qubo_data.address_class{i}.csv', index=False)



  cold_beta = np.log(number_min_gaps/max_single_qubit_excitation_rate) / (2*min_effective_field)


{'feature[0]': 1, 'feature[10]': 1, 'feature[11]': 0, 'feature[12]': 0, 'feature[13]': 0, 'feature[14]': 1, 'feature[15]': 1, 'feature[16]': 1, 'feature[17]': 1, 'feature[18]': 1, 'feature[19]': 1, 'feature[1]': 0, 'feature[20]': 1, 'feature[21]': 0, 'feature[22]': 0, 'feature[23]': 0, 'feature[24]': 0, 'feature[25]': 1, 'feature[26]': 0, 'feature[27]': 1, 'feature[28]': 1, 'feature[29]': 1, 'feature[2]': 1, 'feature[30]': 1, 'feature[31]': 0, 'feature[32]': 0, 'feature[33]': 0, 'feature[34]': 0, 'feature[35]': 1, 'feature[36]': 1, 'feature[37]': 0, 'feature[38]': 1, 'feature[39]': 1, 'feature[3]': 0, 'feature[40]': 1, 'feature[41]': 1, 'feature[42]': 1, 'feature[43]': 1, 'feature[44]': 1, 'feature[45]': 0, 'feature[46]': 0, 'feature[47]': 0, 'feature[48]': 1, 'feature[49]': 1, 'feature[4]': 1, 'feature[50]': 1, 'feature[51]': 0, 'feature[52]': 1, 'feature[53]': 1, 'feature[54]': 0, 'feature[55]': 1, 'feature[56]': 1, 'feature[57]': 1, 'feature[58]': 0, 'feature[59]': 1, 'feature[5]': 

  cold_beta = np.log(number_min_gaps/max_single_qubit_excitation_rate) / (2*min_effective_field)
  cold_beta = np.log(number_min_gaps/max_single_qubit_excitation_rate) / (2*min_effective_field)


{'feature[0]': 0, 'feature[10]': 0, 'feature[11]': 0, 'feature[12]': 0, 'feature[13]': 1, 'feature[14]': 1, 'feature[15]': 1, 'feature[16]': 0, 'feature[17]': 0, 'feature[18]': 1, 'feature[19]': 1, 'feature[1]': 0, 'feature[20]': 1, 'feature[21]': 1, 'feature[22]': 1, 'feature[23]': 1, 'feature[24]': 0, 'feature[25]': 1, 'feature[26]': 0, 'feature[27]': 0, 'feature[28]': 0, 'feature[29]': 0, 'feature[2]': 0, 'feature[30]': 0, 'feature[31]': 1, 'feature[32]': 0, 'feature[33]': 1, 'feature[34]': 0, 'feature[35]': 0, 'feature[36]': 0, 'feature[37]': 0, 'feature[38]': 1, 'feature[39]': 0, 'feature[3]': 0, 'feature[40]': 0, 'feature[41]': 0, 'feature[42]': 1, 'feature[43]': 0, 'feature[44]': 0, 'feature[45]': 0, 'feature[46]': 0, 'feature[47]': 1, 'feature[48]': 0, 'feature[49]': 0, 'feature[4]': 0, 'feature[50]': 0, 'feature[51]': 0, 'feature[52]': 1, 'feature[53]': 0, 'feature[54]': 1, 'feature[55]': 1, 'feature[56]': 0, 'feature[57]': 0, 'feature[58]': 0, 'feature[59]': 1, 'feature[5]': 

  cold_beta = np.log(number_min_gaps/max_single_qubit_excitation_rate) / (2*min_effective_field)
  cold_beta = np.log(number_min_gaps/max_single_qubit_excitation_rate) / (2*min_effective_field)


{'feature[0]': 0, 'feature[10]': 1, 'feature[11]': 1, 'feature[12]': 0, 'feature[13]': 1, 'feature[14]': 0, 'feature[15]': 0, 'feature[16]': 0, 'feature[17]': 1, 'feature[18]': 1, 'feature[19]': 1, 'feature[1]': 1, 'feature[20]': 0, 'feature[21]': 0, 'feature[22]': 1, 'feature[23]': 0, 'feature[24]': 1, 'feature[25]': 0, 'feature[26]': 0, 'feature[27]': 0, 'feature[28]': 1, 'feature[29]': 1, 'feature[2]': 0, 'feature[30]': 1, 'feature[31]': 1, 'feature[32]': 0, 'feature[33]': 0, 'feature[34]': 1, 'feature[35]': 1, 'feature[36]': 1, 'feature[37]': 1, 'feature[38]': 0, 'feature[39]': 0, 'feature[3]': 1, 'feature[40]': 1, 'feature[41]': 0, 'feature[42]': 1, 'feature[43]': 1, 'feature[44]': 1, 'feature[45]': 0, 'feature[46]': 0, 'feature[47]': 0, 'feature[48]': 1, 'feature[49]': 0, 'feature[4]': 1, 'feature[50]': 0, 'feature[51]': 0, 'feature[52]': 0, 'feature[53]': 1, 'feature[54]': 1, 'feature[55]': 0, 'feature[56]': 1, 'feature[57]': 0, 'feature[58]': 0, 'feature[59]': 0, 'feature[5]': 

  cold_beta = np.log(number_min_gaps/max_single_qubit_excitation_rate) / (2*min_effective_field)


In [4]:
import pandas as pd

# Initialize an empty DataFrame for merging
merged_df = pd.DataFrame()

# Loop through class0 to class5
for i in range(0, 6):
    # Read each file
    file_path = f'../data_p/quantum_qubo_data.address_class{i}.csv'
    df = pd.read_csv(file_path)

    # Remove rows where the last column (class{i}) has a value of 0
    df = df[df[df.columns[-1]] != 0]

    # Rename the last column to 'class', and set its value to the current class number for rows with 1
    df.rename(columns={df.columns[-1]: 'class'}, inplace=True)
    df['class'] = df['class'].apply(lambda x: i if x == 1 else x)

    # Add the DataFrame to the merged DataFrame
    merged_df = pd.concat([merged_df, df], ignore_index=True)

# Fill all NaN values with 0
merged_df.fillna(0, inplace=True)

# Move the 'class' column to the end
class_column = merged_df.pop('class')
merged_df['class'] = class_column

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('../data_p/quantum_qubo_data.address.csv', index=False)
