# Algoritma Klasifikasi Char 1

In [1]:
import pickle
from sklearn.metrics import accuracy_score, roc_auc_score
import pandas as pd
from qiskit.algorithms.optimizers import COBYLA
from qiskit.circuit.library import TwoLocal, ZZFeatureMap
from qiskit.utils import algorithm_globals

from qiskit_machine_learning.algorithms import VQC, PegasosQSVC, NeuralNetworkClassifier, QSVC
from qiskit_machine_learning.datasets import ad_hoc_data

from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.neural_networks import TwoLayerQNN
from qiskit_machine_learning.algorithms import NeuralNetworkClassifier

from qiskit import Aer
from qiskit.utils import QuantumInstance
# from qiskit.aqua import QuantumInstance

import numpy as np
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from qiskit import QuantumCircuit
from qiskit.algorithms.optimizers import COBYLA, L_BFGS_B
from qiskit.circuit import Parameter
from qiskit.circuit.library import RealAmplitudes, ZZFeatureMap
from qiskit.utils import algorithm_globals

from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier, VQC
from qiskit_machine_learning.algorithms.regressors import NeuralNetworkRegressor, VQR
from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN

algorithm_globals.random_seed = 42

def train_vqc(df, features, label, test_size=0.2, random_state=42):
    X = df[features].values
    y = label_binarize(df[label], classes=df[label].unique())

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    feature_map = ZZFeatureMap(feature_dimension=len(features), reps=2, entanglement="linear")
    ansatz = TwoLocal(feature_map.num_qubits, ["ry", "rz"], "cz", reps=3)
    vqc = VQC(
        feature_map=feature_map,
        ansatz=ansatz,
        optimizer=COBYLA(maxiter=100),
    )
    vqc.fit(X_train, y_train)
    vqc_score = vqc.score(X_test, y_test)
    vqc.save('vqc_model')
    # with open('vqc_model.pkl', 'wb') as model_file:
    #     pickle.dump(vqc, model_file)
    return vqc_score, roc_auc_score(y_test, vqc.predict(X_test))

def train_pegasos_svc(df, features, label, test_size=0.2, random_state=42):
    X = df[features].values
    y = df[label].apply(lambda x: 1 if x == 'E' else -1)

    # Make it binary by considering 'E' as positive and others as negative
    y_binary = np.where(y == 1, 1, -1)

    X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=test_size, random_state=random_state)

    pegasos_svc = PegasosQSVC()
    pegasos_svc.fit(X_train, y_train)
    pegasos_svc_score = pegasos_svc.score(X_test, y_test)
    pegasos_svc.save('pegasos_svc_model')
    return pegasos_svc_score, roc_auc_score(y_test, pegasos_svc.predict(X_test))

def train_qsvc(df, features, label, test_size=0.2, random_state=42):
    X = df[features].values
    y = df[label].apply(lambda x: 1 if x == 'E' else -1)

    # Make it binary by considering 'E' as positive and others as negative
    y_binary = np.where(y == 1, 1, -1)

    X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=test_size, random_state=random_state)

    qsvc = QSVC()
    qsvc.fit(X_train, y_train)
    qsvc_score = qsvc.score(X_test, y_test)
    qsvc.save('qsvc_model')
    return qsvc_score, roc_auc_score(y_test, qsvc.predict(X_test))



  from qiskit.algorithms.optimizers import COBYLA


# Penerapan

In [2]:
import pandas as pd

In [3]:
from sklearn.utils import resample

def count_value_frequency(df, column):
    value_counts = df[column].value_counts()
    return value_counts

def balance_dataset(df, target_column):
    # Menghitung frekuensi nilai pada kolom target
    value_counts = count_value_frequency(df, target_column)

    # Menentukan jumlah minimum frekuensi nilai
    min_frequency = value_counts.min()

    # Melakukan resampling untuk setiap nilai dalam kolom target
    balanced_data = pd.DataFrame()
    for value in value_counts.index:
        # Mengambil subset data dengan nilai tertentu
        subset = df[df[target_column] == value]

        # Melakukan resampling dengan jumlah minimum frekuensi
        subset_resampled = resample(subset, replace=True, n_samples=min_frequency, random_state=42)

        # Menggabungkan subset resampled ke dalam dataset seimbang
        balanced_data = pd.concat([balanced_data, subset_resampled])

    return balanced_data

In [4]:
import numpy as np

def divide_dataset_by_float(df, float_value):
    num_samples = int(len(df) * float_value)
    return df.head(num_samples)

def divide_balanced_dataset(balanced_df, target_column, float_value):
    # Menghitung frekuensi nilai pada kolom target
    value_counts = count_value_frequency(balanced_df, target_column)

    # Menentukan jumlah dataset setelah dibagi sesuai float value
    divided_data = pd.DataFrame()
    for value in value_counts.index:
        # Mengambil subset data dengan nilai tertentu
        subset = balanced_df[balanced_df[target_column] == value]

        # Menghitung jumlah dataset setelah dibagi sesuai float value
        num_samples = int(len(subset) * float_value)
        
        # Memilih sebagian dari subset sesuai dengan float value
        subset_divided = subset.head(num_samples)
        
        # Menggabungkan subset yang sudah dibagi ke dalam dataset hasil
        divided_data = pd.concat([divided_data, subset_divided])

    return divided_data


In [5]:
df1 = pd.read_csv('Bdataset_type_2_vers2_hidropobicity_covid.csv')
df1 = df1.dropna(subset=["Position"])

# df2 = pd.read_csv(out_dir+'dataset/Tdataset_type_2_vers2_hidropobicity.csv')

In [6]:
print(len(df1[df1['label'] == 'E']))
print(len(df1[df1['label'] == '.']))

120796
225436


In [7]:
120796*0.0003

36.2388

In [7]:
df1 = balance_dataset(df1, 'label')
float_value = 0.0003 # Misalnya, kita ingin membagi setiap kelompok menjadi setengahnya

# Membagi dataset seimbang sesuai float value
df1 = divide_balanced_dataset(df1, 'label', float_value)
df1 = df1.reset_index()

In [9]:
print(len(df1[df1['label'] == 'E']))
print(len(df1[df1['label'] == '.']))

36
36


In [10]:
df1.head()

Unnamed: 0,index,Position,amino,length sequence,label,Kyte-Doolittle,Hopp-Woods,Cornette,Eisenberg,Rose,Janin,Engelman GES,amino acid to number
0,187565,5159.0,N,15,.,-3.5,0.2,-0.5,-0.78,0.63,-0.5,-4.8,3
1,225525,378.0,K,20,.,-3.9,3.0,-3.1,-1.5,0.52,-1.8,-8.8,12
2,203167,1028.0,F,15,.,2.8,-2.5,4.4,1.19,0.88,0.5,3.7,14
3,159578,120.0,V,16,.,4.2,-1.5,4.7,1.08,0.86,0.6,2.6,20
4,184505,888.0,F,15,.,2.8,-2.5,4.4,1.19,0.88,0.5,3.7,14


In [11]:
# dtest = pd.DataFrame()
# mod = ['vqc','pegasos_svc','qsvc']
# #prop_scale = ['hoop_woods','emini','parker','levitt']
# prop_scale = ['Kyte-Doolittle', 'Hopp-Woods', 'Cornette', 'Eisenberg', 'Rose', 'Janin', 'Engelman GES']
# dt_test = [0.1,0.2,0.3]
# mod4x = mod*len(prop_scale)
# prop_scale6x = prop_scale*len(mod)
# dtest['algoritm'] = mod4x
# dtest['prop_scale'] = prop_scale6x
# dtest['accuracy'] = 0.0
# dtest['auc'] = 0.0
# dtest['n_amino_acids'] = 1
# dtest['test_size'] = 0.0
# dtest = pd.concat([dtest,dtest,dtest])
# dtest = dtest.reset_index(drop=True)
# #dtest = dtest.sample(frac=1).reset_index(drop=True)
# for i in mod:
#     dtest.loc[dtest['algoritm'] == i, 'test_size'] = int(len(dtest[dtest['algoritm'] == i])/len(dt_test))*dt_test

# dtest.to_csv('result_of_quantum_classification_covid.csv', index=False)

In [17]:
# len(dtest)

63

# Penerapan Char 1

In [8]:
dtest = pd.read_csv('result_of_quantum_classification_covid.csv')

In [9]:
dtest

Unnamed: 0,algoritm,prop_scale,accuracy,auc,n_amino_acids,test_size
0,vqc,Kyte-Doolittle,0.500000,0.5,1,0.1
1,pegasos_svc,Hopp-Woods,0.250000,0.5,1,0.1
2,qsvc,Cornette,0.250000,0.5,1,0.1
3,vqc,Eisenberg,0.333333,0.3,1,0.2
4,pegasos_svc,Rose,0.333333,0.5,1,0.2
...,...,...,...,...,...,...
58,pegasos_svc,Cornette,0.000000,0.0,1,0.2
59,qsvc,Eisenberg,0.000000,0.0,1,0.2
60,vqc,Rose,0.000000,0.0,1,0.3
61,pegasos_svc,Janin,0.000000,0.0,1,0.3


In [10]:
begin = dtest[dtest['accuracy'] == 0.00]

In [11]:
begin.index[0]

22

In [12]:
y = 'label'
for i in range(begin.index[0], len(dtest)):
    X = ['Position', 'length sequence']

    if dtest['algoritm'][i] == 'vqc':
        X.append(dtest['prop_scale'][i])
        acc, auc = train_vqc(df1, X, y, dtest['test_size'][i])

    if dtest['algoritm'][i] == 'pegasos_svc':
        X.append(dtest['prop_scale'][i])
        acc, auc = train_pegasos_svc(df1, X, y, dtest['test_size'][i])

    if dtest['algoritm'][i] == 'qsvc':
        X.append(dtest['prop_scale'][i])
        acc, auc = train_qsvc(df1, X, y, dtest['test_size'][i])

    # Add more conditions for other algorithms as needed

    dtest['accuracy'][i] = acc
    dtest['auc'][i] = auc

    print(f"Acc = {acc}; AUC = {auc}")

    dtest.to_csv('result_of_quantum_classification_covid.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.3333333333333333; AUC = 0.5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.4666666666666667; AUC = 0.55


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.6363636363636364; AUC = 0.6068376068376068


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.4090909090909091; AUC = 0.5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.3181818181818182; AUC = 0.30341880341880345


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.625; AUC = 0.5833333333333334


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.25; AUC = 0.5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.5; AUC = 0.6666666666666667


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.7333333333333333; AUC = 0.7000000000000002


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.3333333333333333; AUC = 0.5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.4; AUC = 0.5


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.5909090909090909; AUC = 0.6025641025641025


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc


Acc = 0.4090909090909091; AUC = 0.5
Acc = 0.5909090909090909; AUC = 0.6025641025641025


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['accuracy'][i] = acc
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dtest['auc'][i] = auc
