In [21]:
import pandas as pd
import time
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder, StandardScaler

from qiskit.circuit.library import ZZFeatureMap
from qiskit.circuit.library import PauliFeatureMap
from qiskit.primitives import Sampler

from qiskit_algorithms.state_fidelities import ComputeUncompute
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.algorithms import QSVC

from sklearn.model_selection import train_test_split

In [None]:
# Load Excel file
train_df = pd.read_csv("mlb_vqc_features.csv")

# Explore the structure
print(train_df.shape)
print(train_df.head(5))

(2430, 10)
   hits (Home-Away)  homeruns (Home-Away)  leftonbase (Home-Away)  \
0              -4.0                   0.0                   -13.0   
1               1.0                  -3.0                     7.0   
2               0.0                   1.0                    -3.0   
3             -10.0                  -5.0                    -8.0   
4               1.0                  -2.0                     5.0   

   obp (Home-Away)  slg (Home-Away)  strikeouts (Home-Away)  \
0           -0.201           -0.132                     0.0   
1           -0.089           -0.219                    -3.0   
2            0.035            0.150                     3.0   
3           -0.171           -0.596                     5.0   
4            0.007           -0.122                     6.0   

   strikepercentage (Home-Away)  whip (Home-Away)  SP ERA (Home-Away)  y  
0                         -0.03              1.23               -1.80  0  
1                         -0.02              

In [None]:
train_df

Unnamed: 0,hits (Home-Away),homeruns (Home-Away),leftonbase (Home-Away),obp (Home-Away),slg (Home-Away),strikeouts (Home-Away),strikepercentage (Home-Away),whip (Home-Away),SP ERA (Home-Away),y
0,-4.0,0.0,-13.0,-0.201,-0.132,0.0,-0.03,1.23,-1.80,0
1,1.0,-3.0,7.0,-0.089,-0.219,-3.0,-0.02,0.55,8.25,0
2,0.0,1.0,-3.0,0.035,0.150,3.0,0.05,-0.16,-1.91,1
3,-10.0,-5.0,-8.0,-0.171,-0.596,5.0,0.04,1.11,7.80,0
4,1.0,-2.0,5.0,0.007,-0.122,6.0,-0.01,0.00,-0.60,0
...,...,...,...,...,...,...,...,...,...,...
2425,4.0,1.0,0.0,-0.006,-0.008,-1.0,0.02,0.08,1.56,1
2426,4.0,0.0,-5.0,-0.012,-0.034,5.0,-0.12,-0.02,0.00,1
2427,2.0,-1.0,15.0,0.017,0.012,-2.0,0.03,0.01,-8.10,1
2428,0.0,1.0,-2.0,0.015,0.049,4.0,0.00,0.09,-2.01,1


In [None]:
# creates x for features and y for labels
X_train = train_df.drop(columns=['y']).to_numpy()
X_train = StandardScaler().fit_transform(X_train)
X_train

array([[-0.79246589,  0.02291385, -1.73433899, ..., -0.62586501,
         6.88193371, -0.20819799],
       [ 0.25319664, -1.83310819,  1.03104405, ..., -0.44372705,
         3.08202953,  1.40731087],
       [ 0.04406413,  0.64158787, -0.35164747, ...,  0.83123868,
        -0.88551749, -0.22588018],
       ...,
       [ 0.46232915, -0.59576016,  2.13719726, ...,  0.46696275,
         0.06445855, -1.22090504],
       [ 0.04406413,  0.64158787, -0.21337832, ..., -0.07945113,
         0.51150611, -0.24195489],
       [ 0.04406413,  0.64158787, -0.48991663, ...,  0.46696275,
        -0.77375561,  0.08114688]], shape=(2430, 9))

In [None]:
y_train = train_df['y'].to_numpy()
y_train

array([0, 0, 1, ..., 1, 1, 1], shape=(2430,))

In [None]:
# Smaller subset for testing and faster execution

df_sample = train_df.sample(100, random_state=42)


X_train = StandardScaler().fit_transform(df_sample.drop(columns=['y']).to_numpy())
y_train = df_sample['y'].to_numpy()

In [None]:
test_df = pd.read_csv("mlb_vqc_features.csv")

In [None]:
feature_map = PauliFeatureMap(feature_dimension=len(X_train[0]), reps=2, paulis = ['Z','Y','ZZ'])

sampler = Sampler()

fidelity = ComputeUncompute(sampler=sampler)

kernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=feature_map)


  sampler = Sampler()


In [29]:
start_time=time.time()

qsvc = QSVC(quantum_kernel=kernel)

qsvc.fit(X_train, y_train)

end_time=time.time()

In [30]:

qsvc_score = qsvc.score(X_test, y_test)

end_time2=time.time()

print(f"Training time: {end_time - start_time} seconds")
print(f"Scoring time: {end_time2 - end_time} seconds")
print(f"QSVC accuracy: {qsvc_score}")

Training time: 123.75767779350281 seconds
Scoring time: 61.542373180389404 seconds
QSVC accuracy: 0.65
