In [9]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn import svm
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

from qiskit.circuit.library import ZZFeatureMap
from qiskit.primitives import Sampler

from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.state_fidelities import ComputeUncompute
# Read the MLB game data from the CSV file into a DataFrame
df = pd.read_csv("mlb_game_data_2025.csv")

# Quick look at the structure: (rows, columns), then the first five rows
print(df.shape, df.head(5), sep="\n")

(2463, 21)
             Date Home Team Away Team  Difference doubles  Difference triples  \
0  March 27, 2025       NYY       MIL                 0.0                 0.0   
1  March 27, 2025       TOR       BAL                -1.0                 0.0   
2  March 27, 2025       TEX       BOS                 1.0                 0.0   
3  March 27, 2025       WSH       PHI                -1.0                -1.0   
4  March 27, 2025        KC       CLE                -1.0                -1.0   

   Difference homeruns  Difference hits  Difference hitbypitch  \
0                  1.0              0.0                    0.0   
1                 -5.0            -10.0                   -1.0   
2                 -2.0              1.0                    0.0   
3                 -1.0             -3.0                    1.0   
4                  0.0             -3.0                    0.0   

   Difference avg  Difference obp  ...  Difference ops  \
0           0.029           0.035  ...         

In [10]:
# Show the loaded DataFrame as this cell's rich output (the notebook elides the middle rows/columns)
df

Unnamed: 0,Date,Home Team,Away Team,Difference doubles,Difference triples,Difference homeruns,Difference hits,Difference hitbypitch,Difference avg,Difference obp,...,Difference ops,Difference groundintodoubleplay,Difference leftonbase,Difference strikeouts,Difference baseonballs,Difference whip,Difference strikepercentage,Difference pitchesperinning,Difference homerunsper9,Home Team Won
0,"March 27, 2025",NYY,MIL,0.0,0.0,1.0,0.0,0.0,0.029,0.035,...,0.185,0.0,-3.0,3.0,0.0,-0.16,0.05,-0.06,-1.25,True
1,"March 27, 2025",TOR,BAL,-1.0,0.0,-5.0,-10.0,-1.0,-0.216,-0.171,...,-0.767,2.0,-8.0,5.0,0.0,1.11,0.04,7.11,5.00,False
2,"March 27, 2025",TEX,BOS,1.0,0.0,-2.0,1.0,0.0,0.030,0.007,...,-0.115,1.0,5.0,6.0,1.0,0.00,-0.01,2.00,2.00,False
3,"March 27, 2025",WSH,PHI,-1.0,-1.0,-1.0,-3.0,1.0,-0.064,-0.083,...,-0.296,1.0,0.0,6.0,3.0,0.60,-0.01,2.20,0.90,False
4,"March 27, 2025",KC,CLE,-1.0,-1.0,0.0,-3.0,0.0,-0.078,-0.101,...,-0.258,-1.0,-1.0,-5.0,2.0,0.50,-0.04,1.00,0.00,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2458,"September 28, 2025",CLE,TEX,0.0,0.0,1.0,4.0,-1.0,-0.008,-0.006,...,-0.014,0.0,0.0,-1.0,-2.0,0.08,0.02,-6.86,-1.10,True
2459,"September 28, 2025",MIA,NYM,2.0,1.0,0.0,4.0,1.0,0.001,-0.012,...,-0.046,-2.0,-5.0,5.0,4.0,-0.02,-0.12,-1.19,0.00,True
2460,"September 28, 2025",MIL,CIN,3.0,0.0,-1.0,2.0,3.0,0.013,0.017,...,0.029,-1.0,15.0,-2.0,-4.0,0.01,0.03,-5.46,0.87,True
2461,"September 28, 2025",ATL,PIT,1.0,0.0,1.0,0.0,0.0,0.014,0.015,...,0.064,0.0,-2.0,4.0,-1.0,0.09,0.00,0.14,-1.13,True


In [11]:
# Feature matrix: every column except the team/date identifiers and the label,
# converted to a NumPy array and standardized with StandardScaler.
X = StandardScaler().fit_transform(
    df.drop(['Home Team', 'Away Team', 'Date', 'Home Team Won'], axis=1).to_numpy()
)
X



array([[ 0.0151025 , -0.03416992,  0.64186126, ...,  0.82955168,
         0.07054738, -0.70090722],
       [-0.50152899, -0.03416992, -3.05616047, ...,  0.64733588,
         2.19399214,  2.94981181],
       [ 0.531734  , -0.03416992, -1.2071496 , ..., -0.26374313,
         0.68063053,  1.19746667],
       ...,
       [ 1.56499699, -0.03416992, -0.59081265, ...,  0.46512008,
        -1.52869972,  0.53741667],
       [ 0.531734  , -0.03416992,  0.64186126, ..., -0.08152733,
         0.12977875, -0.63081342],
       [-1.01816049, -1.94690863,  0.64186126, ...,  0.46512008,
         0.37855053, -0.63081342]], shape=(2463, 17))

In [12]:
# Label vector: the 'Home Team Won' column as a NumPy array
Y = df.loc[:, 'Home Team Won'].to_numpy()
Y

array([ True, False, False, ...,  True,  True,  True], shape=(2463,))

In [13]:
# Smaller subset for testing and faster execution.
#
# Besides sampling rows, the features are compressed with PCA: ZZFeatureMap
# allocates one qubit per feature, so feeding all 17 standardized columns makes
# every kernel entry a 17-qubit circuit simulation. Keeping a few principal
# components keeps the circuits small enough for the reference Sampler.
# NOTE(review): the oversized circuits are the presumed cause of the
# "Sampler job failed!" error raised by qsvc.fit — confirm on a re-run.
N_SAMPLES = 100      # rows drawn from the full dataset
N_COMPONENTS = 4     # principal components kept == qubits in the feature map
SEED = 42            # shared seed so the subset is reproducible

# Identifier columns and the label are not model inputs
NON_FEATURE_COLUMNS = ['Home Team', 'Away Team', 'Date', 'Home Team Won']

df_sample = df.sample(N_SAMPLES, random_state=SEED)

# Standardize first so no single column dominates the principal components
X = StandardScaler().fit_transform(df_sample.drop(columns=NON_FEATURE_COLUMNS).to_numpy())
X = PCA(n_components=N_COMPONENTS, random_state=SEED).fit_transform(X)
Y = df_sample['Home Team Won'].to_numpy()

In [14]:
# Hold out 20% of the rows for testing (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# X was standardized with a scaler fitted on train and test rows together, which
# leaks test-set statistics into the training features. Re-fit the scaler on the
# training split only and apply that same transform to both splits, so centering
# and scaling depend on training rows alone. Standardizing an already-standardized
# column is harmless: the result is determined by the training rows' mean/spread.
split_scaler = StandardScaler().fit(X_train)
X_train = split_scaler.transform(X_train)
X_test = split_scaler.transform(X_test)


In [15]:
# Fidelity-based quantum kernel.
# The sampler primitive and the compute-uncompute fidelity are created first...
sampler = Sampler()
fidelity = ComputeUncompute(sampler=sampler)

# ...then the ZZ feature map, sized to the number of feature columns in X
# (two repetitions, linear entanglement).
feature_map = ZZFeatureMap(
    feature_dimension=X.shape[1],
    reps=2,
    entanglement='linear',
)

kernel = FidelityQuantumKernel(feature_map=feature_map, fidelity=fidelity)


  sampler = Sampler()
  fidelity = ComputeUncompute(sampler=sampler)


In [16]:
# Fit a QSVC with the quantum kernel, score it on the held-out split, and
# report how long each phase took.
qsvc = QSVC(quantum_kernel=kernel)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring intervals; time.time() can jump if the system clock is adjusted.
# The timer starts after construction so "Training time" covers fit() only.
start_time = time.perf_counter()
qsvc.fit(X_train, y_train)
end_time = time.perf_counter()

qsvc_score = qsvc.score(X_test, y_test)
end_time2 = time.perf_counter()

print(f"Training time: {end_time - start_time} seconds")
print(f"Scoring time: {end_time2 - end_time} seconds")
print(f"QSVC accuracy: {qsvc_score}")

AlgorithmError: 'Sampler job failed!'