In [2]:
# Qiskit VQC template generated for your dataset
# - Uses the column named by LABEL_COL as the label (its two values are mapped to binary 0/1)
# - Uses these selected features: ['Difference hits', 'Difference pitchesperinning', 'Difference homerunsper9', 'Difference homeruns', 'Difference doubles', 'Difference baseonballs']
# - Change DATA_CSV to your local path.
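# - Python >=3.9, install: pip install "qiskit>=1.2,<2" "qiskit-machine-learning>=0.7" qiskit-algorithms scikit-learn pandas
#   (qiskit is capped below 2.0 because this script imports the V1 `qiskit.primitives.Estimator`;
#    optionally add qiskit-aer if you set SHOTS)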

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from qiskit.circuit.library import ZZFeatureMap, EfficientSU2
from qiskit.primitives import Estimator
from qiskit.quantum_info import SparsePauliOp
from qiskit_machine_learning.neural_networks import EstimatorQNN
from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier
from qiskit_algorithms.optimizers import COBYLA

# ----------- CONFIG -------------
# Input CSV; set this to your local path.
DATA_CSV = "mlb_game_data_2025.csv"
# Name of the column holding the binary target.
LABEL_COL = "Home Team Won"
# Feature columns fed to the model; the circuit width equals the number of entries.
SELECTED_FEATURES = [
    "Difference hits",
    "Difference pitchesperinning",
    "Difference homerunsper9",
    "Difference homeruns",
    "Difference doubles",
    "Difference baseonballs",
]
# Fraction of rows held out for testing, and the seed used for the split.
TEST_SIZE = 0.25
RANDOM_STATE = 7
# Repetition count shared by the feature map and the ansatz.
REPS = 2
# None selects the default estimator; set an int (e.g., 2048) for a
# sampling-based estimator.
SHOTS = None

def load_data(path, label_col=None, features=None):
    """Load the CSV and return ``(X, y)`` as NumPy arrays.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or file-like).
        label_col: Name of the label column. Defaults to ``LABEL_COL``.
        features: Iterable of feature column names. Defaults to
            ``SELECTED_FEATURES``.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a float array of shape
        ``(n_rows, n_features)`` and ``y`` is an int array of 0/1 labels.
        Rows with a missing feature value or a missing/unmapped label are
        dropped.

    Raises:
        KeyError: If the label column or a feature column is absent.
    """
    label_col = LABEL_COL if label_col is None else label_col
    features = SELECTED_FEATURES if features is None else list(features)

    df = pd.read_csv(path)
    y_raw = df[label_col]

    # Deterministic 0/1 mapping: order the distinct labels by their string
    # form; the first becomes 0 and the second becomes 1. Any further values
    # are left unmapped (NaN) and dropped below.
    uniq = sorted(y_raw.dropna().unique().tolist(), key=str)
    if len(uniq) >= 2:
        y = y_raw.map({uniq[0]: 0, uniq[1]: 1})
    elif pd.api.types.is_numeric_dtype(y_raw):
        # Fallback: threshold a numeric label at its median.
        y = (y_raw > y_raw.median()).astype(float)
    else:
        # Fallback: the first category seen is class 0, anything else class 1.
        # Wrap the codes in a Series aligned with the frame so the masking
        # below works (factorize returns a bare ndarray).
        codes, _ = pd.factorize(y_raw)
        y = pd.Series((codes != 0).astype(float), index=y_raw.index)

    # Missing labels must stay missing instead of being coerced into a class
    # by the comparisons above.
    y = y.where(y_raw.notna())

    X = df[features]
    valid = X.notna().all(axis=1) & y.notna()
    return X[valid].to_numpy(dtype=float), y[valid].to_numpy().astype(int)

def build_qnn(n_features):
    """Build the EstimatorQNN used as the classifier's model.

    The circuit is a ``ZZFeatureMap`` (data encoding) followed by an
    ``EfficientSU2`` ansatz (trainable weights), both on ``n_features``
    qubits with ``REPS`` repetitions and linear entanglement. The network
    output is the expectation value of a single-qubit Pauli-Z observable.

    Args:
        n_features: Number of input features; also the number of qubits.

    Returns:
        The configured ``EstimatorQNN``.
    """
    from qiskit.circuit import QuantumCircuit

    feature_map = ZZFeatureMap(feature_dimension=n_features, reps=REPS, entanglement="linear")
    ansatz = EfficientSU2(n_features, reps=REPS, entanglement="linear")

    qc = QuantumCircuit(n_features)
    qc.compose(feature_map, inplace=True)
    qc.compose(ansatz, inplace=True)

    input_params = list(feature_map.parameters)
    weight_params = list(ansatz.parameters)
    # One Z followed by identities. Qiskit Pauli labels are written with the
    # highest-index qubit first, so this measures Z on the last qubit.
    observable = SparsePauliOp.from_list([("Z" + "I" * (n_features - 1), 1.0)])

    if SHOTS is None:
        est = Estimator()
    else:
        try:
            from qiskit_aer.primitives import Estimator as AerEstimator
        except ImportError:
            # Best-effort fallback, but say so: silently ignoring SHOTS would
            # make results look sampled when they are not.
            import warnings

            warnings.warn(
                "qiskit-aer is not installed; SHOTS is ignored and the "
                "default Estimator is used instead.",
                RuntimeWarning,
                stacklevel=2,
            )
            est = Estimator()
        else:
            # The Aer estimator takes the shot count via run_options, not as
            # a direct keyword argument.
            est = AerEstimator(run_options={"shots": SHOTS})

    return EstimatorQNN(
        circuit=qc,
        input_params=input_params,
        weight_params=weight_params,
        estimator=est,
        observables=observable,
    )

def main():
    """Train the quantum classifier on the configured CSV and print test accuracy.

    Steps: load the data, make a stratified train/test split, standardize
    the features, fit a ``NeuralNetworkClassifier`` wrapping the QNN from
    ``build_qnn``, and report accuracy on the held-out split.
    """
    X, y = load_data(DATA_CSV)

    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=TEST_SIZE, stratify=y, random_state=RANDOM_STATE
    )

    # Fit the scaler on the training split only; fitting on the full dataset
    # would leak test-set statistics into training.
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    X_te = scaler.transform(X_te)

    qnn = build_qnn(X_tr.shape[1])
    clf = NeuralNetworkClassifier(
        neural_network=qnn,
        optimizer=COBYLA(maxiter=200, tol=1e-3),
        # The QNN emits a single expectation value in [-1, 1], which is not a
        # probability, so squared error is used rather than cross entropy.
        loss="squared_error",
        one_hot=False,
    )

    # For a single-output network the classifier predicts the sign of the
    # output, so train on {-1, +1} targets and map predictions back to {0, 1}
    # before scoring against the original labels.
    clf.fit(X_tr, 2 * y_tr - 1)
    y_pred = (np.asarray(clf.predict(X_te)).ravel() > 0).astype(int)

    acc = accuracy_score(y_te, y_pred)
    print(f"Selected features: {SELECTED_FEATURES}")
    print(f"Test accuracy: {acc:.3f}")


if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'qiskit_machine_learning'