Running this notebook trains the 48 VQCs described in the final project (2 feature maps × 2 reps, 3 ansätze × 2 reps, and 2 optimizers).

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import time

# Read the engineered feature table used to train every VQC.
df_vqc = pd.read_csv("Datasets/mlb_vqc_features.csv")

# Pull out the integer target first, then keep everything else as features.
y = df_vqc["y"].astype(int).to_numpy()
X_df = df_vqc.drop(columns=["y"])

# Relabel the target as {-1, +1} to line up with a quantum expectation output.
y_pm1 = 2 * y - 1

# Standardize the features. The fitted `scaler` stays in the namespace so other
# data can later be transformed with these same training statistics.
scaler = StandardScaler()
scaler.fit(X_df.values)
Z = scaler.transform(X_df.values)

# Angle encoding: φ = π * tanh(z) keeps every rotation angle inside (-π, π).
phi = np.pi * np.tanh(Z).astype(np.float32)

# Same arrays, exposed under the names used for Qiskit.
X_angles, y_labels = phi, y_pm1

# One qubit per feature column.
n_qubits = X_angles.shape[1]
print(f"Data prepared for VQC: {len(X_angles)} samples, {n_qubits} features/qubits.")


Data prepared for VQC: 2084 samples, 9 features/qubits.


In [2]:
# Display the training feature frame (pandas elides the middle rows when rendering).
X_df

Unnamed: 0,hits (Home-Away),homeruns (Home-Away),leftonbase (Home-Away),obp (Home-Away),slg (Home-Away),strikeouts (Home-Away),strikepercentage (Home-Away),whip (Home-Away),SP ERA (Home-Away)
0,1.033333,0.200000,0.066667,0.032683,0.052300,1.600000,0.006667,-0.209833,-1.522333
1,-1.500000,-1.100000,-3.200000,0.023800,0.054200,3.100000,-0.007000,-0.446000,-1.853000
2,1.748252,0.412587,0.706294,0.031210,0.052874,1.846154,0.014965,-0.215804,-2.147972
3,-1.272727,-0.818182,-2.545455,0.022273,0.048818,3.090909,-0.006364,-0.435455,-1.702727
4,1.700000,1.400000,-0.400000,0.044400,0.152200,5.200000,0.012000,-0.205000,-1.978000
...,...,...,...,...,...,...,...,...,...
2079,-0.125000,-0.300000,-0.750000,-0.009750,-0.027350,-0.175000,0.013750,0.008750,-0.396500
2080,1.025000,0.400000,1.200000,0.010750,0.002850,1.800000,-0.005750,0.005500,-1.444750
2081,-1.975000,-0.075000,-2.150000,-0.017175,0.002075,0.600000,0.015000,0.232250,1.386500
2082,2.000000,0.575000,0.500000,0.016975,0.021550,2.125000,0.005750,-0.038000,-1.851000


In [4]:
# Load and normalize postseason test data (using training scaler)
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load processed postseason test data
test_df_vqc = pd.read_csv("Datasets/postseason_test_processed.csv")

# Separate X and y. The "y" column is required: it is read unconditionally here,
# so the drop below no longer pretends it may be absent.
y_test = test_df_vqc["y"]
y_test_pm1 = 2 * y_test - 1
X_test_df = test_df_vqc.drop(columns=["y"])

# Use exactly as many features as the training data has, derived from the
# training run instead of a hard-coded 9 so the two cannot drift apart.
n = n_qubits
if X_test_df.shape[1] < n:
    raise ValueError(
        f"Test data has {X_test_df.shape[1]} feature columns but the training "
        f"scaler was fitted on {n}."
    )
# NOTE(review): columns are matched by position, not by name — confirm the test
# file stores its features in the same order as the training file.
X_test_df = X_test_df.iloc[:, :n]  # keep first n columns to match training
print(f"Test data shape: {X_test_df.shape}")

# IMPORTANT: transform test data with the TRAINING scaler (already fitted on X_df).
# Do NOT fit a new scaler on the test data.
Z_test = scaler.transform(X_test_df.values)

# Map to angles: φ = π * tanh(z) → ensures stable range (-π, π)
phi_test = np.pi * np.tanh(Z_test).astype(np.float32)

# Store for Qiskit predictions
X_test_angles = phi_test
y_test_labels = y_test_pm1

print(f"Test data prepared for VQC: {len(X_test_angles)} samples, {X_test_angles.shape[1]} features/qubits.")


Test data shape: (47, 9)
Test data prepared for VQC: 47 samples, 9 features/qubits.


In [21]:
import time
import numpy as np

from qiskit.circuit.library import ZFeatureMap ,ZZFeatureMap,RealAmplitudes,TwoLocal, EfficientSU2
from qiskit_machine_learning.algorithms.classifiers.vqc import VQC
from qiskit_algorithms.optimizers import COBYLA, SPSA


# Extra keyword arguments forwarded to the VQC constructor; none by default.
quantum_instance_kw = dict()

# Feature-map sweep: the "Z" and "ZZ" map types, each tried with reps 1 and 2.
feature_map_configs = [
    {"type": fm_kind, "reps_list": [1, 2]}
    for fm_kind in ("Z", "ZZ")
]

def _real_amplitudes_ansatz(reps):
    """Build a RealAmplitudes ansatz on `n_qubits` qubits with linear entanglement."""
    return RealAmplitudes(num_qubits=n_qubits, reps=reps, entanglement="linear")


def _two_local_ansatz(reps):
    """Build a TwoLocal ansatz (ry + rz rotations, linear cx entanglers) on `n_qubits` qubits."""
    return TwoLocal(
        num_qubits=n_qubits,
        rotation_blocks=['ry', 'rz'],
        entanglement_blocks='cx',
        entanglement="linear",
        reps=reps,
    )


def _efficient_su2_ansatz(reps):
    """Build an EfficientSU2 ansatz on `n_qubits` qubits with linear entanglement."""
    return EfficientSU2(num_qubits=n_qubits, reps=reps, entanglement="linear")


# Ansatz sweep: each family is tried with reps 1 and 2. "builder" takes the
# repetition count and returns a fresh circuit sized to the global `n_qubits`.
# NOTE(review): TwoLocal with ry/rz rotations and cx entanglers looks equivalent
# to EfficientSU2's default gate set — confirm these two entries are meant to
# be distinct ansätze.
ansatz_configs = [
    {"name": label, "reps_list": [1, 2], "builder": make_ansatz}
    for label, make_ansatz in (
        ("RealAmps", _real_amplitudes_ansatz),
        ("TwoLocal", _two_local_ansatz),
        ("EffSU2", _efficient_su2_ansatz),
    )
]

def _make_cobyla():
    """Return a fresh COBYLA optimizer capped at 300 iterations."""
    return COBYLA(maxiter=300, tol=1e-4, rhobeg=0.2)


def _make_spsa():
    """Return a fresh SPSA optimizer capped at 500 iterations."""
    return SPSA(maxiter=500)


# Optimizer sweep: "factory" is a zero-argument callable, so each training run
# can be handed its own newly constructed optimizer instance.
optimizer_configs = [
    {"name": label, "factory": make_optimizer}
    for label, make_optimizer in (
        ("COBYLA300", _make_cobyla),
        ("SPSA500", _make_spsa),
    )
]


def build_feature_map(fm_type, reps):
    """Construct the data-encoding circuit for one sweep configuration.

    Args:
        fm_type: "Z" for a ZFeatureMap or "ZZ" for a ZZFeatureMap.
        reps: Repetition count passed through to the feature map.

    Returns:
        A feature map whose feature dimension is the global `n_qubits`.

    Raises:
        ValueError: If `fm_type` is neither "Z" nor "ZZ".
    """
    # Reject unknown kinds up front so the construction branches stay flat.
    if fm_type not in ("Z", "ZZ"):
        raise ValueError(f"Unknown feature map type {fm_type}")

    if fm_type == "ZZ":
        return ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement="linear")
    return ZFeatureMap(feature_dimension=n_qubits, reps=reps)

def train_vqc(feature_map, ansatz, optimizer, tag, X_train=None, y_train=None):
    """Fit a single VQC and print how long training took.

    Args:
        feature_map: Data-encoding circuit for the classifier.
        ansatz: Parameterized circuit whose weights are optimized.
        optimizer: Optimizer instance handed to the VQC.
        tag: Human-readable label for this configuration, used in progress output.
        X_train: Training features. Defaults to the notebook-level `X_angles`.
        y_train: Training labels. Defaults to the notebook-level 0/1 labels `y`.

    Returns:
        The fitted VQC.
    """
    # The original body fitted on `y01`, a name that is not defined anywhere in
    # the notebook (a leftover from a deleted cell), so a fresh kernel raised
    # NameError. The 0/1 labels `y` are used instead because predictions are
    # later compared against the 0/1 `y_test`, not against the ±1 labels.
    if X_train is None:
        X_train = X_angles
    if y_train is None:
        y_train = y

    print(f"Training {tag} ...")
    vqc = VQC(
        feature_map=feature_map,
        ansatz=ansatz,
        optimizer=optimizer,
        **quantum_instance_kw,
    )
    t0 = time.time()
    vqc.fit(X_train, y_train)
    dt = time.time() - t0
    print(f"  → done in {dt:.1f}s")
    return vqc


# Registry of fitted classifiers, one entry per trained configuration.
# key: (fm_type, fm_reps, ans_name, ans_reps, opt_name) → VQC object
trained_vqcs = {}


In [22]:
# Load the original postseason table once, before the sweep. It supplies the
# team names and series labels used in every per-model prediction report.
postseason_original = pd.read_csv("postseason_test_data.csv")

# The reports pair row i of this table with test sample i, so the two must have
# the same number of rows; fail early rather than after hours of training.
if len(postseason_original) != len(y_test):
    raise ValueError(
        f"postseason_test_data.csv has {len(postseason_original)} rows but "
        f"y_test has {len(y_test)}; they must be row-aligned."
    )

# Pieces of the report that do not depend on the model are computed once.
home_teams = postseason_original["Home Team"].to_numpy()
away_teams = postseason_original["Away Team"].to_numpy()
y_test_values = y_test.to_numpy()
# Label 1 means the home team won; anything else is credited to the away team.
actual_winner = np.where(y_test_values == 1, home_teams, away_teams)

# Sweep every (feature map, ansatz, optimizer) combination.
for fm_conf in feature_map_configs:
    fm_type = fm_conf["type"]
    for fm_reps in fm_conf["reps_list"]:
        fm = build_feature_map(fm_type, fm_reps)
        for ans_conf in ansatz_configs:
            ans_name = ans_conf["name"]
            for ans_reps in ans_conf["reps_list"]:
                ans = ans_conf["builder"](ans_reps)
                for opt_conf in optimizer_configs:
                    opt_name = opt_conf["name"]
                    # A fresh optimizer per run so no state carries over between models.
                    optimizer = opt_conf["factory"]()
                    tag = (
                        f"{fm_type}(reps={fm_reps})-"
                        f"{ans_name}(reps={ans_reps})-"
                        f"{opt_name}"
                    )
                    print(f"\n[DEBUG] Current optimizer: {opt_name}")
                    vqc_model = train_vqc(fm, ans, optimizer, tag)

                    # Score on the postseason test set; correctness is computed
                    # once and reused for both the accuracy and the report.
                    y_pred_test = vqc_model.predict(X_test_angles)
                    y_pred_flat = np.asarray(y_pred_test).ravel()
                    is_correct = y_pred_flat == y_test_values
                    test_acc = is_correct.mean()
                    print(f"  Test accuracy: {test_acc:.3f}")

                    # Predictions comparison table (vectorized instead of row-by-row).
                    predicted_winner = np.where(y_pred_flat == 1, home_teams, away_teams)
                    results_df = pd.DataFrame({
                        'Series_Game': postseason_original['Series_Game'],
                        'Actual Winner': actual_winner,
                        'Predicted Winner': predicted_winner,
                        'Correct': is_correct,
                    })

                    correct_count = results_df['Correct'].sum()
                    total_count = len(results_df)

                    # Save to CSV with a filename that encodes the configuration.
                    filename = f"predictions_{fm_type}_r{fm_reps}_{ans_name}_r{ans_reps}_{opt_name}.csv"
                    results_df.to_csv(filename, index=False)
                    print(f"  Saved predictions to: {filename}")
                    print(f"  Correct: {correct_count}/{total_count} ({test_acc:.1%})")

                    # Keep the fitted model so it can be looked up by configuration.
                    key = (fm_type, fm_reps, ans_name, ans_reps, opt_name)
                    trained_vqcs[key] = vqc_model

print("\nAll VQCs trained and stored!")
print(f"Total models stored: {len(trained_vqcs)}")


[DEBUG] Current optimizer: COBYLA300
Training Z(reps=1)-RealAmps(reps=1)-COBYLA300 ...
  → done in 1762.4s
  Test accuracy: 0.553
  Saved predictions to: predictions_Z_r1_RealAmps_r1_COBYLA300.csv
  Correct: 26/47 (55.3%)

[DEBUG] Current optimizer: SPSA500
Training Z(reps=1)-RealAmps(reps=1)-SPSA500 ...
  → done in 1762.4s
  Test accuracy: 0.553
  Saved predictions to: predictions_Z_r1_RealAmps_r1_COBYLA300.csv
  Correct: 26/47 (55.3%)

[DEBUG] Current optimizer: SPSA500
Training Z(reps=1)-RealAmps(reps=1)-SPSA500 ...
  → done in 8101.1s
  Test accuracy: 0.553
  Saved predictions to: predictions_Z_r1_RealAmps_r1_SPSA500.csv
  Correct: 26/47 (55.3%)

[DEBUG] Current optimizer: COBYLA300
Training Z(reps=1)-RealAmps(reps=2)-COBYLA300 ...
  → done in 8101.1s
  Test accuracy: 0.553
  Saved predictions to: predictions_Z_r1_RealAmps_r1_SPSA500.csv
  Correct: 26/47 (55.3%)

[DEBUG] Current optimizer: COBYLA300
Training Z(reps=1)-RealAmps(reps=2)-COBYLA300 ...
  → done in 2521.6s
  Test accur