In [8]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import time

# Load
df_vqc = pd.read_csv("mlb_vqc_features.csv")

# Separate X and y
X_df = df_vqc.drop(columns=["y"])

n = 9  # <-- choose how many features/qubits you want

X_df= X_df.iloc[:, :n]   # keep first n columns
print("New shape:", X_df.shape)


y01 = df_vqc["y"].astype(int).to_numpy()
y_pm1 = 2*y01 - 1   # convert to {-1, +1} for quantum expectation output

# Standardize ALL training data (no splitting now)
scaler = StandardScaler().fit(X_df.values)
Z = scaler.transform(X_df.values)

# Map to angles: φ = π * tanh(z)  → ensures stable range (-π, π)
phi = np.pi * np.tanh(Z).astype(np.float32)

# Store for Qiskit
X_angles = phi
y_labels = y_pm1

n_qubits = X_angles.shape[1]
print(f"✅ Data prepared for VQC: {len(X_angles)} samples, {n_qubits} features/qubits.")


New shape: (2084, 9)
✅ Data prepared for VQC: 2084 samples, 9 features/qubits.


In [9]:
X_df

Unnamed: 0,hits (Home-Away),homeruns (Home-Away),leftonbase (Home-Away),obp (Home-Away),slg (Home-Away),strikeouts (Home-Away),strikepercentage (Home-Away),whip (Home-Away),SP ERA (Home-Away)
0,1.033333,0.200000,0.066667,0.032683,0.052300,1.600000,0.006667,-0.209833,-1.522333
1,-1.500000,-1.100000,-3.200000,0.023800,0.054200,3.100000,-0.007000,-0.446000,-1.853000
2,1.748252,0.412587,0.706294,0.031210,0.052874,1.846154,0.014965,-0.215804,-2.147972
3,-1.272727,-0.818182,-2.545455,0.022273,0.048818,3.090909,-0.006364,-0.435455,-1.702727
4,1.700000,1.400000,-0.400000,0.044400,0.152200,5.200000,0.012000,-0.205000,-1.978000
...,...,...,...,...,...,...,...,...,...
2079,-0.125000,-0.300000,-0.750000,-0.009750,-0.027350,-0.175000,0.013750,0.008750,-0.396500
2080,1.025000,0.400000,1.200000,0.010750,0.002850,1.800000,-0.005750,0.005500,-1.444750
2081,-1.975000,-0.075000,-2.150000,-0.017175,0.002075,0.600000,0.015000,0.232250,1.386500
2082,2.000000,0.575000,0.500000,0.016975,0.021550,2.125000,0.005750,-0.038000,-1.851000


In [10]:
from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes
from qiskit_machine_learning.algorithms.classifiers.vqc import VQC
from qiskit_algorithms.optimizers import COBYLA, SPSA


quantum_instance_kw = {}

# feature map & ansatz
feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=2, entanglement='linear')
ansatz = RealAmplitudes(num_qubits=n_qubits, reps=2, entanglement='linear')

# optimizer
optimizer = COBYLA(maxiter=300, tol=1e-4, rhobeg=0.2)

# VQC classifier (expects labels in {0,1})
vqc = VQC(
    feature_map=feature_map,
    ansatz=ansatz,
    optimizer=optimizer,
    **quantum_instance_kw
)
t1 = time.time()
# train
vqc.fit(X_angles, y01)
t2 = time.time()
print(f"VQC trained in {t2 - t1:.1f} seconds")
y_pred = vqc.predict(X_angles)
train_acc = (y_pred == y01).mean()
print(f"Train accuracy (VQC): {train_acc:.3f}")


VQC trained in 2575.0 seconds
Train accuracy (VQC): 0.521


In [11]:
# Load and normalize postseason test data (using training scaler)
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load processed postseason test data
test_df_vqc = pd.read_csv("postseason_test_processed.csv")

# Separate X and y
X_test_df = test_df_vqc.drop(columns=["y"], errors="ignore")
y_test = test_df_vqc['y']
y_test_pm1 = 2 * y_test - 1

# Use the same n features as training
n = 9
X_test_df = X_test_df.iloc[:, :n]  # keep first n columns to match training
print(f"Test data shape: {X_test_df.shape}")

# IMPORTANT: Transform test data using the TRAINING scaler (already fitted on X_df)
# DO NOT fit a new scaler on test data!
Z_test = scaler.transform(X_test_df.values)

# Map to angles: φ = π * tanh(z) → ensures stable range (-π, π)
phi_test = np.pi * np.tanh(Z_test).astype(np.float32)

# Store for Qiskit predictions
X_test_angles = phi_test
y_test_labels = y_test_pm1

print(f"✅ Test data prepared for VQC: {len(X_test_angles)} samples, {X_test_angles.shape[1]} features/qubits.")
print(f"Test labels range: [{y_test_labels.min()}, {y_test_labels.max()}]")


Test data shape: (47, 9)
✅ Test data prepared for VQC: 47 samples, 9 features/qubits.
Test labels range: [-1, 1]


In [12]:
y_pred_test = vqc.predict(X_test_angles)
train_acc_test = (y_pred_test == y_test).mean()
print(f"Train accuracy (VQC): {train_acc_test:.3f}")

Train accuracy (VQC): 0.553


In [13]:
# Create predictions comparison table
import pandas as pd
import numpy as np

# Load original postseason data to get game identifiers
postseason_original = pd.read_csv("postseason_test_data.csv")

# Determine actual and predicted winners as team abbreviations
actual_winner = []
predicted_winner = []

for i in range(len(postseason_original)):
    home_team = postseason_original.iloc[i]['Home Team']
    away_team = postseason_original.iloc[i]['Away Team']
    
    # Actual winner
    if y_test[i] == 1:
        actual_winner.append(home_team)
    else:
        actual_winner.append(away_team)
    
    # Predicted winner
    if y_pred_test[i] == 1:
        predicted_winner.append(home_team)
    else:
        predicted_winner.append(away_team)

# Create results dataframe
results_df = pd.DataFrame({
    'Series_Game': postseason_original['Series_Game'],
    'Actual Winner': actual_winner,
    'Predicted Winner': predicted_winner,
    'Correct': (y_pred_test == y_test)
})



# Calculate accuracy
accuracy = results_df['Correct'].mean()
correct_count = results_df['Correct'].sum()
total_count = len(results_df)

print(f"VQC Postseason Prediction Results")
print(f"=" * 60)
print(f"Total Games: {total_count}")
print(f"Correct Predictions: {correct_count}")
print(f"Wrong Predictions: {total_count - correct_count}")
print(f"Accuracy: {accuracy:.1%}")
print(f"\n" + "=" * 60)

# Display the full results
results_df


VQC Postseason Prediction Results
Total Games: 47
Correct Predictions: 26
Wrong Predictions: 21
Accuracy: 55.3%



Unnamed: 0,Series_Game,Actual Winner,Predicted Winner,Correct
0,WS Game 1,TOR,LAD,False
1,WS Game 2,LAD,LAD,True
2,WS Game 3,LAD,TOR,False
3,WS Game 4,TOR,TOR,True
4,WS Game 5,TOR,TOR,True
5,WS Game 6,LAD,LAD,True
6,WS Game 7,LAD,LAD,True
7,ALCS Game 1,SEA,SEA,True
8,ALCS Game 2,SEA,SEA,True
9,ALCS Game 3,TOR,TOR,True
