In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                             f1_score, confusion_matrix, classification_report,
                             mean_squared_error, mean_absolute_error, r2_score)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:

# LOAD DATASET
print("="*100)
print(" T20I CRICKET PREDICTION USING SUPPORT VECTOR MACHINES (SVM)")
print(" THREE MODELS: TOSS DECISION | MATCH WINNER | TARGET SCORE")
print("="*100)

# Load the dataset
df = pd.read_excel("C:/Users/a/Desktop/AIML/t20i_Matches_Data_final.xlsx")

print("\n DATASET OVERVIEW")
print("-" * 100)
print(f"Dataset Shape: {df.shape}")
print(f"Total Samples: {df.shape[0]}")
print(f"Total Features: {df.shape[1]}")
print("\nFirst 5 rows:")
print(df.head())

# Check for missing values
print("\n Missing Values Check:")
missing = df.isnull().sum()
if missing.sum() == 0:
    print("No missing values found!")
else:
    print(missing[missing > 0])

 T20I CRICKET PREDICTION USING SUPPORT VECTOR MACHINES (SVM)
 THREE MODELS: TOSS DECISION | MATCH WINNER | TARGET SCORE

 DATASET OVERVIEW
----------------------------------------------------------------------------------------------------
Dataset Shape: (2591, 14)
Total Samples: 2591
Total Features: 14

First 5 rows:
   Team1 Name  Team1 Runs Scored  Team1 Wickets Fell  Team2 Name  \
0          40           0.230769                 1.0           2   
1          26           0.612179                 0.8          65   
2          80           0.336538                 0.8          64   
3          49           0.333333                 0.9          80   
4         102           0.333333                 0.8          89   

   Team2 Runs Scored  Team2 Wickets Fell  Match Venue (Stadium)  \
0           0.278431                 0.1                    118   
1           0.545098                 0.8                     83   
2           0.415686                 0.5                     32   
3  

In [3]:

# ############################################################################
# MODEL 1: TOSS DECISION PREDICTION (CLASSIFICATION)
# ############################################################################
print("\n\n" + "="*100)
print(" MODEL 1: TOSS DECISION PREDICTION (SVM CLASSIFICATION)")
print("="*100)

print("\n Problem: Predict whether toss winner chooses to BAT or BOWL")
print("Algorithm: Support Vector Machine (SVM) - Classification")

# Features for toss decision
toss_features = [
    'Match Venue (Stadium)', 'Match Venue (City)', 'Match Venue (Country)',
    'Toss Winner'
]

X_toss = df[toss_features].copy()

# Create target: 1 if bat, 0 if bowl
y_toss = df['Toss Winner Choice_bat'].astype(int).copy()

# Encode categorical variables
le_stadium_1 = LabelEncoder()
le_city_1 = LabelEncoder()
le_country_1 = LabelEncoder()
le_winner_1 = LabelEncoder()

X_toss['Match Venue (Stadium)'] = le_stadium_1.fit_transform(X_toss['Match Venue (Stadium)'])
X_toss['Match Venue (City)'] = le_city_1.fit_transform(X_toss['Match Venue (City)'])
X_toss['Match Venue (Country)'] = le_country_1.fit_transform(X_toss['Match Venue (Country)'])
X_toss['Toss Winner'] = le_winner_1.fit_transform(X_toss['Toss Winner'])

print(f"\n Features used: {len(toss_features)}")
print(f" Target: Toss Decision (0=Bowl, 1=Bat)")
print(f" Class distribution:\n  Bat:  {(y_toss==1).sum()} ({(y_toss==1).sum()/len(y_toss)*100:.1f}%)")
print(f"  Bowl: {(y_toss==0).sum()} ({(y_toss==0).sum()/len(y_toss)*100:.1f}%)")

# Train-test split
X_train_toss, X_test_toss, y_train_toss, y_test_toss = train_test_split(
    X_toss, y_toss, test_size=0.2, random_state=42, stratify=y_toss
)

# Feature scaling
scaler_toss = StandardScaler()
X_train_toss_scaled = scaler_toss.fit_transform(X_train_toss)
X_test_toss_scaled = scaler_toss.transform(X_test_toss)

print(f"\n Training samples: {X_train_toss.shape[0]}")
print(f" Testing samples: {X_test_toss.shape[0]}")

# ----------------------------------------------------------------------------
# MODEL 1 - VARIETY 1: Linear Kernel
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 1 - VARIETY 1: SVM with LINEAR KERNEL")
print("-"*100)

svm_toss_linear = SVC(kernel='linear', C=1.0, random_state=42, probability=True)
svm_toss_linear.fit(X_train_toss_scaled, y_train_toss)

y_pred_toss_linear = svm_toss_linear.predict(X_test_toss_scaled)

acc_toss_linear = accuracy_score(y_test_toss, y_pred_toss_linear)
precision_toss_linear = precision_score(y_test_toss, y_pred_toss_linear, average='weighted')
recall_toss_linear = recall_score(y_test_toss, y_pred_toss_linear, average='weighted')
f1_toss_linear = f1_score(y_test_toss, y_pred_toss_linear, average='weighted')

print(f"\n Performance Metrics:")
print(f"   Accuracy:  {acc_toss_linear:.4f}")
print(f"   Precision: {precision_toss_linear:.4f}")
print(f"   Recall:    {recall_toss_linear:.4f}")
print(f"   F1-Score:  {f1_toss_linear:.4f}")

cv_toss_linear = cross_val_score(svm_toss_linear, X_train_toss_scaled, y_train_toss, cv=5)
print(f"\n 5-Fold CV Score: {cv_toss_linear.mean():.4f} (±{cv_toss_linear.std():.4f})")

# ----------------------------------------------------------------------------
# MODEL 1 - VARIETY 2: RBF Kernel
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 1 - VARIETY 2: SVM with RBF KERNEL")
print("-"*100)

svm_toss_rbf = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42, probability=True)
svm_toss_rbf.fit(X_train_toss_scaled, y_train_toss)

y_pred_toss_rbf = svm_toss_rbf.predict(X_test_toss_scaled)

acc_toss_rbf = accuracy_score(y_test_toss, y_pred_toss_rbf)
precision_toss_rbf = precision_score(y_test_toss, y_pred_toss_rbf, average='weighted')
recall_toss_rbf = recall_score(y_test_toss, y_pred_toss_rbf, average='weighted')
f1_toss_rbf = f1_score(y_test_toss, y_pred_toss_rbf, average='weighted')

print(f"\n Performance Metrics:")
print(f"   Accuracy:  {acc_toss_rbf:.4f}")
print(f"   Precision: {precision_toss_rbf:.4f}")
print(f"   Recall:    {recall_toss_rbf:.4f}")
print(f"   F1-Score:  {f1_toss_rbf:.4f}")

cv_toss_rbf = cross_val_score(svm_toss_rbf, X_train_toss_scaled, y_train_toss, cv=5)
print(f"\n 5-Fold CV Score: {cv_toss_rbf.mean():.4f} (±{cv_toss_rbf.std():.4f})")

# ----------------------------------------------------------------------------
# MODEL 1 - VARIETY 3: Hyperparameter Tuning
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 1 - VARIETY 3: SVM with GRID SEARCH TUNING")
print("-"*100)

param_grid_toss = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.01],
    'kernel': ['rbf', 'poly']
}

print("\n Hyperparameter Grid:")
for param, values in param_grid_toss.items():
    print(f"   {param}: {values}")

grid_toss = GridSearchCV(
    SVC(random_state=42, probability=True),
    param_grid_toss,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=0
)

grid_toss.fit(X_train_toss_scaled, y_train_toss)

print(f"\n Best Parameters: {grid_toss.best_params_}")
print(f" Best CV Score: {grid_toss.best_score_:.4f}")

svm_toss_tuned = grid_toss.best_estimator_
y_pred_toss_tuned = svm_toss_tuned.predict(X_test_toss_scaled)

acc_toss_tuned = accuracy_score(y_test_toss, y_pred_toss_tuned)
precision_toss_tuned = precision_score(y_test_toss, y_pred_toss_tuned, average='weighted')
recall_toss_tuned = recall_score(y_test_toss, y_pred_toss_tuned, average='weighted')
f1_toss_tuned = f1_score(y_test_toss, y_pred_toss_tuned, average='weighted')

print(f"\n Performance Metrics:")
print(f"   Accuracy:  {acc_toss_tuned:.4f}")
print(f"   Precision: {precision_toss_tuned:.4f}")
print(f"   Recall:    {recall_toss_tuned:.4f}")
print(f"   F1-Score:  {f1_toss_tuned:.4f}")

print("\n Confusion Matrix:")
cm_toss = confusion_matrix(y_test_toss, y_pred_toss_tuned)
print(cm_toss)



 MODEL 1: TOSS DECISION PREDICTION (SVM CLASSIFICATION)

 Problem: Predict whether toss winner chooses to BAT or BOWL
Algorithm: Support Vector Machine (SVM) - Classification

 Features used: 4
 Target: Toss Decision (0=Bowl, 1=Bat)
 Class distribution:
  Bat:  1220 (47.1%)
  Bowl: 1371 (52.9%)

 Training samples: 2072
 Testing samples: 519

----------------------------------------------------------------------------------------------------
MODEL 1 - VARIETY 1: SVM with LINEAR KERNEL
----------------------------------------------------------------------------------------------------

 Performance Metrics:
   Accuracy:  0.5299
   Precision: 0.2808
   Recall:    0.5299
   F1-Score:  0.3670

 5-Fold CV Score: 0.5290 (±0.0008)

----------------------------------------------------------------------------------------------------
MODEL 1 - VARIETY 2: SVM with RBF KERNEL
----------------------------------------------------------------------------------------------------

 Performance Metrics

In [None]:
# ############################################################################
# MODEL 2: MATCH WINNER PREDICTION (CLASSIFICATION)
# ############################################################################
print("\n\n" + "="*100)
print(" MODEL 2: MATCH WINNER PREDICTION (SVM CLASSIFICATION)")
print("="*100)

print("\n Problem: Predict which team wins the match")
print("Algorithm: Support Vector Machine (SVM) - Classification")

# Features for match winner
winner_features = [
    'Team1 Runs Scored', 'Team1 Wickets Fell',
    'Team2 Runs Scored', 'Team2 Wickets Fell',
    'Match Venue (Stadium)', 'Match Venue (City)', 'Match Venue (Country)',
    'Toss Winner', 'Toss Winner Choice_bat', 'Toss Winner Choice_bowl'
]

X_winner = df[winner_features].copy()
y_winner = df['Match Winner'].copy()

# Encode categorical variables
le_stadium_2 = LabelEncoder()
le_city_2 = LabelEncoder()
le_country_2 = LabelEncoder()
le_toss_2 = LabelEncoder()
le_match_winner = LabelEncoder()

X_winner['Match Venue (Stadium)'] = le_stadium_2.fit_transform(X_winner['Match Venue (Stadium)'])
X_winner['Match Venue (City)'] = le_city_2.fit_transform(X_winner['Match Venue (City)'])
X_winner['Match Venue (Country)'] = le_country_2.fit_transform(X_winner['Match Venue (Country)'])
X_winner['Toss Winner'] = le_toss_2.fit_transform(X_winner['Toss Winner'])
X_winner['Toss Winner Choice_bat'] = X_winner['Toss Winner Choice_bat'].astype(int)
X_winner['Toss Winner Choice_bowl'] = X_winner['Toss Winner Choice_bowl'].astype(int)

y_winner_encoded = le_match_winner.fit_transform(y_winner)

print(f"\n✓ Features used: {len(winner_features)}")
print(f"✓ Number of teams: {len(np.unique(y_winner_encoded))}")
print(f"✓ Top 5 teams by matches:")
print(df['Match Winner'].value_counts().head())

# Train-test split
X_train_winner, X_test_winner, y_train_winner, y_test_winner = train_test_split(
    X_winner, y_winner_encoded, test_size=0.2, random_state=42
)

# Feature scaling
scaler_winner = StandardScaler()
X_train_winner_scaled = scaler_winner.fit_transform(X_train_winner)
X_test_winner_scaled = scaler_winner.transform(X_test_winner)

print(f"\n✓ Training samples: {X_train_winner.shape[0]}")
print(f"✓ Testing samples: {X_test_winner.shape[0]}")

# ----------------------------------------------------------------------------
# MODEL 2 - VARIETY 1: Linear Kernel
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 2 - VARIETY 1: SVM with LINEAR KERNEL")
print("-"*100)

svm_winner_linear = SVC(kernel='linear', C=1.0, random_state=42, probability=True)
svm_winner_linear.fit(X_train_winner_scaled, y_train_winner)

y_pred_winner_linear = svm_winner_linear.predict(X_test_winner_scaled)

acc_winner_linear = accuracy_score(y_test_winner, y_pred_winner_linear)
precision_winner_linear = precision_score(y_test_winner, y_pred_winner_linear, average='weighted')
recall_winner_linear = recall_score(y_test_winner, y_pred_winner_linear, average='weighted')
f1_winner_linear = f1_score(y_test_winner, y_pred_winner_linear, average='weighted')

print(f"\n Performance Metrics:")
print(f"   Accuracy:  {acc_winner_linear:.4f}")
print(f"   Precision: {precision_winner_linear:.4f}")
print(f"   Recall:    {recall_winner_linear:.4f}")
print(f"   F1-Score:  {f1_winner_linear:.4f}")

cv_winner_linear = cross_val_score(svm_winner_linear, X_train_winner_scaled, y_train_winner, cv=5)
print(f"\n 5-Fold CV Score: {cv_winner_linear.mean():.4f} (±{cv_winner_linear.std():.4f})")

# ----------------------------------------------------------------------------
# MODEL 2 - VARIETY 2: RBF Kernel
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 2 - VARIETY 2: SVM with RBF KERNEL")
print("-"*100)

svm_winner_rbf = SVC(kernel='rbf', C=10, gamma='scale', random_state=42, probability=True)
svm_winner_rbf.fit(X_train_winner_scaled, y_train_winner)

y_pred_winner_rbf = svm_winner_rbf.predict(X_test_winner_scaled)

acc_winner_rbf = accuracy_score(y_test_winner, y_pred_winner_rbf)
precision_winner_rbf = precision_score(y_test_winner, y_pred_winner_rbf, average='weighted')
recall_winner_rbf = recall_score(y_test_winner, y_pred_winner_rbf, average='weighted')
f1_winner_rbf = f1_score(y_test_winner, y_pred_winner_rbf, average='weighted')

print(f"\n Performance Metrics:")
print(f"   Accuracy:  {acc_winner_rbf:.4f}")
print(f"   Precision: {precision_winner_rbf:.4f}")
print(f"   Recall:    {recall_winner_rbf:.4f}")
print(f"   F1-Score:  {f1_winner_rbf:.4f}")

cv_winner_rbf = cross_val_score(svm_winner_rbf, X_train_winner_scaled, y_train_winner, cv=5)
print(f"\n 5-Fold CV Score: {cv_winner_rbf.mean():.4f} (±{cv_winner_rbf.std():.4f})")

# ----------------------------------------------------------------------------
# MODEL 2 - VARIETY 3: Hyperparameter Tuning
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 2 - VARIETY 3: SVM with GRID SEARCH TUNING")
print("-"*100)

param_grid_winner = {
    'C': [1, 10, 100],
    'gamma': ['scale', 'auto'],
    'kernel': ['rbf']
}

print("\n Hyperparameter Grid:")
for param, values in param_grid_winner.items():
    print(f"   {param}: {values}")

grid_winner = GridSearchCV(
    SVC(random_state=42, probability=True),
    param_grid_winner,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=0
)

grid_winner.fit(X_train_winner_scaled, y_train_winner)

print(f"\n✓ Best Parameters: {grid_winner.best_params_}")
print(f"✓ Best CV Score: {grid_winner.best_score_:.4f}")

svm_winner_tuned = grid_winner.best_estimator_
y_pred_winner_tuned = svm_winner_tuned.predict(X_test_winner_scaled)

acc_winner_tuned = accuracy_score(y_test_winner, y_pred_winner_tuned)
precision_winner_tuned = precision_score(y_test_winner, y_pred_winner_tuned, average='weighted')
recall_winner_tuned = recall_score(y_test_winner, y_pred_winner_tuned, average='weighted')
f1_winner_tuned = f1_score(y_test_winner, y_pred_winner_tuned, average='weighted')

print(f"\n Performance Metrics:")
print(f"   Accuracy:  {acc_winner_tuned:.4f}")
print(f"   Precision: {precision_winner_tuned:.4f}")
print(f"   Recall:    {recall_winner_tuned:.4f}")
print(f"   F1-Score:  {f1_winner_tuned:.4f}")

print("\n Confusion Matrix (first 10x10 shown):")
cm_winner = confusion_matrix(y_test_winner, y_pred_winner_tuned)
print(cm_winner[:10, :10])



 MODEL 2: MATCH WINNER PREDICTION (SVM CLASSIFICATION)

 Problem: Predict which team wins the match
Algorithm: Support Vector Machine (SVM) - Classification

✓ Features used: 10
✓ Number of teams: 100
✓ Top 5 teams by matches:
Match Winner
40    140
67    138
62    109
2     100
99     98
Name: count, dtype: int64

✓ Training samples: 2072
✓ Testing samples: 519

----------------------------------------------------------------------------------------------------
MODEL 2 - VARIETY 1: SVM with LINEAR KERNEL
----------------------------------------------------------------------------------------------------

 Performance Metrics:
   Accuracy:  0.1407
   Precision: 0.1281
   Recall:    0.1407
   F1-Score:  0.1083


In [None]:

# ############################################################################
# MODEL 3: TARGET SCORE PREDICTION (REGRESSION)
# ############################################################################
print("\n\n" + "="*100)
print(" MODEL 3: TARGET SCORE PREDICTION (SVM REGRESSION - SVR)")
print("="*100)

print("\n Problem: Predict target score (Team1 Runs Scored)")
print("Algorithm: Support Vector Regression (SVR)")

# Features for target score prediction
score_features = [
    'Team1 Wickets Fell', 'Match Venue (Stadium)', 
    'Match Venue (City)', 'Match Venue (Country)',
    'Toss Winner', 'Toss Winner Choice_bat', 'Toss Winner Choice_bowl'
]

X_score = df[score_features].copy()
y_score = df['Team1 Runs Scored'].copy()

# Encode categorical variables
le_stadium_3 = LabelEncoder()
le_city_3 = LabelEncoder()
le_country_3 = LabelEncoder()
le_toss_3 = LabelEncoder()

X_score['Match Venue (Stadium)'] = le_stadium_3.fit_transform(X_score['Match Venue (Stadium)'])
X_score['Match Venue (City)'] = le_city_3.fit_transform(X_score['Match Venue (City)'])
X_score['Match Venue (Country)'] = le_country_3.fit_transform(X_score['Match Venue (Country)'])
X_score['Toss Winner'] = le_toss_3.fit_transform(X_score['Toss Winner'])
X_score['Toss Winner Choice_bat'] = X_score['Toss Winner Choice_bat'].astype(int)
X_score['Toss Winner Choice_bowl'] = X_score['Toss Winner Choice_bowl'].astype(int)

print(f"\n Features used: {len(score_features)}")
print(f" Target: Team1 Runs Scored")
print(f" Score Range: {y_score.min():.0f} - {y_score.max():.0f}")
print(f" Mean Score: {y_score.mean():.2f} (±{y_score.std():.2f})")

# Train-test split
X_train_score, X_test_score, y_train_score, y_test_score = train_test_split(
    X_score, y_score, test_size=0.2, random_state=42
)

# Feature scaling
scaler_score = StandardScaler()
X_train_score_scaled = scaler_score.fit_transform(X_train_score)
X_test_score_scaled = scaler_score.transform(X_test_score)

print(f"\n Training samples: {X_train_score.shape[0]}")
print(f" Testing samples: {X_test_score.shape[0]}")

# ----------------------------------------------------------------------------
# MODEL 3 - VARIETY 1: Linear Kernel SVR
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 3 - VARIETY 1: SVR with LINEAR KERNEL")
print("-"*100)

svr_linear = SVR(kernel='linear', C=1.0)
svr_linear.fit(X_train_score_scaled, y_train_score)

y_pred_score_linear = svr_linear.predict(X_test_score_scaled)

mse_linear = mean_squared_error(y_test_score, y_pred_score_linear)
rmse_linear = np.sqrt(mse_linear)
mae_linear = mean_absolute_error(y_test_score, y_pred_score_linear)
r2_linear = r2_score(y_test_score, y_pred_score_linear)

print(f"\n Performance Metrics:")
print(f"   RMSE (Root Mean Squared Error): {rmse_linear:.4f}")
print(f"   MAE (Mean Absolute Error):      {mae_linear:.4f}")
print(f"   R² Score:                       {r2_linear:.4f}")

cv_score_linear = cross_val_score(svr_linear, X_train_score_scaled, y_train_score, 
                                   cv=5, scoring='neg_mean_squared_error')
cv_rmse_linear = np.sqrt(-cv_score_linear.mean())
print(f"\n 5-Fold CV RMSE: {cv_rmse_linear:.4f} (±{np.sqrt(cv_score_linear.std()):.4f})")

# ----------------------------------------------------------------------------
# MODEL 3 - VARIETY 2: RBF Kernel SVR
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 3 - VARIETY 2: SVR with RBF KERNEL")
print("-"*100)

svr_rbf = SVR(kernel='rbf', C=10, gamma='scale', epsilon=0.1)
svr_rbf.fit(X_train_score_scaled, y_train_score)

y_pred_score_rbf = svr_rbf.predict(X_test_score_scaled)

mse_rbf = mean_squared_error(y_test_score, y_pred_score_rbf)
rmse_rbf = np.sqrt(mse_rbf)
mae_rbf = mean_absolute_error(y_test_score, y_pred_score_rbf)
r2_rbf = r2_score(y_test_score, y_pred_score_rbf)

print(f"\n Performance Metrics:")
print(f"   RMSE (Root Mean Squared Error): {rmse_rbf:.4f}")
print(f"   MAE (Mean Absolute Error):      {mae_rbf:.4f}")
print(f"   R² Score:                       {r2_rbf:.4f}")

cv_score_rbf = cross_val_score(svr_rbf, X_train_score_scaled, y_train_score, 
                                cv=5, scoring='neg_mean_squared_error')
cv_rmse_rbf = np.sqrt(-cv_score_rbf.mean())
print(f"\n 5-Fold CV RMSE: {cv_rmse_rbf:.4f} (±{np.sqrt(cv_score_rbf.std()):.4f})")

# ----------------------------------------------------------------------------
# MODEL 3 - VARIETY 3: Hyperparameter Tuning
# ----------------------------------------------------------------------------
print("\n" + "-"*100)
print("MODEL 3 - VARIETY 3: SVR with GRID SEARCH TUNING")
print("-"*100)

param_grid_score = {
    'C': [1, 10, 100],
    'gamma': ['scale', 'auto', 0.01],
    'epsilon': [0.01, 0.1, 0.2],
    'kernel': ['rbf']
}

print("\n Hyperparameter Grid:")
for param, values in param_grid_score.items():
    print(f"   {param}: {values}")

grid_score = GridSearchCV(
    SVR(),
    param_grid_score,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=0
)

grid_score.fit(X_train_score_scaled, y_train_score)

print(f"\n Best Parameters: {grid_score.best_params_}")
print(f" Best CV RMSE: {np.sqrt(-grid_score.best_score_):.4f}")

svr_tuned = grid_score.best_estimator_
y_pred_score_tuned = svr_tuned.predict(X_test_score_scaled)

mse_tuned = mean_squared_error(y_test_score, y_pred_score_tuned)
rmse_tuned = np.sqrt(mse_tuned)
mae_tuned = mean_absolute_error(y_test_score, y_pred_score_tuned)
r2_tuned = r2_score(y_test_score, y_pred_score_tuned)

print(f"\n Performance Metrics:")
print(f"   RMSE (Root Mean Squared Error): {rmse_tuned:.4f}")
print(f"   MAE (Mean Absolute Error):      {mae_tuned:.4f}")
print(f"   R² Score:                       {r2_tuned:.4f}")

print("\n Sample Predictions (First 10):")
comparison_df = pd.DataFrame({
    'Actual': y_test_score.values[:10],
    'Predicted': y_pred_score_tuned[:10],
    'Error': y_test_score.values[:10] - y_pred_score_tuned[:10]
})
print(comparison_df.to_string(index=False))

# ############################################################################
# FINAL COMPARISON AND VISUALIZATIONS
# ############################################################################
print("\n\n" + "="*100)
print(" FINAL MODEL COMPARISON")
print("="*100)

# Model 1 Comparison
print("\n MODEL 1: TOSS DECISION PREDICTION")
print("-" * 100)
model1_comparison = pd.DataFrame({
    'Variant': ['Linear SVM', 'RBF SVM', 'Tuned SVM'],
    'Accuracy': [acc_toss_linear, acc_toss_rbf, acc_toss_tuned],
    'F1-Score': [f1_toss_linear, f1_toss_rbf, f1_toss_tuned],
    'CV Score': [cv_toss_linear.mean(), cv_toss_rbf.mean(), grid_toss.best_score_]
})
print(model1_comparison.to_string(index=False))
best_m1 = model1_comparison.loc[model1_comparison['Accuracy'].idxmax()]
print(f"\n Best Model: {best_m1['Variant']} (Accuracy: {best_m1['Accuracy']:.4f})")

# Model 2 Comparison
print("\n MODEL 2: MATCH WINNER PREDICTION")
print("-" * 100)
model2_comparison = pd.DataFrame({
    'Variant': ['Linear SVM', 'RBF SVM', 'Tuned SVM'],
    'Accuracy': [acc_winner_linear, acc_winner_rbf, acc_winner_tuned],
    'F1-Score': [f1_winner_linear, f1_winner_rbf, f1_winner_tuned],
    'CV Score': [cv_winner_linear.mean(), cv_winner_rbf.mean(), grid_winner.best_score_]
})
print(model2_comparison.to_string(index=False))
best_m2 = model2_comparison.loc[model2_comparison['Accuracy'].idxmax()]
print(f"\n Best Model: {best_m2['Variant']} (Accuracy: {best_m2['Accuracy']:.4f})")

# Model 3 Comparison
print("\n MODEL 3: TARGET SCORE PREDICTION")
print("-" * 100)
model3_comparison = pd.DataFrame({
    'Variant': ['Linear SVR', 'RBF SVR', 'Tuned SVR'],
    'RMSE': [rmse_linear, rmse_rbf, rmse_tuned],
    'MAE': [mae_linear, mae_rbf, mae_tuned],
    'R² Score': [r2_linear, r2_rbf, r2_tuned],
    'CV RMSE': [cv_rmse_linear, cv_rmse_rbf, np.sqrt(-grid_score.best_score_)]
})
print(model3_comparison.to_string(index=False))
best_m3 = model3_comparison.loc[model3_comparison['RMSE'].idxmin()]
print