# *Question.*
1. Construct an SVM model using the data voice.csv under the following conditions,

    a. Split the data using ratios of 70:30 and 80:20 for each model to be developed.
        - Use a model with a linear kernel.
        - Use a model with a polynomial kernel.
        - Use a model with an RBF kernel.
    b. Tabulate the performance of each split and kernel based on the accuracy metric.
2. Use the data from practical session 5 to develop a daytime and nighttime classification model using an SVM with an RBF kernel employing histogram features. Use an 80:20 ratio. You may experiment with hyperparameter tuning of the RBF kernel. Record the accuracy performance!

## **Task 1**

### Step 1 - Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### Step 2 - Voice Classification

In [2]:
print("=" * 80)
print("TASK 1: VOICE CLASSIFICATION USING SVM")
print("=" * 80)

# Step 0: Load the dataset
print("\n[STEP 0] Loading voice.csv dataset...")
df = pd.read_csv("voice.csv")

print(f"Dataset shape: {df.shape}")
print(f"Number of features: {df.shape[1] - 1}")
print(f"Target column: '{df.columns[-1]}'")
print(f"\nFirst 5 rows:")
print(df.head())
print(f"\nClass distribution:")
print(df.iloc[:, -1].value_counts())

# Prepare features (X) and target (y)
X = df.iloc[:, :-1]  # All columns except the last one
y = df.iloc[:, -1]  # Last column (target variable)

print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")

TASK 1: VOICE CLASSIFICATION USING SVM

[STEP 0] Loading voice.csv dataset...
Dataset shape: (3168, 21)
Number of features: 20
Target column: 'label'

First 5 rows:
   meanfreq        sd    median       Q25       Q75       IQR       skew  \
0  0.059781  0.064241  0.032027  0.015071  0.090193  0.075122  12.863462   
1  0.066009  0.067310  0.040229  0.019414  0.092666  0.073252  22.423285   
2  0.077316  0.083829  0.036718  0.008701  0.131908  0.123207  30.757155   
3  0.151228  0.072111  0.158011  0.096582  0.207955  0.111374   1.232831   
4  0.135120  0.079146  0.124656  0.078720  0.206045  0.127325   1.101174   

          kurt    sp.ent       sfm  ...  centroid   meanfun    minfun  \
0   274.402906  0.893369  0.491918  ...  0.059781  0.084279  0.015702   
1   634.613855  0.892193  0.513724  ...  0.066009  0.107937  0.015826   
2  1024.927705  0.846389  0.478905  ...  0.077316  0.098706  0.015656   
3     4.177296  0.963322  0.727232  ...  0.151228  0.088965  0.017798   
4     4.33371

## **Task A Split Ratio 70:30**

In [3]:
# Split the data 70:30
X_train_70, X_test_70, y_train_70, y_test_70 = train_test_split(
    X, y, train_size=0.7, test_size=0.3, random_state=42, stratify=y
)

print(f"\nTraining samples: {len(X_train_70)}")
print(f"Testing samples: {len(X_test_70)}")

# Feature Scaling (IMPORTANT for SVM!)
scaler_70 = StandardScaler()
X_train_70_scaled = scaler_70.fit_transform(X_train_70)
X_test_70_scaled = scaler_70.transform(X_test_70)

print("\n‚úì Data split completed")
print("‚úì Feature scaling applied (StandardScaler)")


Training samples: 2217
Testing samples: 951

‚úì Data split completed
‚úì Feature scaling applied (StandardScaler)


#### **a. Linear Kernel**

In [4]:
# Create and train SVM with linear kernel
svm_linear_70 = SVC(kernel="linear", random_state=42)
print("\nTraining SVM with linear kernel...")
svm_linear_70.fit(X_train_70_scaled, y_train_70)
print("‚úì Training completed")

# Make predictions
y_pred_linear_70 = svm_linear_70.predict(X_test_70_scaled)

# Calculate accuracy
accuracy_linear_70 = accuracy_score(y_test_70, y_pred_linear_70)
print(f"\nAccuracy: {accuracy_linear_70:.4f} ({accuracy_linear_70 * 100:.2f}%)")

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test_70, y_pred_linear_70)
print(cm)

print("\nClassification Report:")
print(classification_report(y_test_70, y_pred_linear_70))


Training SVM with linear kernel...
‚úì Training completed

Accuracy: 0.9790 (97.90%)

Confusion Matrix:
[[463  13]
 [  7 468]]

Classification Report:
              precision    recall  f1-score   support

      female       0.99      0.97      0.98       476
        male       0.97      0.99      0.98       475

    accuracy                           0.98       951
   macro avg       0.98      0.98      0.98       951
weighted avg       0.98      0.98      0.98       951



#### **b. Polynomial Kernel**

In [5]:
# Create and train SVM with polynomial kernel (degree=3 is default)
svm_poly_70 = SVC(kernel="poly", degree=3, random_state=42)
print("\nTraining SVM with polynomial kernel (degree=3)...")
svm_poly_70.fit(X_train_70_scaled, y_train_70)
print("‚úì Training completed")

# Make predictions
y_pred_poly_70 = svm_poly_70.predict(X_test_70_scaled)

# Calculate accuracy
accuracy_poly_70 = accuracy_score(y_test_70, y_pred_poly_70)
print(f"\nAccuracy: {accuracy_poly_70:.4f} ({accuracy_poly_70 * 100:.2f}%)")

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test_70, y_pred_poly_70)
print(cm)

print("\nClassification Report:")
print(classification_report(y_test_70, y_pred_poly_70))


Training SVM with polynomial kernel (degree=3)...
‚úì Training completed

Accuracy: 0.9600 (96.00%)

Confusion Matrix:
[[443  33]
 [  5 470]]

Classification Report:
              precision    recall  f1-score   support

      female       0.99      0.93      0.96       476
        male       0.93      0.99      0.96       475

    accuracy                           0.96       951
   macro avg       0.96      0.96      0.96       951
weighted avg       0.96      0.96      0.96       951



#### **c. RBF Kernel**

In [6]:
# Create and train SVM with RBF kernel
svm_rbf_70 = SVC(kernel="rbf", random_state=42)
print("\nTraining SVM with RBF kernel...")
svm_rbf_70.fit(X_train_70_scaled, y_train_70)
print("‚úì Training completed")

# Make predictions
y_pred_rbf_70 = svm_rbf_70.predict(X_test_70_scaled)

# Calculate accuracy
accuracy_rbf_70 = accuracy_score(y_test_70, y_pred_rbf_70)
print(f"\nAccuracy: {accuracy_rbf_70:.4f} ({accuracy_rbf_70 * 100:.2f}%)")

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test_70, y_pred_rbf_70)
print(cm)

print("\nClassification Report:")
print(classification_report(y_test_70, y_pred_rbf_70))



Training SVM with RBF kernel...
‚úì Training completed

Accuracy: 0.9832 (98.32%)

Confusion Matrix:
[[467   9]
 [  7 468]]

Classification Report:
              precision    recall  f1-score   support

      female       0.99      0.98      0.98       476
        male       0.98      0.99      0.98       475

    accuracy                           0.98       951
   macro avg       0.98      0.98      0.98       951
weighted avg       0.98      0.98      0.98       951



## **Task B Split Ratio 80:20**

In [7]:
# Split the data 80:20
X_train_80, X_test_80, y_train_80, y_test_80 = train_test_split(
    X, y, train_size=0.8, test_size=0.2, random_state=42, stratify=y
)

print(f"\nTraining samples: {len(X_train_80)}")
print(f"Testing samples: {len(X_test_80)}")

# Feature Scaling
scaler_80 = StandardScaler()
X_train_80_scaled = scaler_80.fit_transform(X_train_80)
X_test_80_scaled = scaler_80.transform(X_test_80)

print("\n‚úì Data split completed")
print("‚úì Feature scaling applied (StandardScaler)")


Training samples: 2534
Testing samples: 634

‚úì Data split completed
‚úì Feature scaling applied (StandardScaler)


#### **a. Linear Kernel**

In [8]:
# Create and train SVM with linear kernel
svm_linear_80 = SVC(kernel="linear", random_state=42)
print("\nTraining SVM with linear kernel...")
svm_linear_80.fit(X_train_80_scaled, y_train_80)
print("‚úì Training completed")

# Make predictions
y_pred_linear_80 = svm_linear_80.predict(X_test_80_scaled)

# Calculate accuracy
accuracy_linear_80 = accuracy_score(y_test_80, y_pred_linear_80)
print(f"\nAccuracy: {accuracy_linear_80:.4f} ({accuracy_linear_80 * 100:.2f}%)")

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test_80, y_pred_linear_80)
print(cm)

print("\nClassification Report:")
print(classification_report(y_test_80, y_pred_linear_80))


Training SVM with linear kernel...
‚úì Training completed

Accuracy: 0.9748 (97.48%)

Confusion Matrix:
[[306  11]
 [  5 312]]

Classification Report:
              precision    recall  f1-score   support

      female       0.98      0.97      0.97       317
        male       0.97      0.98      0.97       317

    accuracy                           0.97       634
   macro avg       0.97      0.97      0.97       634
weighted avg       0.97      0.97      0.97       634



#### **b. Polynomial Kernel**

In [9]:
# Create and train SVM with polynomial kernel
svm_poly_80 = SVC(kernel="poly", degree=3, random_state=42)
print("\nTraining SVM with polynomial kernel (degree=3)...")
svm_poly_80.fit(X_train_80_scaled, y_train_80)
print("‚úì Training completed")

# Make predictions
y_pred_poly_80 = svm_poly_80.predict(X_test_80_scaled)

# Calculate accuracy
accuracy_poly_80 = accuracy_score(y_test_80, y_pred_poly_80)
print(f"\nAccuracy: {accuracy_poly_80:.4f} ({accuracy_poly_80 * 100:.2f}%)")

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test_80, y_pred_poly_80)
print(cm)

print("\nClassification Report:")
print(classification_report(y_test_80, y_pred_poly_80))


Training SVM with polynomial kernel (degree=3)...
‚úì Training completed

Accuracy: 0.9558 (95.58%)

Confusion Matrix:
[[292  25]
 [  3 314]]

Classification Report:
              precision    recall  f1-score   support

      female       0.99      0.92      0.95       317
        male       0.93      0.99      0.96       317

    accuracy                           0.96       634
   macro avg       0.96      0.96      0.96       634
weighted avg       0.96      0.96      0.96       634



#### **c. RBF Kernel**

In [10]:
# Create and train SVM with RBF kernel
svm_rbf_80 = SVC(kernel="rbf", random_state=42)
print("\nTraining SVM with RBF kernel...")
svm_rbf_80.fit(X_train_80_scaled, y_train_80)
print("‚úì Training completed")

# Make predictions
y_pred_rbf_80 = svm_rbf_80.predict(X_test_80_scaled)

# Calculate accuracy
accuracy_rbf_80 = accuracy_score(y_test_80, y_pred_rbf_80)
print(f"\nAccuracy: {accuracy_rbf_80:.4f} ({accuracy_rbf_80 * 100:.2f}%)")

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test_80, y_pred_rbf_80)
print(cm)

print("\nClassification Report:")
print(classification_report(y_test_80, y_pred_rbf_80))


Training SVM with RBF kernel...
‚úì Training completed

Accuracy: 0.9826 (98.26%)

Confusion Matrix:
[[310   7]
 [  4 313]]

Classification Report:
              precision    recall  f1-score   support

      female       0.99      0.98      0.98       317
        male       0.98      0.99      0.98       317

    accuracy                           0.98       634
   macro avg       0.98      0.98      0.98       634
weighted avg       0.98      0.98      0.98       634



### **Task C Performance Comparison Table**

In [11]:
print("\n\n" + "=" * 80)
print("SECTION 1.3: PERFORMANCE COMPARISON TABLE")
print("=" * 80)

# Create results table
results = [
    {
        "Split Ratio": "70:30",
        "Kernel": "Linear",
        "Accuracy": accuracy_linear_70,
        "Accuracy (%)": f"{accuracy_linear_70 * 100:.2f}%",
    },
    {
        "Split Ratio": "70:30",
        "Kernel": "Polynomial",
        "Accuracy": accuracy_poly_70,
        "Accuracy (%)": f"{accuracy_poly_70 * 100:.2f}%",
    },
    {
        "Split Ratio": "70:30",
        "Kernel": "RBF",
        "Accuracy": accuracy_rbf_70,
        "Accuracy (%)": f"{accuracy_rbf_70 * 100:.2f}%",
    },
    {
        "Split Ratio": "80:20",
        "Kernel": "Linear",
        "Accuracy": accuracy_linear_80,
        "Accuracy (%)": f"{accuracy_linear_80 * 100:.2f}%",
    },
    {
        "Split Ratio": "80:20",
        "Kernel": "Polynomial",
        "Accuracy": accuracy_poly_80,
        "Accuracy (%)": f"{accuracy_poly_80 * 100:.2f}%",
    },
    {
        "Split Ratio": "80:20",
        "Kernel": "RBF",
        "Accuracy": accuracy_rbf_80,
        "Accuracy (%)": f"{accuracy_rbf_80 * 100:.2f}%",
    },
]

results_df = pd.DataFrame(results)



SECTION 1.3: PERFORMANCE COMPARISON TABLE


In [12]:
print("\nCOMPREHENSIVE ACCURACY COMPARISON:")
print("-" * 80)
print(results_df.to_string(index=False))
print("-" * 80)

# Find best performing model
best_idx = results_df["Accuracy"].idxmax()
best_model = results_df.iloc[best_idx]

print(f"\nüèÜ BEST MODEL:")
print(f"   Split Ratio: {best_model['Split Ratio']}")
print(f"   Kernel: {best_model['Kernel']}")
print(f"   Accuracy: {best_model['Accuracy (%)']}")

# Summary statistics
print(f"\nüìä SUMMARY STATISTICS:")
print(
    f"   Average Accuracy: {results_df['Accuracy'].mean():.4f} ({results_df['Accuracy'].mean() * 100:.2f}%)"
)
print(
    f"   Best Accuracy: {results_df['Accuracy'].max():.4f} ({results_df['Accuracy'].max() * 100:.2f}%)"
)
print(
    f"   Worst Accuracy: {results_df['Accuracy'].min():.4f} ({results_df['Accuracy'].min() * 100:.2f}%)"
)
print(f"   Std Deviation: {results_df['Accuracy'].std():.4f}")



COMPREHENSIVE ACCURACY COMPARISON:
--------------------------------------------------------------------------------
Split Ratio     Kernel  Accuracy Accuracy (%)
      70:30     Linear  0.978970       97.90%
      70:30 Polynomial  0.960042       96.00%
      70:30        RBF  0.983176       98.32%
      80:20     Linear  0.974763       97.48%
      80:20 Polynomial  0.955836       95.58%
      80:20        RBF  0.982650       98.26%
--------------------------------------------------------------------------------

üèÜ BEST MODEL:
   Split Ratio: 70:30
   Kernel: RBF
   Accuracy: 98.32%

üìä SUMMARY STATISTICS:
   Average Accuracy: 0.9726 (97.26%)
   Best Accuracy: 0.9832 (98.32%)
   Worst Accuracy: 0.9558 (95.58%)
   Std Deviation: 0.0118


## Task 2 Day/Night Image Classification Using SVM-RBF With Histogram Features

In [13]:
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd

### Step 1 - Feature Extraction

In [14]:
def extract_histogram_features(image_path, bins=256):
    """
    Extract color histogram features from an image.

    Parameters:
    -----------
    image_path : str
        Path to the image file
    bins : int
        Number of bins for the histogram (default=256)

    Returns:
    --------
    features : numpy array
        Flattened histogram features (bins x 3 channels = 768 features)
    """
    # Read image
    img = cv2.imread(image_path)

    if img is None:
        print(f"Warning: Could not read {image_path}")
        return None

    # Calculate histogram for each color channel (B, G, R)
    features = []

    for channel in range(3):  # 3 channels: Blue, Green, Red
        # Calculate histogram for this channel
        hist = cv2.calcHist([img], [channel], None, [bins], [0, 256])

        # Flatten the histogram
        hist = hist.flatten()

        # Normalize the histogram (important!)
        hist = hist / (hist.sum() + 1e-7)  # Add small value to avoid division by zero

        # Add to features
        features.extend(hist)

    return np.array(features)


### Step 2 - Load Image Datasets - Split Dataset

In [23]:
def load_image_dataset(base_dir, dataset_type="training", bins=256):
    """
    Load images from 'day' and 'night' subdirectories under training or test folders,
    and extract histogram features.

    Parameters:
    -----------
    base_dir : str
        Base directory containing 'training' and 'test' folders
    dataset_type : str
        Specify 'training' or 'test' to choose which dataset to load
    bins : int
        Number of histogram bins for feature extraction

    Returns:
    --------
    X : numpy array
        Feature matrix (n_samples, n_features)
    y : numpy array
        Labels (0 = day, 1 = night)
    """
    import os
    import numpy as np

    X, y = [], []

    data_dir = os.path.join(base_dir, dataset_type)
    print(f"\n[STEP 2] Loading {dataset_type.upper()} dataset from: {data_dir}")

    # Load DAY images (label = 0)
    day_dir = os.path.join(data_dir, "day")
    print(f"\nLoading images from: {day_dir}")
    if os.path.exists(day_dir):
        day_images = [
            f
            for f in os.listdir(day_dir)
            if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))
        ]
        print(f"Found {len(day_images)} day images")

        for idx, img_file in enumerate(day_images, 1):
            img_path = os.path.join(day_dir, img_file)
            features = extract_histogram_features(img_path, bins)
            if features is not None:
                X.append(features)
                y.append(0)
            if idx % 50 == 0:
                print(f"  Processed {idx}/{len(day_images)} day images...")
    else:
        print(f"ERROR: Directory not found: {day_dir}")

    # Load NIGHT images (label = 1)
    night_dir = os.path.join(data_dir, "night")
    print(f"\nLoading images from: {night_dir}")
    if os.path.exists(night_dir):
        night_images = [
            f
            for f in os.listdir(night_dir)
            if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))
        ]
        print(f"Found {len(night_images)} night images")

        for idx, img_file in enumerate(night_images, 1):
            img_path = os.path.join(night_dir, img_file)
            features = extract_histogram_features(img_path, bins)
            if features is not None:
                X.append(features)
                y.append(1)
            if idx % 50 == 0:
                print(f"  Processed {idx}/{len(night_images)} night images...")
    else:
        print(f"ERROR: Directory not found: {night_dir}")

    return np.array(X), np.array(y)


# ===============================
# Load both TRAINING and TEST datasets
# ===============================
BASE_DIR = "images"  # your main folder inside Week11

print("\n" + "=" * 80)
print("LOADING TRAINING DATASET")
print("=" * 80)
X_train, y_train = load_image_dataset(BASE_DIR, "training")

print("\n" + "=" * 80)
print("LOADING TEST DATASET")
print("=" * 80)
X_test, y_test = load_image_dataset(BASE_DIR, "test")


# Dataset Summary
def dataset_summary(X, y, name):
    print(f"\n‚úì {name.upper()} dataset loaded successfully!")
    print(f"  Total images: {len(X)}")
    print(f"  Day images (label=0): {np.sum(y == 0)}")
    print(f"  Night images (label=1): {np.sum(y == 1)}")
    print(
        f"  Class balance: {np.sum(y == 0) / len(y) * 100:.1f}% day, {np.sum(y == 1) / len(y) * 100:.1f}% night"
    )


dataset_summary(X_train, y_train, "Training")
dataset_summary(X_test, y_test, "Test")



LOADING TRAINING DATASET

[STEP 2] Loading TRAINING dataset from: images/training

Loading images from: images/training/day
Found 120 day images
  Processed 50/120 day images...
  Processed 100/120 day images...

Loading images from: images/training/night
Found 120 night images
  Processed 50/120 night images...
  Processed 100/120 night images...

LOADING TEST DATASET

[STEP 2] Loading TEST dataset from: images/test

Loading images from: images/test/day
Found 80 day images
  Processed 50/80 day images...

Loading images from: images/test/night
Found 80 night images
  Processed 50/80 night images...


: 

### Step 3 - Split Datasets

In [21]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, test_size=0.2, random_state=42, stratify=y
)

print(f"\nTraining set: {len(X_train)} images ({len(X_train) / len(X) * 100:.1f}%)")
print(f"  Day: {np.sum(y_train == 0)}")
print(f"  Night: {np.sum(y_train == 1)}")

print(f"\nTesting set: {len(X_test)} images ({len(X_test) / len(X) * 100:.1f}%)")
print(f"  Day: {np.sum(y_test == 0)}")
print(f"  Night: {np.sum(y_test == 1)}")

print("\n‚úì Data split completed with stratification (maintains class balance)")

ValueError: With n_samples=0, test_size=0.2 and train_size=0.8, the resulting train set will be empty. Adjust any of the aforementioned parameters.

### Step 4 - Feature Scaling

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("\n‚úì Features standardized using StandardScaler")
print(f"  Mean: ~0, Std: ~1 for each feature")

### Step 5 - Baseline Model

In [None]:
# Train baseline model
print("\nTraining baseline SVM-RBF model...")
svm_baseline = SVC(kernel="rbf", random_state=42)
svm_baseline.fit(X_train_scaled, y_train)
print("‚úì Training completed")

# Predictions
y_pred_baseline = svm_baseline.predict(X_test_scaled)

# Accuracy
accuracy_baseline = accuracy_score(y_test, y_pred_baseline)
print(f"\nBaseline Accuracy: {accuracy_baseline:.4f} ({accuracy_baseline * 100:.2f}%)")

print("\nConfusion Matrix:")
cm_baseline = confusion_matrix(y_test, y_pred_baseline)
print("              Predicted")
print("              Day  Night")
print(f"Actual Day    {cm_baseline[0][0]:3}   {cm_baseline[0][1]:3}")
print(f"       Night  {cm_baseline[1][0]:3}   {cm_baseline[1][1]:3}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred_baseline, target_names=["Day", "Night"]))

### Step 6 - Hyper Parameter Tuning

In [None]:
# Define parameter grid
C_values = [0.1, 1, 10, 100]
gamma_values = [0.001, 0.01, 0.1, 1, "scale", "auto"]

print(f"\nGrid Search:")
print(f"  C values: {C_values}")
print(f"  gamma values: {gamma_values}")
print(f"  Total combinations: {len(C_values) * len(gamma_values)}")

# Store results
tuning_results = []
best_accuracy = 0
best_params = {}
best_model = None

print("\n" + "-" * 80)
print("Testing combinations...")
print("-" * 80)

for C in C_values:
    for gamma in gamma_values:
        # Train model
        svm = SVC(kernel="rbf", C=C, gamma=gamma, random_state=42)
        svm.fit(X_train_scaled, y_train)

        # Predict
        y_pred = svm.predict(X_test_scaled)

        # Calculate accuracy
        accuracy = accuracy_score(y_test, y_pred)

        # Store results
        tuning_results.append({
            "C": C,
            "gamma": str(gamma),
            "Accuracy": accuracy,
            "Accuracy (%)": f"{accuracy * 100:.2f}%",
        })

        # Print result
        print(
            f"C={C:6}, gamma={str(gamma):8} ‚Üí Accuracy: {accuracy:.4f} ({accuracy * 100:.2f}%)"
        )

        # Track best model
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_params = {"C": C, "gamma": gamma}
            best_model = svm
print("-" * 80)

### Step 7 - Best Model Evaluation

In [None]:
print("\n" + "=" * 80)
print("STEP 7: BEST MODEL EVALUATION")
print("=" * 80)

print(f"\nBEST HYPERPARAMETERS:")
print(f"   C: {best_params['C']}")
print(f"   gamma: {best_params['gamma']}")
print(f"   Best Accuracy: {best_accuracy:.4f} ({best_accuracy * 100:.2f}%)")

# Detailed evaluation of best model
y_pred_best = best_model.predict(X_test_scaled)

print("\n" + "-" * 80)
print("CONFUSION MATRIX (Best Model):")
print("-" * 80)
cm_best = confusion_matrix(y_test, y_pred_best)
print("              Predicted")
print("              Day  Night")
print(f"Actual Day    {cm_best[0][0]:3}   {cm_best[0][1]:3}")
print(f"       Night  {cm_best[1][0]:3}   {cm_best[1][1]:3}")

print("\n" + "-" * 80)
print("CLASSIFICATION REPORT (Best Model):")
print("-" * 80)
print(classification_report(y_test, y_pred_best, target_names=["Day", "Night"]))


### Step 8 - Performance Comparison 

In [None]:
comparison = pd.DataFrame([
    {
        "Model": "Baseline (default)",
        "C": 1.0,
        "gamma": "scale",
        "Accuracy": accuracy_baseline,
        "Accuracy (%)": f"{accuracy_baseline * 100:.2f}%",
    },
    {
        "Model": "Best (tuned)",
        "C": best_params["C"],
        "gamma": best_params["gamma"],
        "Accuracy": best_accuracy,
        "Accuracy (%)": f"{best_accuracy * 100:.2f}%",
    },
])

print("\n" + "-" * 80)
print(comparison.to_string(index=False))
print("-" * 80)

improvement = (best_accuracy - accuracy_baseline) * 100
print(f"\nüìà Improvement from tuning: {improvement:+.2f} percentage points")