In [3]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from scipy.stats import loguniform
import warnings
warnings.filterwarnings('ignore')

In [4]:
# ========================
# 1. Data Loading & Preprocessing
# ========================

# Source file has no header row, so the column names are supplied here:
# the first column is the target letter, the remaining 16 are the features.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"
columns = [
    'letter',
    'x-box', 'y-box', 'width', 'height', 'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br', 'x-ege', 'xegvy', 'y-ege', 'yegvx',
]
data = pd.read_csv(url, names=columns, header=None)

# Encode the letter column as integer class ids, stored alongside the
# original column in a new 'label' column.
le = LabelEncoder()
data['label'] = le.fit_transform(data['letter'])

# Feature matrix = every column except the raw letter and its encoded label.
X = data.drop(columns=['letter', 'label'])
y = data['label']

# Standardise the features.
# NOTE(review): the scaler is fitted on every row, i.e. before any
# train/test split is made further down the notebook.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [5]:
# ========================
# 2. Create 10 Data Splits
# ========================

# Build 10 independent 70/30 train/test splits, one per random seed 0..9.
#
# The split is taken on the raw feature frame `X` and each split is then
# standardised with a scaler fitted on its own training portion only.
# Splitting an array that was already standardised on the full dataset would
# let the test rows influence the mean/variance used to scale the training
# rows (data leakage).
#
# Each entry of `splits` is a tuple (X_train, X_test, y_train, y_test), where
# the X parts are scaled NumPy arrays and the y parts are slices of `y`.
splits = []
for i in range(10):
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=i
    )

    # Fit on the training rows only, then apply the same transform to both.
    split_scaler = StandardScaler().fit(X_train_raw)
    X_train = split_scaler.transform(X_train_raw)
    X_test = split_scaler.transform(X_test_raw)

    splits.append((X_train, X_test, y_train, y_test))

In [None]:
# ========================
# 3. SVM Optimization
# ========================

# Search space for the randomized search: three kernels, with C and gamma
# both drawn from log-uniform distributions.
param_dist = dict(
    kernel=['linear', 'rbf', 'poly'],
    C=loguniform(1e0, 1e2),
    gamma=loguniform(1e-4, 1e-1),
)

results = []             # one summary dict per sample, in sample order
best_sample_data = None  # cv_results_ of the sample with the highest score so far
max_accuracy = 0

for sample_num in range(10):
    print(f"\nProcessing Sample #{sample_num+1}")
    # Only the training part is used below; the test part is unpacked but
    # not evaluated in this cell.
    X_train, X_test, y_train, y_test = splits[sample_num]

    # 100 random candidates, each scored with 5-fold cross-validation.
    search = RandomizedSearchCV(
        estimator=SVC(),
        param_distributions=param_dist,
        n_iter=100,
        cv=5,
        n_jobs=-1,
        random_state=42,
    ).fit(X_train, y_train)

    # best_score_ is the mean cross-validated score of the best candidate;
    # it is reported here as a percentage.
    best_params = search.best_params_
    best_acc = search.best_score_ * 100

    # NOTE: the 'Nu' column stores the selected C value and the 'Epsilon'
    # column stores the selected gamma value.
    row = {
        'Sample #': f"S{sample_num+1}",
        'Best Accuracy': f"{best_acc:.2f}%",
        'Kernel': best_params['kernel'],
        'Nu': f"{best_params['C']:.3f}",
        'Epsilon': f"{best_params['gamma']:.5f}"
    }
    results.append(row)

    # Remember the full search history of the best-scoring sample so the
    # convergence plot can be drawn from it later.
    if best_acc > max_accuracy:
        max_accuracy = best_acc
        best_sample_data = {
            'cv_results': search.cv_results_,
            'sample_num': sample_num+1
        }



Processing Sample #1

Processing Sample #2

Processing Sample #3

Processing Sample #4

Processing Sample #5

Processing Sample #6

Processing Sample #7

Processing Sample #8


In [None]:
# ========================
# 4. Generate Results Table
# ========================

# Turn the per-sample summary dicts into a frame and print the columns in a
# fixed presentation order.
table_columns = ['Sample #', 'Best Accuracy', 'Kernel', 'Nu', 'Epsilon']

results_df = pd.DataFrame(results)
print("\nFinal Results Table:")
print(results_df.loc[:, table_columns])

In [None]:
# ========================
# 5. Convergence Graph
# ========================

# Plot the running best cross-validated accuracy over the search candidates
# of the best-scoring sample, and save the figure to 'convergence_graph.png'.
# Nothing is drawn when no sample was recorded (best_sample_data is None).
if best_sample_data:
    # Mean CV score of each evaluated candidate, in cv_results_ order.
    scores = np.asarray(best_sample_data['cv_results']['mean_test_score'], dtype=float)

    # One x position per candidate actually evaluated, so the plot stays
    # valid if the search's n_iter is changed from 100.
    iterations = np.arange(1, len(scores) + 1)

    # Running maximum. fmax ignores NaN entries (a candidate whose fit failed
    # is scored NaN), whereas maximum would turn every later point into NaN.
    cum_max = np.fmax.accumulate(scores)

    # Plot settings
    plt.figure(figsize=(10, 6))
    plt.plot(iterations, cum_max, marker='o', linestyle='--')
    plt.title(f"Convergence Graph for Best Sample (S{best_sample_data['sample_num']})")
    plt.xlabel('Iteration')
    plt.ylabel('Best Accuracy')
    plt.grid(True)
    # Save before show(): once the figure has been shown it may no longer be
    # the current figure.
    plt.savefig('convergence_graph.png')
    plt.show()

In [None]:
# ========================
# 6. Save Results
# ========================

# Save results to CSV
results_df.to_csv('svm_optimization_results.csv', index=False)

# The convergence graph is already written to 'convergence_graph.png' by the
# plotting cell, right before plt.show(). It must not be saved again here:
# in this cell there is no live convergence figure any more, so a
# plt.savefig() call would create a new empty figure and overwrite the PNG
# with a blank image.

print("\nAll results saved successfully!")