In [2]:
# K-fold cross-validation of SVM classifiers on the a9a dataset.
# a9a is the Adult data (UCI) in LIBSVM format: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#a9a

In [5]:
from sklearn.datasets import load_svmlight_file
from sklearn import svm
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score


In [4]:
# Read the a9a file (svmlight/libsvm format) into a feature matrix X and label vector y.
X, y = load_svmlight_file("a9a.txt")



In [6]:
# Baseline estimator: linear-kernel SVM with C=1 and a fixed random_state.
clf = svm.SVC(
    kernel='linear',
    C=1,
    random_state=42,
)


In [7]:
# Score the current `clf` on (X, y) with 5-fold cross-validation, then report
# the mean and the standard deviation of the per-fold scores.
print("Training classifier with cross validation, k=5")
scores = cross_val_score(clf, X, y, cv=5)
print("Training Complete!")

acc, stdiv = scores.mean(), scores.std()
print(f"Cross Validation Mean Accuracy = {acc:0.2f}")
print(f"Standard Deviation of the Mean Accuracy across all runs = {stdiv:0.2f}")

Training classifier with cross validation, k=5
Training Complete!
Cross Validation Mean Accuracy = 0.85
Standard Deviation of the Mean Accuracy across all runs = 0.00


In [8]:
# RBF-kernel SVM with gamma=0.1 (C stays at 1), scored with 5-fold cross-validation.
clf = svm.SVC(
    kernel='rbf',
    C=1,
    gamma=0.1,
    random_state=42,
)

print("Training classifier with cross validation, k=5")
scores = cross_val_score(clf, X, y, cv=5)
print("Training Complete!")

# Summarise the per-fold scores.
acc, stdiv = scores.mean(), scores.std()
print(f"Cross Validation Mean Accuracy = {acc:0.2f}")
print(f"Standard Deviation of the Mean Accuracy across all runs = {stdiv:0.2f}")

Training classifier with cross validation, k=5
Training Complete!
Cross Validation Mean Accuracy = 0.85
Standard Deviation of the Mean Accuracy across all runs = 0.00


In [9]:
# RBF-kernel SVM with gamma=0.01 (C stays at 1), scored with 5-fold cross-validation.
clf = svm.SVC(
    kernel='rbf',
    C=1,
    gamma=0.01,
    random_state=42,
)

print("Training classifier with cross validation, k=5")
scores = cross_val_score(clf, X, y, cv=5)
print("Training Complete!")

# Summarise the per-fold scores.
acc, stdiv = scores.mean(), scores.std()
print(f"Cross Validation Mean Accuracy = {acc:0.2f}")
print(f"Standard Deviation of the Mean Accuracy across all runs = {stdiv:0.2f}")

Training classifier with cross validation, k=5
Training Complete!
Cross Validation Mean Accuracy = 0.84
Standard Deviation of the Mean Accuracy across all runs = 0.00


In [10]:
# Polynomial-kernel SVM of degree 2 (C stays at 1; gamma is not set here),
# scored with 5-fold cross-validation.
clf = svm.SVC(
    kernel='poly',
    C=1,
    degree=2,
    random_state=42,
)

print("Training classifier with cross validation, k=5")
scores = cross_val_score(clf, X, y, cv=5)
print("Training Complete!")

# Summarise the per-fold scores.
acc, stdiv = scores.mean(), scores.std()
print(f"Cross Validation Mean Accuracy = {acc:0.2f}")
print(f"Standard Deviation of the Mean Accuracy across all runs = {stdiv:0.2f}")

Training classifier with cross validation, k=5
Training Complete!
Cross Validation Mean Accuracy = 0.85
Standard Deviation of the Mean Accuracy across all runs = 0.00


In [13]:
# Rebind X and y to the a1a file (same svmlight/libsvm format) for the cells that follow.
X, y = load_svmlight_file("a1a.txt")

In [14]:
# Grid search over linear and RBF SVM settings on the current (X, y).
# Each dict is one sub-grid; GridSearchCV evaluates every combination inside it.
param_grid = [
    {'C': [1, 10], 'kernel': ['linear']},
    {'C': [1, 10], 'gamma': [0.001, 0.01], 'kernel': ['rbf']},
]

print("Creating classifier object...")
svc = svm.SVC()

print("Creating a grid search cross validator object...")
clf = GridSearchCV(estimator=svc, param_grid=param_grid)

print("Fitting the models with different parameters...")
clf.fit(X, y)

# Persist the full cv_results_ table (one row per parameter combination).
print("Writing all fitting results...")
df = pd.DataFrame(clf.cv_results_)
df.to_csv("Parameter_Tuning_Results.csv")


Creating classifier object...
Creating a grid search cross validator object...
Fitting the models with different parameters...
Writing all fitting results...


In [15]:
# Grid search over linear, RBF and polynomial SVM settings on the current (X, y),
# followed by two reports: test error for the polynomial sub-grid, and a ranking
# of every parameter combination by mean cross-validated test score.
param_grid = [
    {'C': [1, 10], 'kernel': ['linear']},
    {'C': [1, 10], 'gamma': [0.001, 0.01], 'kernel': ['rbf']},
    {'C': [0.01, 0.1, 1, 10], 'degree': [2, 3], 'kernel': ['poly']}
]

print("Creating classifier object...")
svc = svm.SVC()

print("Creating a grid search cross validator object...")
clf = GridSearchCV(svc, param_grid)

print("Fitting the models with different parameters...")
clf.fit(X, y)

# One row per parameter combination; overwrites any existing CSV of the same name.
print("Writing all fitting results...")
df = pd.DataFrame(clf.cv_results_)
df.to_csv("Parameter_Tuning_Results.csv")

print("Extracting testing errors for polynomial kernel parameters...")
# Select rows and columns in a single .loc call and derive the new column with
# .assign, so the result is an independent frame. The previous chained
# indexing followed by column assignment is the SettingWithCopy pattern.
poly_results = df.loc[
    df['param_kernel'] == 'poly',
    ['param_C', 'param_degree', 'mean_test_score'],
].assign(test_error=lambda frame: 1 - frame['mean_test_score'])
print(poly_results[['param_C', 'param_degree', 'test_error']])

print("Ranking different parameter sets based on accuracy...")
sorted_results = df.sort_values(by='mean_test_score', ascending=False)
# The set of param_* columns is the same for every row, so compute it once.
param_columns = [key for key in df.columns if key.startswith('param_')]
for rank, (_, row) in enumerate(sorted_results.iterrows(), start=1):
    # Parameters that do not apply to a row's kernel show up as NaN here.
    params = {key: row[key] for key in param_columns}
    mean_accuracy = row['mean_test_score']
    std_dev = row['std_test_score']
    print(f"Rank {rank}: {params}, Mean Test Accuracy={mean_accuracy}, Mean StdDev={std_dev}")


Creating classifier object...
Creating a grid search cross validator object...
Fitting the models with different parameters...
Writing all fitting results...
Extracting testing errors for polynomial kernel parameters...
    param_C  param_degree  test_error
6      0.01           2.0    0.246106
7      0.01           3.0    0.246106
8      0.10           2.0    0.186293
9      0.10           3.0    0.191900
10     1.00           2.0    0.168224
11     1.00           3.0    0.169470
12    10.00           2.0    0.188785
13    10.00           3.0    0.203115
Ranking different parameter sets based on accuracy...
Rank 1: {'param_C': 1.0, 'param_kernel': 'poly', 'param_gamma': nan, 'param_degree': 2.0}, Mean Test Accuracy=0.8317757009345794, Mean StdDev=0.024922118380062308
Rank 2: {'param_C': 1.0, 'param_kernel': 'poly', 'param_gamma': nan, 'param_degree': 3.0}, Mean Test Accuracy=0.8305295950155763, Mean StdDev=0.025809738538576925
Rank 3: {'param_C': 10.0, 'param_kernel': 'linear', 'param

In [None]:
from sklearn.datasets import load_svmlight_file
from sklearn import svm
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold

# Hand-rolled K-fold cross-validation on a1a: for each kernel/gamma setting,
# fit one SVC per fold and print the mean held-out accuracy.
print("Loading Dataset...")
X, y = load_svmlight_file("a1a.txt")
X = X.toarray()  # work on a dense array; folds are taken by row-index arrays

K = 5
kf = KFold(n_splits=K, shuffle=True, random_state=42)

# Settings to compare; C is held at 1 for all of them.
models = [
    dict(kernel="linear", gamma="scale"),
    dict(kernel="rbf", gamma=0.01),
    dict(kernel="rbf", gamma=0.001),
]

for model_params in models:
    print(f"Running K-Fold Cross Validation for SVM with {model_params}")
    accuracies = []

    for train_index, test_index in kf.split(X):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        # model_params carries exactly the kernel and gamma keyword arguments.
        model = svm.SVC(C=1, **model_params)
        model.fit(X_train, y_train)
        accuracy = model.score(X_test, y_test)
        accuracies.append(accuracy)

    avg_accuracy = np.mean(accuracies)
    print(f"Average Accuracy for {model_params}: {avg_accuracy}")




Loading Dataset...
Running K-Fold Cross Validation for SVM with {'kernel': 'linear', 'gamma': 'scale'}
Average Accuracy for {'kernel': 'linear', 'gamma': 'scale'}: 0.8361370716510903
Running K-Fold Cross Validation for SVM with {'kernel': 'rbf', 'gamma': 0.01}
Average Accuracy for {'kernel': 'rbf', 'gamma': 0.01}: 0.8299065420560747
Running K-Fold Cross Validation for SVM with {'kernel': 'rbf', 'gamma': 0.001}
Average Accuracy for {'kernel': 'rbf', 'gamma': 0.001}: 0.7538940809968847
