In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR  # Import both SVC and SVR
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold

# Function to evaluate SVM classifier model (for classification)
def evaluate_svm_model(X, y, description):
    if len(np.unique(y)) < 2:
        print(f"Skipping {description} because only one class is present.")
        return
    svm_model = SVC(kernel='linear', random_state=42)
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    cv_scores = cross_val_score(svm_model, X, y, cv=kf, scoring='accuracy')
    print(f'{description} - Mean Accuracy: {np.mean(cv_scores):.4f}, Std Dev: {np.std(cv_scores):.4f}')
    print()

# Function to evaluate SVR model (for regression)
def evaluate_svr_model(X, y, description):
    svr_model = SVR(kernel='linear')
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    cv_scores = cross_val_score(svr_model, X, y, cv=kf, scoring='neg_mean_squared_error')
    print(f'{description} - Mean MSE: {-np.mean(cv_scores):.4f}, Std Dev: {np.std(cv_scores):.4f}')
    print()

# Function to apply PCA and evaluate models
def pca_model(X, y, description, n=None):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    pca = PCA(n_components=n)
    X_pca = pca.fit_transform(X_scaled)
    if y.dtype == 'int' or y.dtype == 'object':
        evaluate_svm_model(X_pca, y, description)
    else:
        evaluate_svr_model(X_pca, y, description)

# Loading Abalone dataset
print("For Abalone dataset\n")
abalone_data = pd.read_csv('abalone.CSV')

# Encoding 'Sex' column
le = LabelEncoder()
abalone_data['Sex'] = le.fit_transform(abalone_data['Sex'])

# Features and target for regression
X1 = abalone_data.drop('Rings', axis=1)
y1 = abalone_data['Rings']

# Approach 1: Regression using SVR
print("Regression Approach (SVR)\n")
evaluate_svr_model(X1, y1, "Original Data (SVR)")
pca_model(X1, y1, "PCA Transformed Data (All Components - SVR)")
pca_model(X1, y1, "PCA Transformed Data (4 Components - SVR)", 4)

# Approach 2: Classification using binned 'Rings'
print("Classification Approach (Binned Rings using SVC)\n")

# Adjust the bins to ensure more even distribution and avoid single-class issues
y1_binned = pd.cut(y1, bins=[0, 6, 10, 15, 25], labels=[0, 1, 2, 3])  # Updated binning

# Ensure there are at least two classes in all splits before running the SVC model
if len(np.unique(y1_binned)) > 1:
    evaluate_svm_model(X1, y1_binned, "Original Data (Binned Rings - SVC)")
    pca_model(X1, y1_binned, "PCA Transformed Data (All Components - SVC)")
    pca_model(X1, y1_binned, "PCA Transformed Data (4 Components - SVC)", 4)
else:
    print("Not enough classes after binning to run classification.")

# Loading Breast Cancer dataset
print("For Breast Cancer dataset\n")
from sklearn.datasets import load_breast_cancer
cancer_data = load_breast_cancer()

X2 = pd.DataFrame(cancer_data.data, columns=cancer_data.feature_names)
y2 = pd.Series(cancer_data.target)

# Apply SVM on original data and PCA-transformed data
evaluate_svm_model(X2, y2, "Original Data (SVC)")
pca_model(X2, y2, "PCA Transformed Data (All Components - SVC)")
pca_model(X2, y2, "PCA Transformed Data (20 Components - SVC)", 20)
pca_model(X2, y2, "PCA Transformed Data (10 Components - SVC)", 10)


For Abalone dataset

Regression Approach (SVR)

Original Data (SVR) - Mean MSE: 0.0015, Std Dev: 0.0001

PCA Transformed Data (All Components - SVR) - Mean MSE: 0.0013, Std Dev: 0.0001

PCA Transformed Data (4 Components - SVR) - Mean MSE: 0.0022, Std Dev: 0.0001

Classification Approach (Binned Rings using SVC)

Not enough classes after binning to run classification.
For Breast Cancer dataset

Original Data (SVC) - Mean Accuracy: 0.9577, Std Dev: 0.0255

PCA Transformed Data (All Components - SVC) - Mean Accuracy: 0.9701, Std Dev: 0.0176

PCA Transformed Data (20 Components - SVC) - Mean Accuracy: 0.9719, Std Dev: 0.0141

PCA Transformed Data (10 Components - SVC) - Mean Accuracy: 0.9737, Std Dev: 0.0180

