## SVC

In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Load data
chd = pd.read_csv("data/CDC_python_clean.csv")

# Drop unnecessary columns
columns_to_drop = ['fips', 'CHD', 'county', 'UrbanRural']
chd = chd.drop(columns=columns_to_drop)

# Define features and target
X = chd.drop(columns=['CHD_Class'])
y = chd['CHD_Class']

# One-hot encode categorical variables
char_cols = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=char_cols, drop_first=True)

# Scale numerical variables
numeric_columns = X.select_dtypes(include=['float64', 'int64']).columns
scaler = StandardScaler()
X[numeric_columns] = scaler.fit_transform(X[numeric_columns])

# Split data into train and test sets
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=42)

# Define the SVC classifier
svc = SVC(kernel='linear')

# Define the parameter grid for the GridSearchCV
param_grid = {
    'C': [0.1, 1],  # You can extend this range if you have more computational resources
}

# Define the GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    cv=5,        # Number of cross-validation folds
    n_jobs=-1,   # Use all available CPU cores for parallel processing
    verbose=True
)

# Fit the GridSearchCV to the training data
grid_search.fit(Xtrain, ytrain)

# Get the best SVC classifier
best_svc = grid_search.best_estimator_

# Predict the test set results
svc_pred = best_svc.predict(Xtest)

# Compute the accuracy of the SVC classifier
svc_accuracy = accuracy_score(ytest, svc_pred)

svc_accuracy


Fitting 5 folds for each of 2 candidates, totalling 10 fits


0.9210191082802548