# In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing Function
def preprocess_data(X, categorical_features=None, numerical_features=None):
    """
    Builds an unfitted preprocessing transformer for a mixed-type dataset.

    Numerical columns are imputed with the column mean and then standardized.
    Categorical columns are imputed with the most frequent value and then
    one-hot encoded; the encoder is configured with handle_unknown='ignore',
    so categories not seen during fitting do not raise at transform time.

    Parameters:
        X: Not used. Kept only so existing callers keep working; no data is
            inspected here and nothing is fitted.
        categorical_features: Column selector(s) routed to the categorical
            branch. None (the default) means no columns.
        numerical_features: Column selector(s) routed to the numerical
            branch. None (the default) means no columns.

    Returns:
        An unfitted ColumnTransformer. It is created with ColumnTransformer's
        default settings, so the caller is responsible for fitting it (on
        training data only) before transforming anything.
    """
    # None sentinels instead of `[]` defaults: a list default is created once
    # at definition time and shared by every call that omits the argument.
    if categorical_features is None:
        categorical_features = []
    if numerical_features is None:
        numerical_features = []

    # Numerical branch: fill gaps with the mean, then scale.
    numerical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ])

    # Categorical branch: fill gaps with the mode, then one-hot encode.
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])

    # Route each group of columns through its own branch.
    preprocessor = ColumnTransformer(transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ])

    return preprocessor

# Dataset Preparation Function
def prepare_data(X, y, test_size=0.2, random_state=42, categorical_features=None, numerical_features=None):
    """
    Splits the dataset into train/test parts and preprocesses both.

    The preprocessing transformer is fitted on the training split only and
    then applied to the test split, so imputation and scaling statistics are
    never computed from test rows.

    Parameters:
        X: Feature table to split and transform.
        y: Target values aligned with X.
        test_size: Forwarded to train_test_split (default 0.2).
        random_state: Forwarded to train_test_split (default 42).
        categorical_features: Column selector(s) for the categorical branch.
            None (the default) means no columns.
        numerical_features: Column selector(s) for the numerical branch.
            None (the default) means no columns.

    Returns:
        Tuple (X_train, X_test, y_train, y_test) where the two X entries are
        the transformed feature matrices and the two y entries are the
        untouched target splits.
    """
    # None sentinels instead of `[]` defaults (a list default is shared across
    # calls). Normalize here so the builder always receives real selectors.
    if categorical_features is None:
        categorical_features = []
    if numerical_features is None:
        numerical_features = []

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # The builder does not look at the data it is handed; pass the training
    # split anyway so the test rows are never involved in preprocessing setup.
    preprocessor = preprocess_data(X_train, categorical_features, numerical_features)

    # Fit on train only, then reuse the fitted statistics for test.
    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)

    return X_train, X_test, y_train, y_test

# Support Vector Machine Class
class SVMModel:
    """
    Small convenience wrapper around an SVC classifier: train, predict,
    evaluate, and plot the confusion matrix.
    """

    def __init__(self, kernel='linear'):
        # Only the kernel is configurable; every other SVC option is left at
        # whatever SVC uses by default.
        self.model = SVC(kernel=kernel)

    def train(self, X_train, y_train):
        """
        Fits the wrapped classifier on the given training features and labels.
        """
        classifier = self.model
        classifier.fit(X_train, y_train)

    def predict(self, X_test):
        """
        Returns the wrapped classifier's predictions for X_test.
        """
        predictions = self.model.predict(X_test)
        return predictions

    def evaluate(self, y_test, y_pred):
        """
        Compares true labels against predictions.
        Returns a (report, cm) pair: the classification_report output followed
        by the confusion_matrix output.
        """
        return (
            classification_report(y_test, y_pred),
            confusion_matrix(y_test, y_pred),
        )

    def plot_confusion_matrix(self, cm):
        """
        Draws the confusion matrix as an annotated heatmap and shows the figure.
        """
        heatmap_options = {'annot': True, 'fmt': 'd', 'cmap': 'Blues'}
        sns.heatmap(cm, **heatmap_options)
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.show()

# Usage Example Function
def run_svm_model(X, y, categorical_features=None, numerical_features=None, kernel='linear'):
    """
    Runs the full workflow: prepares data, trains the SVM model, evaluates it,
    prints the classification report, and plots the confusion matrix.

    Parameters:
        X: Feature table.
        y: Target values aligned with X.
        categorical_features: Column selector(s) for the categorical branch.
            None (the default) means no columns.
        numerical_features: Column selector(s) for the numerical branch.
            None (the default) means no columns.
        kernel: Kernel name handed to SVMModel (default 'linear').

    Returns:
        None. Results are reported through print and the plotted figure.
    """
    # None sentinels instead of `[]` defaults (a list default is shared across
    # calls). Normalize here so downstream helpers always receive selectors.
    if categorical_features is None:
        categorical_features = []
    if numerical_features is None:
        numerical_features = []

    X_train, X_test, y_train, y_test = prepare_data(
        X,
        y,
        categorical_features=categorical_features,
        numerical_features=numerical_features,
    )

    svm_model = SVMModel(kernel=kernel)
    svm_model.train(X_train, y_train)

    y_pred = svm_model.predict(X_test)

    report, cm = svm_model.evaluate(y_test, y_pred)
    print("Classification Report:\n", report)

    svm_model.plot_confusion_matrix(cm)

# Sample usage with custom dataset
# df = pd.read_csv('your_dataset.csv')  # Replace this with your actual dataset
# X = df.drop('target_column', axis=1)  # Replace 'target_column' with the actual target column name
# y = df['target_column']
# categorical_features = ['col1', 'col2']  # Replace with your actual categorical columns
# numerical_features = ['col3', 'col4']    # Replace with your actual numerical columns
# run_svm_model(X, y, categorical_features, numerical_features, kernel='linear')  # Or another kernel type
