In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, accuracy_score

# Load the breast cancer dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing: Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Support Vector Regression (SVR) with different kernels
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

for kernel in kernels:
    svr = SVR(kernel=kernel)
    parameters = {'C': [0.1, 1, 10, 100], 'gamma': [0.1, 0.01, 0.001, 0.0001]}
    grid_search = GridSearchCV(estimator=svr, param_grid=parameters, scoring='neg_mean_squared_error', cv=5)
    grid_search.fit(X_train_scaled, y_train)
    best_svr = grid_search.best_estimator_
    
    # Predictions
    y_pred_train = best_svr.predict(X_train_scaled)
    y_pred_test = best_svr.predict(X_test_scaled)
    
    # Evaluate SVR
    mse_train = mean_squared_error(y_train, y_pred_train)
    mse_test = mean_squared_error(y_test, y_pred_test)
    
    print(f"\nKernel: {kernel}")
    print("Best SVR Model:", best_svr)
    print("SVR MSE on training set:", mse_train)
    print("SVR MSE on testing set:", mse_test)

# Linear Regression
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)

# Predictions
lr_pred_train = lr.predict(X_train_scaled)
lr_pred_test = lr.predict(X_test_scaled)

# Convert predictions to binary (0 or 1) for classification
lr_pred_train_binary = np.where(lr_pred_train >= 0.5, 1, 0)
lr_pred_test_binary = np.where(lr_pred_test >= 0.5, 1, 0)

# Evaluate Linear Regression
lr_accuracy_train = accuracy_score(y_train, lr_pred_train_binary)
lr_accuracy_test = accuracy_score(y_test, lr_pred_test_binary)

print("\nLinear Regression Model:")
print("Linear Regression Accuracy on training set:", lr_accuracy_train)
print("Linear Regression Accuracy on testing set:", lr_accuracy_test)



Kernel: linear
Best SVR Model: SVR(C=0.1, gamma=0.1, kernel='linear')
SVR MSE on training set: 0.05563342481323135
SVR MSE on testing set: 0.06452031971671605

Kernel: poly
Best SVR Model: SVR(C=0.1, gamma=0.01, kernel='poly')
SVR MSE on training set: 0.2214765097370369
SVR MSE on testing set: 0.21945829820625198

Kernel: rbf
Best SVR Model: SVR(C=10, gamma=0.01)
SVR MSE on training set: 0.02006363547363434
SVR MSE on testing set: 0.03750152976576865

Kernel: sigmoid
Best SVR Model: SVR(C=100, gamma=0.0001, kernel='sigmoid')
SVR MSE on training set: 0.05987994351916845
SVR MSE on testing set: 0.06077448843873148

Linear Regression Model:
Linear Regression Accuracy on training set: 0.9626373626373627
Linear Regression Accuracy on testing set: 0.956140350877193
