In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.feature_selection import RFE

# Load the dataset
data = pd.read_csv('data.csv')

# Data Preprocessing
# Handle missing values
data = data.dropna()

# Encode target variable
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})

# Split features and target
X = data.drop(['diagnosis', 'id'], axis=1)
y = data['diagnosis']

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Feature Selection
# Create the RFE object and rank each feature
svc = SVC(kernel="linear")
rfe = RFE(estimator=svc, n_features_to_select=10, step=1)  # You can adjust the number of features
rfe.fit(X_scaled, y)

# Get the selected features
X_selected = rfe.transform(X_scaled)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Machine Learning Model (SVM)
# Create and train the SVM model
svm_model = SVC()
svm_model.fit(X_train, y_train)

# Evaluate the model
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(cm)


Accuracy: 0.9649122807017544
Confusion Matrix:
[[69  2]
 [ 2 41]]
