In [53]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset (expects 'breast-cancer.csv' in the notebook's working directory)
df = pd.read_csv('breast-cancer.csv')

# Select the relevant features
features = ['radius_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean', 'concavity_mean', 'symmetry_mean']
X = df[features]

# Target variable (diagnosis: M for malignant -> 1, B for benign -> 0).
# Use an explicit mapping and fail loudly on anything else: a bare
# "1 if x == 'M' else 0" would silently label typos, other codes and
# missing values as benign.
diagnosis_codes = {'M': 1, 'B': 0}
y = df['diagnosis'].map(diagnosis_codes)
if y.isna().any():
    unexpected_labels = df.loc[y.isna(), 'diagnosis'].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'diagnosis' column (expected 'M' or 'B'): {unexpected_labels}"
    )
y = y.astype('int64')

# Split the dataset into training and testing sets (80% train, 20% test);
# random_state is fixed so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (important for SVM).
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and train the SVM model
svm_model = SVC(kernel='linear')  # Linear kernel (you can try other kernels like 'rbf' if needed)
svm_model.fit(X_train, y_train)

# Make predictions on the held-out test split
y_pred = svm_model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.956140350877193
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.97      0.97        71
           1       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [54]:
def predict_new_data(new_data, model=None, feature_scaler=None):
    """Classify one or more samples as 'Malignant' or 'Benign'.

    Parameters
    ----------
    new_data : array-like or DataFrame
        Samples to classify, one row per sample. It is passed directly to
        ``feature_scaler.transform``.
    model : object with a ``predict`` method, optional
        Classifier to use. Defaults to the notebook-level ``svm_model``.
    feature_scaler : object with a ``transform`` method, optional
        Scaler applied before prediction. Defaults to the notebook-level
        ``scaler``.

    Returns
    -------
    str or list of str
        For a single-row input, the label as a string (same as before).
        For any other number of rows, a list with one label per row.
        A prediction equal to 1 maps to 'Malignant'; anything else to 'Benign'.
    """
    # Fall back to the notebook globals so existing calls keep working.
    if model is None:
        model = svm_model
    if feature_scaler is None:
        feature_scaler = scaler

    # Standardize the new data with the supplied (already fitted) scaler
    new_data_scaled = feature_scaler.transform(new_data)

    # Predict using the trained model
    predictions = model.predict(new_data_scaled)

    # Map each prediction individually; testing the whole array with
    # `prediction == 1` raises ValueError as soon as there is more than one row.
    labels = ['Malignant' if value == 1 else 'Benign' for value in predictions]

    # Preserve the original return type for the single-sample case.
    return labels[0] if len(labels) == 1 else labels

# Demo: build a single-row frame for one hand-entered observation.
# The keys are the same six column names, in the same order, as `features`.
new_data = pd.DataFrame([{
    'radius_mean': 15.4,
    'perimeter_mean': 85.0,
    'area_mean': 530.0,
    'smoothness_mean': 0.097,
    'concavity_mean': 0.057,
    'symmetry_mean': 0.182,
}])

# Scale + classify the observation and report the resulting label
result = predict_new_data(new_data)
print("Predicted Class for the given sample:", result)

Predicted Class for the given sample: Benign


In [55]:
import joblib

# Persist both fitted objects, each to its own .pkl file in the working directory:
# the classifier first, then the scaler it was trained with.
for artifact, filename in ((svm_model, 'svm_model.pkl'), (scaler, 'scaler.pkl')):
    joblib.dump(artifact, filename)

print("Model and Scaler saved successfully!")


Model and Scaler saved successfully!


In [56]:
# Restore the persisted classifier and scaler from disk.
# Rebinding the notebook-level names `svm_model` and `scaler` matters:
# predict_new_data looks those names up, so the call below runs on the reloaded objects.
# NOTE: joblib files are pickles - only load files from a trusted source.
svm_model, scaler = (joblib.load(path) for path in ('svm_model.pkl', 'scaler.pkl'))

# Round-trip check: classify the same hand-entered observation with the reloaded objects
new_data = pd.DataFrame([{
    'radius_mean': 15.4,
    'perimeter_mean': 85.0,
    'area_mean': 530.0,
    'smoothness_mean': 0.097,
    'concavity_mean': 0.057,
    'symmetry_mean': 0.182,
}])

result = predict_new_data(new_data)
print("Predicted Class for the given sample:", result)


Predicted Class for the given sample: Benign
