In [76]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

dataframe = pd.read_csv("DSCancerDeMamaSinDatosPerdidos.csv",sep=';')
dataframe.head(10)

Unnamed: 0,BIRADS,Age,Shape,Margin,Density,Severity
0,5,67,3,5,3,1
1,5,58,4,5,3,1
2,4,28,1,1,3,0
3,5,57,1,5,3,1
4,5,76,1,4,3,1
5,3,42,2,1,3,1
6,4,36,3,1,2,0
7,4,60,2,1,2,0
8,4,54,1,1,3,0
9,3,52,3,4,3,0


In [78]:
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
import pickle

In [80]:
df_feat = dataframe.drop(columns=["Severity"])

df_target = dataframe["Severity"]

print("Feature Variables:") 
print(df_feat.info())


Feature Variables:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 820 entries, 0 to 819
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   BIRADS   820 non-null    int64
 1   Age      820 non-null    int64
 2   Shape    820 non-null    int64
 3   Margin   820 non-null    int64
 4   Density  820 non-null    int64
dtypes: int64(5)
memory usage: 32.2 KB
None


In [82]:
df_feat = dataframe.drop(columns=["Severity"])
df_target = dataframe["Severity"]

scaler = StandardScaler()
df_feat_scaled = scaler.fit_transform(df_feat)

X_train, X_test, y_train, y_test = train_test_split(
    df_feat_scaled, np.ravel(df_target),
    test_size=0.30, random_state=101
)

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 0.01],
    'kernel': ['rbf', 'linear']
}

svm = SVC(probability=True)
grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Mejores parámetros:", grid_search.best_params_)
print("Mejor precisión en validación cruzada:", grid_search.best_score_)

Mejores parámetros: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
Mejor precisión en validación cruzada: 0.836186117467582


In [83]:
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)
print("Reporte de clasificación:")
print(classification_report(y_test, predictions))

scores = cross_val_score(best_model, df_feat_scaled, np.ravel(df_target), cv=5)
print("Precisión promedio con validación cruzada:", scores.mean())
print("Desviación estándar:", scores.std())

Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.81      0.92      0.86       131
           1       0.90      0.75      0.82       115

    accuracy                           0.84       246
   macro avg       0.85      0.84      0.84       246
weighted avg       0.85      0.84      0.84       246

Precisión promedio con validación cruzada: 0.8353658536585366
Desviación estándar: 0.02643839437643755


In [87]:
with open('svm_model_optimized.pkl', 'wb') as f:
    pickle.dump(best_model, f)

sample_data = pd.DataFrame([[4, 65, 3, 5, 3]], columns=['BIRADS', 'Age', 'Shape', 'Margin', 'Density'])
sample_data_scaled = scaler.transform(sample_data)
print("Predicción para nuevos datos:", best_model.predict(sample_data_scaled))

Predicción para nuevos datos: [0]
