In [1]:
# Importamos las librerias a utilizar

import sys
import pandas as pd
import numpy as np
import joblib

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,

)

from PyQt5.QtWidgets import (
    QApplication,
    QWidget,
    QLabel,
    QLineEdit,
    QPushButton,
    QVBoxLayout,
    QGridLayout,
    QMessageBox,
)

In [2]:
# Load the breast-cancer dataset bundled with scikit-learn
cancer = load_breast_cancer()

# Build the feature table and append the label column in a single expression
df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names).assign(
    target=cancer.target  # 0 is malignant, 1 is benign
)

# Quick inspection: dimensions, first rows and class balance
print("Tamaño del dataset:", df.shape)
print("\nPrimeras filas del dataset:")
print(df.head())
print("\nDistribución de las clases:")
print(df["target"].value_counts())

Tamaño del dataset: (569, 31)

Primeras filas del dataset:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  w

In [3]:
# Separate the label column from the predictors
y = df["target"]  # target
X = df.drop(columns="target")  # features

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# Report the size of each partition
print("Tamaño del conjunto de entrenamiento:", X_train.shape)
print("Tamaño del conjunto de prueba:", X_test.shape)

Tamaño del conjunto de entrenamiento: (398, 30)
Tamaño del conjunto de prueba: (171, 30)


In [4]:
# Standardise the features.
# The scaler learns its statistics from the training rows only and is then
# applied, unchanged, to both partitions.

scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Sanity check of the scaled values

print("Primeras 5 filas de las características de entrenamiento normalizadas:")
print(X_train_scaled[:5])

Primeras 5 filas de las características de entrenamiento normalizadas:
[[-0.12348985 -0.29680142 -0.17050713 -0.20861569 -1.2016799  -0.7731696
  -0.76231194 -0.93324109 -1.22994935 -0.94816603 -0.53359339 -0.86028757
  -0.61678096 -0.39177533 -1.35556152 -0.52503193 -0.4817033  -0.97940018
  -0.88459317 -0.68548672 -0.19761978 -0.5067476  -0.30791001 -0.27357592
  -1.50742388 -0.44926047 -0.57223884 -0.84082156 -0.8563616  -0.76574773]
 [-0.22826757 -0.65795149 -0.25377521 -0.2965028  -1.80463697 -0.58761605
  -0.09198533 -0.54268359 -1.41998468 -0.61249143 -0.83040055 -0.12266723
  -0.78254381 -0.53126109 -0.36490698  0.40861926  0.57668457 -0.2482875
  -1.03572382  0.10768859 -0.42291745 -0.45849468 -0.4652873  -0.43812681
  -1.27301714  0.02704209  0.31804488 -0.37706655 -1.3415819  -0.41480748]
 [ 0.14553402 -1.23056444  0.24583328 -0.01024193  0.5191843   1.57000613
   0.73231958  0.38658307  1.05420084  1.57422827  0.48747836  0.59258929
   0.90918448  0.18132474  0.93956737  1.

In [5]:
# Build the logistic-regression classifier and fit it on the scaled training
# data (fit() returns the estimator itself, so the call can be chained)

model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

print("Modelo de Regresión Logística entrenado.")

Modelo de Regresión Logística entrenado.


In [6]:
# Predict the labels of the held-out rows

y_pred = model.predict(X_test_scaled)

# Metrics: every scorer takes the same (y_true, y_pred) pair, so evaluate them
# in one pass and unpack the results in order

accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, y_pred)

# Show the metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("\nMatriz de Confusión:")
print(conf_matrix)

# Persist the fitted model and scaler so the GUI can reload them from disk
joblib.dump(model, "modelo_cancer.joblib")
joblib.dump(scaler, "scaler_cancer.joblib")
print("\nModelo y scaler guardados.")

Accuracy: 0.9825
Precision: 0.9907
Recall: 0.9815
F1-Score: 0.9860

Matriz de Confusión:
[[ 62   1]
 [  2 106]]

Modelo y scaler guardados.


In [None]:
class CancerPredictor(QWidget):
    """Qt window that collects tumour feature values and shows the model's diagnosis.

    The widget loads a fitted model, a fitted scaler and the list of feature
    names from joblib files in the working directory, builds one text field
    per feature, and re-computes the test-set metrics so they can be shown
    on demand.
    """

    def __init__(self):
        """Load the persisted artifacts, build the UI and evaluate the model."""
        super().__init__()
        # NOTE(security): joblib.load unpickles the file, and unpickling can run
        # arbitrary code. Only load artifacts written by this notebook.
        self.model = joblib.load("modelo_cancer.joblib")
        self.scaler = joblib.load("scaler_cancer.joblib")
        self.feature_names_en = joblib.load("feature_names_cancer.joblib")
        self.feature_names_es = self.traducir_nombres(self.feature_names_en)
        self.inputs = {}  # English feature name -> QLineEdit holding its value
        self.result_label = None
        self.accuracy = None
        self.precision = None
        self.recall = None
        self.f1 = None
        self.conf_matrix = None
        self.initUI()
        self.evaluate_model()

    def traducir_nombres(self, nombres_en):
        """Return the Spanish display label for each English feature name.

        Names without an entry in the translation table are returned unchanged.
        """
        traducciones = {
            "mean radius": "Radio promedio",
            "mean texture": "Textura promedio",
            "mean perimeter": "Perímetro promedio",
            "mean area": "Área promedio",
            "mean smoothness": "Suavidad promedio",
            "mean compactness": "Compacidad promedio",
            "mean concavity": "Concavidad promedio",
            "mean concave points": "Puntos cóncavos promedio",
            "mean symmetry": "Simetría promedio",
            "mean fractal dimension": "Dimensión fractal promedio",
            "radius error": "Error de radio",
            "texture error": "Error de textura",
            "perimeter error": "Error de perímetro",
            "area error": "Error de área",
            "smoothness error": "Error de suavidad",
            "compactness error": "Error de compacidad",
            "concavity error": "Error de concavidad",
            "concave points error": "Error de puntos cóncavos",
            "symmetry error": "Error de simetría",
            "fractal dimension error": "Error de dimensión fractal",
            "worst radius": "Peor radio",
            "worst texture": "Peor textura",
            "worst perimeter": "Peor perímetro",
            "worst area": "Peor área",
            "worst smoothness": "Peor suavidad",
            "worst compactness": "Peor compacidad",
            "worst concavity": "Peor concavidad",
            "worst concave points": "Peores puntos cóncavos",
            "worst symmetry": "Peor simetría",
            "worst fractal dimension": "Peor dimensión fractal",
        }
        nombres_es = [traducciones.get(nombre, nombre) for nombre in nombres_en]
        return nombres_es

    def evaluate_model(self):
        """Recompute the test-set metrics for the loaded model and store them.

        The dataset is reloaded and split with the same ``test_size`` and
        ``random_state`` used when the model was trained in this notebook, so
        the held-out rows are the same ones.
        """
        cancer = load_breast_cancer()
        X = pd.DataFrame(cancer.data, columns=cancer.feature_names)
        y = cancer.target
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )
        X_test_scaled = self.scaler.transform(X_test)
        y_pred = self.model.predict(X_test_scaled)
        self.accuracy = accuracy_score(y_test, y_pred)
        self.precision = precision_score(y_test, y_pred)
        self.recall = recall_score(y_test, y_pred)
        self.f1 = f1_score(y_test, y_pred)
        self.conf_matrix = confusion_matrix(y_test, y_pred)

    def initUI(self):
        """Build the form (two label/field pairs per row), the buttons, and show the window."""
        self.setWindowTitle("Predictor de Cáncer de Mama")

        main_layout = QVBoxLayout()
        grid_layout = QGridLayout()

        for i, feature in enumerate(self.feature_names_es):
            label = QLabel(feature)
            line_edit = QLineEdit()
            # Fields are keyed by the English name, which is what predict() iterates over.
            self.inputs[self.feature_names_en[i]] = line_edit
            row, col = divmod(i, 2)
            grid_layout.addWidget(label, row, col * 2)
            grid_layout.addWidget(line_edit, row, col * 2 + 1)

        main_layout.addLayout(grid_layout)

        diagnose_btn = QPushButton("Diagnosticar")
        diagnose_btn.clicked.connect(self.predict)

        clear_btn = QPushButton("Borrar Campos")
        clear_btn.clicked.connect(self.clear_fields)

        instructions_btn = QPushButton("Instrucciones")
        instructions_btn.clicked.connect(self.show_instructions)

        metrics_btn = QPushButton("Mostrar Métricas")
        metrics_btn.clicked.connect(self.show_metrics)

        exit_btn = QPushButton("Salir")
        exit_btn.clicked.connect(self.close)

        self.result_label = QLabel("Resultado: ")

        main_layout.addWidget(diagnose_btn)
        main_layout.addWidget(clear_btn)
        main_layout.addWidget(instructions_btn)
        main_layout.addWidget(metrics_btn)
        main_layout.addWidget(self.result_label)
        main_layout.addWidget(exit_btn)

        self.setLayout(main_layout)
        self.show()

    def show_metrics(self):
        """Show the stored metrics in a dialog, or a warning if none were computed."""
        if self.accuracy is not None:
            message = (
                f"Accuracy: {self.accuracy:.4f}\n"
                f"Precision: {self.precision:.4f}\n"
                f"Recall: {self.recall:.4f}\n"
                f"F1-Score: {self.f1:.4f}\n\n"
                f"Matriz de Confusión:\n{self.conf_matrix}"
            )
            QMessageBox.information(self, "Métricas del Modelo", message)
        else:
            QMessageBox.warning(
                self, "Métricas del Modelo", "Las métricas no están disponibles."
            )

    def show_instructions(self):
        """Show a short usage note in a dialog."""
        message = (
            "Ingrese los valores de las 30 características del tumor y haga clic en 'Diagnosticar'.\n"
            "El resultado mostrará si la predicción es Maligna (0) o Benigna (1)."
        )
        QMessageBox.information(self, "Instrucciones", message)

    def clear_fields(self):
        """Empty every input field and reset the result label."""
        for line_edit in self.inputs.values():
            line_edit.clear()
        self.result_label.setText("Resultado: ")

    def _read_feature(self, name_en, name_es):
        """Parse one input field as a finite float.

        Raises:
            ValueError: if the field is empty (after trimming whitespace) or
                does not contain a finite number. The message names the
                offending field so the user can find it among the inputs.
        """
        text = self.inputs[name_en].text().strip()
        if not text:
            raise ValueError("Por favor, complete todos los campos.")
        invalid_msg = f"Valor numérico no válido en '{name_es}': {text!r}"
        try:
            value = float(text)
        except ValueError:
            raise ValueError(invalid_msg) from None
        # float() accepts "nan" and "inf"; neither is a usable measurement.
        if not np.isfinite(value):
            raise ValueError(invalid_msg)
        return value

    def predict(self):
        """Read all fields, scale them, run the model and display the diagnosis.

        Any problem is reported in the result label instead of being raised,
        so a bad input never takes the window down.
        """
        try:
            features = [
                self._read_feature(name_en, name_es)
                for name_en, name_es in zip(
                    self.feature_names_en, self.feature_names_es
                )
            ]

            # The scaler in this notebook is fitted on a DataFrame, so it
            # remembers the column names. Passing a bare ndarray makes recent
            # scikit-learn versions emit a feature-name mismatch warning on
            # every call; a one-row frame with the same columns avoids it.
            sample = pd.DataFrame([features], columns=self.feature_names_en)
            scaled_features = self.scaler.transform(sample)
            prediction = self.model.predict(scaled_features)[0]

            result = "Benigno" if prediction == 1 else "Maligno"
            self.result_label.setText(f"Resultado: {result} ({prediction})")

        except ValueError as e:
            self.result_label.setText(f"Resultado: Error - {e}")
        except Exception as e:
            self.result_label.setText(f"Resultado: Error inesperado - {e}")


# Qt supports a single QApplication per process. In a notebook the kernel
# outlives this cell, so re-running it must reuse the existing instance
# instead of constructing a second one.
app = QApplication.instance()
if app is None:
    app = QApplication(sys.argv)

# Save the feature names (only needed once, when the file does not exist yet).
# CancerPredictor.__init__ loads this file, so it must exist before the window
# is created.
try:
    joblib.load("feature_names_cancer.joblib")
except FileNotFoundError:
    cancer = load_breast_cancer()
    joblib.dump(cancer.feature_names, "feature_names_cancer.joblib")

# Keep a module-level reference so the window is not garbage-collected while
# the event loop runs.
window = CancerPredictor()
# exec_() blocks until the window is closed; its return code becomes the exit status.
sys.exit(app.exec_())