In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score

# --- Load data ---------------------------------------------------------------
file_path = './data.csv'
data = pd.read_csv(file_path)

# Replace the 'diagnosis' labels with integer codes. LabelEncoder numbers the
# classes in sorted label order, so if the column holds the 'B'/'M' labels that
# the histogram cell filters on, 'B' becomes 0 and 'M' becomes 1.
label_encoder = LabelEncoder()
data['diagnosis'] = label_encoder.fit_transform(data['diagnosis'])

# --- Features / target -------------------------------------------------------
X = data[['radius_mean', 'texture_mean']]
y = data['diagnosis']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Fit ---------------------------------------------------------------------
model = LogisticRegression()
model.fit(X_train, y_train)

# --- Evaluate on the held-out rows -------------------------------------------
y_pred = model.predict(X_test)
# Probability of the class encoded as 1 (second column of predict_proba).
y_proba = model.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)

# Show the metrics: previously they were computed but never displayed, so the
# quality of the model was invisible when reading the notebook.
print(f'Exactitud: {accuracy:.3f}')
print(f'ROC AUC: {roc_auc:.3f}')
print('Matriz de confusión (filas = real, columnas = predicción):')
print(conf_matrix)

def predecir_probabilidad_tumor(radius_mean, texture_mean, modelo=None):
    """Build a text report with the class probabilities for a single tumor.

    Args:
        radius_mean: Value for the 'radius_mean' feature.
        texture_mean: Value for the 'texture_mean' feature.
        modelo: Optional classifier exposing ``predict_proba``. When omitted,
            the notebook-level ``model`` is used, which keeps existing
            two-argument calls working unchanged.

    Returns:
        A two-line string (in Spanish) with both probabilities as percentages
        rounded to three decimals. Column 0 of ``predict_proba`` is reported
        as benign and column 1 as malignant.
    """
    if modelo is None:
        # Fall back to the classifier fitted at notebook level.
        modelo = model

    # One-row frame with the same column names used when selecting the
    # training features, so the classifier sees matching feature names.
    valores = pd.DataFrame([[radius_mean, texture_mean]], columns=['radius_mean', 'texture_mean'])
    probabilidades = modelo.predict_proba(valores)

    p_benigno = probabilidades[0][0]
    p_maligno = probabilidades[0][1]
    return f'Probabilidad de que el tumor sea BENIGNO: {p_benigno*100:.3f}%\nProbabilidades de que el tumor sea MALIGNO: {p_maligno*100:.3f}%'


In [None]:
# Read the CSV again: the training cell overwrote data['diagnosis'] with integer
# codes, while the filters below compare against the raw 'M' / 'B' labels.
data = pd.read_csv(file_path)

# One entry per panel: (column, title, x-label, y-label). Only the left panel
# carries a y-label.
panels = [
    ('radius_mean', 'Distribución del Radio Medio por Tipo de Diagnóstico', 'Radio Medio', 'Frecuencia'),
    ('texture_mean', 'Distribución de la Textura Media por Tipo de Diagnóstico', 'Textura Media', None),
]
# Diagnosis label -> legend entry, drawn in this order on every panel.
diagnosis_groups = [('M', 'Maligno'), ('B', 'Benigno')]

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

for ax, (column, title, x_label, y_label) in zip(axes, panels):
    # Overlay one semi-transparent histogram per diagnosis group.
    for diagnosis_code, legend_label in diagnosis_groups:
        subset = data[data['diagnosis'] == diagnosis_code][column]
        ax.hist(subset, bins=20, alpha=0.5, label=legend_label)
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel(x_label)
    if y_label is not None:
        ax.set_ylabel(y_label)

fig.tight_layout()

plt.show()


In [None]:
# Example query for a single tumor. print() is used so the newline inside the
# returned report is rendered as two lines.
print(predecir_probabilidad_tumor(radius_mean=11.5, texture_mean=16.1))