Load and Explore the Dataset

In [11]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

# Load the dataset
data = load_breast_cancer()

# Convert to DataFrame
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Display the first few rows of the DataFrame
print(df.head())


   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  worst area  \
0             

Data Exploration

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Display correlation heatmap
plt.figure(figsize=(15, 10))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm')
plt.show()

# Pairplot for some features
sns.pairplot(df, vars=['mean radius', 'mean texture', 'mean perimeter', 'mean area'], hue='target')
plt.show()

Define Features and Target Variable

In [None]:
X = df[['mean radius', 'mean texture']]  # Features: mean radius and mean texture
y = df['target']  # Target: benign (0) or malignant (1)

 Split the Data into Training and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Train the Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Initialize and fit the model
model = LogisticRegression()
model.fit(X_train, y_train)

# Output the coefficients
print(f"Intercept: {model.intercept_[0]}")
print(f"Coefficients: {model.coef_}")


Make Predictions

In [None]:
# Predict on the test set
y_pred = model.predict(X_test)

# Predict probabilities
y_pred_prob = model.predict_proba(X_test)[:, 1]  # Probability of class 1 (malignant)


Evaluate the Model

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score, roc_curve

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print(f"Confusion Matrix:\n{conf_matrix}")

# Classification report
class_report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{class_report}")

# ROC-AUC score
roc_auc = roc_auc_score(y_test, y_pred_prob)
print(f"ROC-AUC Score: {roc_auc:.2f}")

# ROC curve
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
plt.figure()
plt.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='grey', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()


In [12]:
# Step 1: Import necessary libraries
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Load the dataset
data = load_breast_cancer()

# Convert to DataFrame
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Step 2: Define Features and Target Variable
X = df[['mean radius', 'mean texture']]  # Using 'mean radius' and 'mean texture' as features
y = df['target']  # Target: benign (0) or malignant (1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 3: Predict Cancer for a New Patient
def predict_cancer(mean_radius, mean_texture):
    # Create a DataFrame for the new patient's data
    new_patient = pd.DataFrame([[mean_radius, mean_texture]], columns=['mean radius', 'mean texture'])
    
    # Make the prediction (0 = benign, 1 = malignant)
    prediction = model.predict(new_patient)[0]
    
    # Get the predicted probability for class 1 (malignant)
    probability = model.predict_proba(new_patient)[0][1]
    
    # Interpret the result
    diagnosis = "Malignant" if prediction == 1 else "Benign"
    return diagnosis, probability

# Example: Predict for a new patient
mean_radius = 14.0  # Replace with actual values
mean_texture = 10.0  # Replace with actual values
diagnosis, probability = predict_cancer(mean_radius, mean_texture)

print(f"Diagnosis: {diagnosis}")
print(f"Probability of being malignant: {probability:.2f}")


Diagnosis: Malignant
Probability of being malignant: 0.95
