In [11]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, f1_score,precision_score, recall_score

In [2]:
# Fetch the scikit-learn breast cancer dataset; as_frame=True makes the
# loader expose its contents as pandas objects.
breast_cancer_data = load_breast_cancer(as_frame=True)

# The combined frame carries the feature columns together with the target column.
df = breast_cancer_data.frame

# Overview of the data: frame shape, summary statistics, class names and
# feature names, each printed under its own heading.
overview = (
    ("DataFrame shape:", df.shape),
    ("\nDataFrame description:\n", df.describe()),
    ("\nTarget names:", breast_cancer_data.target_names),
    ("\nFeature names:\n", breast_cancer_data.feature_names),
)
for heading, content in overview:
    print(heading, content)


DataFrame shape: (569, 31)

DataFrame description:
        mean radius  mean texture  mean perimeter    mean area  \
count   569.000000    569.000000      569.000000   569.000000   
mean     14.127292     19.289649       91.969033   654.889104   
std       3.524049      4.301036       24.298981   351.914129   
min       6.981000      9.710000       43.790000   143.500000   
25%      11.700000     16.170000       75.170000   420.300000   
50%      13.370000     18.840000       86.240000   551.100000   
75%      15.780000     21.800000      104.100000   782.700000   
max      28.110000     39.280000      188.500000  2501.000000   

       mean smoothness  mean compactness  mean concavity  mean concave points  \
count       569.000000        569.000000      569.000000           569.000000   
mean          0.096360          0.104341        0.088799             0.048919   
std           0.014064          0.052813        0.079720             0.038803   
min           0.052630          0.0193

In [3]:
# Separate the frame into the feature matrix (X) and the label vector (y).
# Because the dataset was loaded with `as_frame=True`, the labels are already
# present in the frame under the 'target' column.
target = 'target'
features = breast_cancer_data.feature_names.tolist()
X, y = df[features], df[target]

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable.
# NOTE(review): no `stratify` argument is passed, so class proportions are not
# guaranteed to match between the two partitions — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("\nTest set shape:", X_test.shape)


Test set shape: (114, 30)


In [5]:
# Build the logistic regression classifier and train it on the training split.
# The iteration limit is increased so the solver can reach convergence.
MAX_ITER = 5000
model = LogisticRegression(max_iter=MAX_ITER)
model.fit(X_train, y_train)

In [6]:
# Score the held-out rows: hard class labels and per-class probabilities.
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

# Report every predicted label, but only the first five probability rows.
print("\nPredicted values:", y_pred)
print("\nPredicted probabilities (first 5 samples):\n", y_pred_proba[:5])



Predicted values: [1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 0 0]

Predicted probabilities (first 5 samples):
 [[1.21154339e-01 8.78845661e-01]
 [9.99999969e-01 3.12295450e-08]
 [9.98394230e-01 1.60576954e-03]
 [1.20817362e-03 9.98791826e-01]
 [1.39651628e-04 9.99860348e-01]]


In [12]:
# Evaluate the fitted model on the held-out test split.

# Accuracy as reported by the estimator itself.
print("\nAccuracy:", model.score(X_test, y_test))

# Per-class breakdown: confusion matrix, then the text report labelled with
# the dataset's class names.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        target_names=breast_cancer_data.target_names,
    ),
)

# Scalar summary scores. F1, precision and recall are computed for label 1,
# which is the 'benign' row of the classification report above.
scalar_metrics = (
    ("Accuracy Score:", accuracy_score),
    ("F1 Score:", f1_score),
    ("Precision Score:", precision_score),
    ("Recall Score:", recall_score),
)
for caption, metric_fn in scalar_metrics:
    print(caption, metric_fn(y_test, y_pred))



Accuracy: 0.956140350877193

Confusion Matrix:
 [[39  4]
 [ 1 70]]

Classification Report:
               precision    recall  f1-score   support

   malignant       0.97      0.91      0.94        43
      benign       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Accuracy Score: 0.956140350877193
F1 Score: 0.9655172413793104
Precision Score: 0.9459459459459459
Recall Score: 0.9859154929577465


In [13]:
# Recap of the scalar test-set scores, printed in a fixed order.
# F1, precision and recall are computed with scikit-learn's default positive
# label.
recap_scorers = {
    "Accuracy Score:": accuracy_score,
    "F1 Score:": f1_score,
    "Precision Score:": precision_score,
    "Recall Score:": recall_score,
}
for caption, scorer in recap_scorers.items():
    print(caption, scorer(y_test, y_pred))

Accuracy Score: 0.956140350877193
F1 Score: 0.9655172413793104
Precision Score: 0.9459459459459459
Recall Score: 0.9859154929577465
