**Import necessary libraries**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

**Load the dataset**

In [2]:
# Cancer.csv from the YBIFoundation/Dataset GitHub repository.
# The file is fetched over the network each time this cell runs.
DATA_URL = "https://github.com/YBIFoundation/Dataset/raw/main/Cancer.csv"
df = pd.read_csv(DATA_URL)

**Display the DataFrame and encode the diagnosis labels**

In [3]:
# Preview the first five rows (the default for head()) as plain text.
first_rows = df.head()
print(first_rows)

         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    842302         M        17.99         10.38          122.80     1001.0   
1    842517         M        20.57         17.77          132.90     1326.0   
2  84300903         M        19.69         21.25          130.00     1203.0   
3  84348301         M        11.42         20.38           77.58      386.1   
4  84358402         M        20.29         14.34          135.10     1297.0   

   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \
0          0.11840           0.27760          0.3001              0.14710   
1          0.08474           0.07864          0.0869              0.07017   
2          0.10960           0.15990          0.1974              0.12790   
3          0.14250           0.28390          0.2414              0.10520   
4          0.10030           0.13280          0.1980              0.10430   

   ...  texture_worst  perimeter_worst  area_worst  smoothness

In [4]:
# Encode the diagnosis label as an integer: 'M' -> 0, 'B' -> 1.
# Series.map is used instead of replace(...).infer_objects(copy=False): it
# produces the numeric codes directly, without relying on replace's deprecated
# silent downcasting or on the `copy` keyword that newer pandas deprecates.
DIAGNOSIS_CODES = {'M': 0, 'B': 1}

# Guard so that re-running this cell on an already-encoded column is a no-op
# (map would otherwise turn the existing 0/1 values into NaN).
if not pd.api.types.is_integer_dtype(df['diagnosis']):
    encoded = df['diagnosis'].map(DIAGNOSIS_CODES)
    # Labels missing from DIAGNOSIS_CODES come back as NaN; fail loudly
    # rather than passing unencoded targets on to the model.
    if encoded.isna().any():
        unknown = sorted(df.loc[encoded.isna(), 'diagnosis'].astype(str).unique())
        raise ValueError(f"Unexpected diagnosis labels: {unknown}")
    df['diagnosis'] = encoded.astype('int64')

In [5]:
# List the distinct values present in the diagnosis column.
diagnosis_values = df['diagnosis'].unique()
print(diagnosis_values)

[0 1]


**Split the data into features (X) and target (y)**

In [6]:
# Every column except these three is kept as a model feature.
non_feature_columns = ['id', 'diagnosis', 'Unnamed: 32']
X = df.drop(columns=non_feature_columns)

In [7]:
# Target vector: the diagnosis column, taken for all rows.
y = df.loc[:, 'diagnosis']

**Split the dataset into training and testing sets**

In [8]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

**Initialize the logistic regression model**

In [9]:
# Allow up to 10,000 solver iterations; every other setting keeps its default.
MAX_SOLVER_ITERATIONS = 10_000
model = LogisticRegression(max_iter=MAX_SOLVER_ITERATIONS)

**Train the model and predict on the test set**

In [10]:
# Fit the classifier on the training portion of the split.
model.fit(X=X_train, y=y_train)

In [11]:
# Predict class labels for the held-out test features.
y_pred = model.predict(X=X_test)

**Evaluate the model**

In [12]:
# Each metric compares the held-out labels with the model's predictions.
# Labels follow the encoding applied earlier in the notebook: 0 = 'M', 1 = 'B'.
scoring_inputs = dict(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(**scoring_inputs)
conf_matrix = confusion_matrix(**scoring_inputs)
class_report = classification_report(**scoring_inputs)

**Print evaluation metrics**

In [13]:
# Report the test-set accuracy at full float precision.
accuracy_line = f"Accuracy: {accuracy}"
print(accuracy_line)

Accuracy: 0.956140350877193


In [14]:
# Heading for the confusion matrix printed by the following cell.
confusion_heading = "Confusion Matrix:"
print(confusion_heading)

Confusion Matrix:


In [15]:
# Show the confusion matrix computed from y_test and y_pred.
# NOTE(review): scikit-learn's convention is rows = true labels and
# columns = predicted labels, in sorted label order; confirm against the docs.
matrix_text = str(conf_matrix)
print(matrix_text)

[[39  4]
 [ 1 70]]


In [16]:
# Heading for the classification report printed by the following cell.
report_heading = "Classification Report:"
print(report_heading)

Classification Report:


In [17]:
# Show the classification report text built from y_test and y_pred.
print(f"{class_report}")

              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

