# 🩺 Breast Cancer Detection: Data Science & ML Notebook
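This notebook trains a Random Forest classifier on a breast cancer dataset to predict the diagnosis label, evaluates it on a held-out test set, and saves the trained model for use in a web app.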

## 1. Import Libraries
Let's start by importing the essential libraries.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## 2. Load & Preview Data
Load the breast cancer dataset and preview its structure.

In [None]:
# Read the dataset from a CSV file (relative path) into a DataFrame.
csv_path = 'breast cancer.csv'
df = pd.read_csv(csv_path)
# Bare last expression: the first five rows become the cell output.
df.head()

## 3. Data Cleaning & Preprocessing
Remove unnecessary columns and check for missing values.

In [None]:
# Discard the two listed columns in place. errors='ignore' skips any that are
# not present, so re-running the cell does not raise.
df.drop(columns=['Unnamed: 32', 'id'], inplace=True, errors='ignore')
# Per-column count of missing values, shown as the cell output.
df.isna().sum()

## 4. Exploratory Data Analysis
Visualize diagnosis distribution and feature correlations.

In [None]:
# Bar chart of how many rows carry each diagnosis label.
ax = sns.countplot(data=df, x='diagnosis')
ax.set_title('Diagnosis Distribution')
plt.show()

In [None]:
# Pairwise correlations between the numeric columns.
# 'diagnosis' still holds string labels at this point (it is encoded in section 5),
# and pandas >= 2.0 raises when DataFrame.corr() meets a non-numeric column,
# so restrict the frame to numeric dtypes before computing the matrix.
numeric_df = df.select_dtypes(include='number')
plt.figure(figsize=(10,8))
sns.heatmap(numeric_df.corr(), cmap='coolwarm', annot=False)
plt.title('Feature Correlation Heatmap')
plt.show()

## 5. Prepare Data for Modeling
Encode the diagnosis labels as integers (`M` → 1, `B` → 0), then split the data into training (80%) and test (20%) sets.

In [None]:
# Encode the target: 'M' -> 1, 'B' -> 0 (presumably malignant / benign).
# The identity entries (1 -> 1, 0 -> 0) make the cell safe to re-run: a plain
# {'M': 1, 'B': 0} map would turn already-encoded values into NaN on a second run.
label_map = {'M': 1, 'B': 0, 1: 1, 0: 0}
encoded = df['diagnosis'].map(label_map)
# Fail loudly on an unexpected label instead of passing NaN targets to the model.
assert encoded.notna().all(), 'diagnosis contains values other than M/B'
df['diagnosis'] = encoded.astype(int)

# Features are every remaining column; the target is the encoded diagnosis.
X = df.drop(columns='diagnosis')
y = df['diagnosis']

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio the
# same in both splits; random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## 6. Train & Evaluate Model
Fit a Random Forest and evaluate performance.

In [None]:
# Fit a Random Forest on the training split; random_state makes the fit reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out split and report the metrics.
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
# Use the '\n' escape to start the matrix/report on a new line; a raw line
# break inside a single-quoted string literal is a SyntaxError.
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))

## 7. Feature Importance
Visualize the most important features.

In [None]:
# Pair each importance score from the fitted model with its feature name.
feat_importances = pd.Series(model.feature_importances_, index=X.columns)
# Keep the ten highest scores and draw them as horizontal bars.
top_features = feat_importances.nlargest(10)
ax = top_features.plot.barh()
ax.set_title('Top 10 Feature Importances')
plt.show()

## 8. Save Model
Export the trained model for use in the web app.

In [None]:
import pickle

# Serialize the fitted model to 'model.pkl' (relative path).
# Only unpickle this file from a trusted source: loading a pickle can run arbitrary code.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

---
### Notebook by GitHub Copilot | 2025