# 🩺 Breast Cancer Prediction Assistant (Maryam Fagbo)
This project uses a neural network (AI model) to predict whether a breast tumor is **benign** or **malignant** using medical features.
You can load your dataset from a Google Drive share link and run the model directly in Google Colab.

In [ ]:
# ✅ Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.inspection import permutation_importance

## 📂 Step 2: Load Dataset from Google Drive
Paste your Google Drive share link below (make sure it’s shared as *Anyone with the link → Viewer*). The file must be a CSV with a `diagnosis` column whose values are `M` (malignant) or `B` (benign).

In [ ]:
import re

drive_link = "https://drive.google.com/file/d/1I3vITms6liuQSeRByDxODFIGovOHU36o/view?usp=sharing"

# Pull the file ID out of the share link. Two link shapes are recognised:
#   .../file/d/<ID>/view?...    and    ...?id=<ID>
# The ID is matched as a run of URL-safe characters, so a link with no
# trailing "/view" segment (e.g. ".../d/<ID>?usp=sharing") does not leak its
# query string into the ID.
id_match = re.search(r"/d/([A-Za-z0-9_-]+)|[?&]id=([A-Za-z0-9_-]+)", drive_link)
if id_match is None:
    # Fail with an actionable message instead of a bare IndexError.
    raise ValueError(
        f"Could not find a Google Drive file ID in the link: {drive_link!r}"
    )
file_id = id_match.group(1) or id_match.group(2)
direct_link = f"https://drive.google.com/uc?id={file_id}"

# read_csv fetches the file over HTTP, so the link must be publicly readable.
data = pd.read_csv(direct_link)
print("✅ Dataset loaded successfully!")
print("Shape:", data.shape)
data.head()

In [ ]:
# 🔍 Step 3: Explore Data
# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called on its own; wrapping it in print() would add a stray "None".
data.info()
# Summary statistics for the numeric columns.
print(data.describe())
# Class balance: how many rows carry each diagnosis label.
sns.countplot(x='diagnosis', data=data, palette='viridis')
plt.title('Diagnosis Distribution')
plt.show()

In [ ]:
# 🧩 Step 4: Prepare Data
# Encode the target: 'M' -> 1, 'B' -> 0. Any other value is passed through
# unchanged (the same semantics Series.replace had). map() is used because
# replace() from strings to ints relies on pandas' deprecated silent
# downcasting of object columns.
label_map = {'M': 1, 'B': 0}
y = data['diagnosis'].map(lambda label: label_map.get(label, label))

# Features: every column except the target.
# - An 'id' column, if present, is also dropped; presumably it is a row
#   identifier rather than a measurement — confirm against your dataset.
# - Columns that are entirely NaN (e.g. produced by a trailing comma in the
#   CSV header) are dropped: they carry no information and the NaNs would
#   make the classifier's fit fail.
X = data.drop(columns=['diagnosis', 'id'], errors='ignore').dropna(axis=1, how='all')

# Hold out 20% of the rows for testing; stratify so both splits keep the same
# benign/malignant ratio as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
print('✅ Data prepared successfully.')

In [ ]:
# 🧠 Step 5: Train Neural Network
# Multi-layer perceptron with two hidden layers (50 and 25 units) and ReLU
# activations, trained for at most 1000 iterations with a fixed seed.
# fit() returns the estimator itself, so construction and training are chained.
model = MLPClassifier(
    random_state=42,
    max_iter=1000,
    activation='relu',
    hidden_layer_sizes=(50, 25),
).fit(X_train_scaled, y_train)

# Predicted class labels for the held-out (scaled) test rows.
y_pred = model.predict(X_test_scaled)

In [ ]:
# 📊 Step 6: Evaluate Performance
# Overall fraction of test rows classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')
# Per-class precision, recall and F1.
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, cmap='coolwarm', fmt='d')
# confusion_matrix puts true classes on rows and predicted classes on columns,
# and heatmap draws rows along the y-axis; label the axes so the off-diagonal
# cells (false positives vs. false negatives) can be told apart.
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Confusion Matrix')
plt.show()

In [ ]:
# 💡 Step 7: Explain Feature Importance
# Permutation importance on the test split: 10 repeats per feature with a
# fixed seed; the per-feature mean over the repeats is what gets ranked.
result = permutation_importance(
    model,
    X_test_scaled,
    y_test,
    random_state=42,
    n_repeats=10,
)

# One row per feature, ordered from most to least important.
importance_df = pd.DataFrame(
    {'feature': X.columns, 'importance': result.importances_mean}
).sort_values(by='importance', ascending=False)

# Plot only the ten highest-ranked features.
top_features = importance_df.head(10)
sns.barplot(data=top_features, x='importance', y='feature', palette='mako')
plt.title('Top 10 Important Features')
plt.show()

In [ ]:
# 🧾 Step 8: Project Summary
# Collect the report lines first, then emit them one per line in a single call.
summary_lines = (
    '✅ Project Summary:',
    f'- Accuracy: {accuracy*100:.2f}%',
    '- Model predicts benign vs malignant tumors.',
    '- Uses feature importance to explain predictions.',
)
print(*summary_lines, sep='\n')