# Cancer Prediction Project

## Project Overview
This project trains a logistic regression model to predict breast cancer diagnosis (malignant vs. benign) using the Breast Cancer Wisconsin dataset.

In [None]:
# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
# Step 2: Load data
# Read the cancer CSV directly from its GitHub URL into a DataFrame.
cancer = pd.read_csv(
    'https://github.com/YBIFoundation/Dataset/raw/main/Cancer.csv'
)
print("Dataset loaded successfully!", f"Dataset shape: {cancer.shape}", sep="\n")

In [None]:
# Step 3: Explore data
# Quick look at the raw table: the first rows, the per-column summary from
# DataFrame.info(), and the descriptive statistics from DataFrame.describe().
print("\nFirst 5 rows:")
print(cancer.head())

print("\nDataset info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called directly -- wrapping it in print() emits a stray "None" line.
cancer.info()

print("\nDataset description:")
print(cancer.describe())

In [None]:
# Step 4: Prepare data
# Features are every column except the row identifier, the target label, and
# 'Unnamed: 32' (presumably an empty artifact column from the CSV export --
# TODO confirm against the raw file). The target is the 'diagnosis' column.
excluded_columns = ['id', 'diagnosis', 'Unnamed: 32']
X = cancer.drop(columns=excluded_columns)
y = cancer['diagnosis']
print("\nData prepared!", f"Features shape: {X.shape}", f"Target shape: {y.shape}", sep="\n")

In [None]:
# Step 5: Split data
# 70% of the rows go to training; the fixed random_state makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=2529,
)
print("\nData split completed!", f"Training set: {X_train.shape}", f"Testing set: {X_test.shape}", sep="\n")

In [None]:
# Step 6: Train model
# fit() returns the fitted estimator, so construction and training are chained.
# max_iter is raised to 5000 -- presumably so the solver converges on these
# unscaled features; verify if convergence warnings appear.
model = LogisticRegression(max_iter=5000).fit(X_train, y_train)
print("\nModel training completed!")

In [None]:
# Step 7: Make predictions
# Predict a diagnosis label for every row of the held-out test features.
y_pred = model.predict(X=X_test)
print("Predictions made!")

In [None]:
# Step 8: Evaluate model
# Compare the test-set predictions with the true labels using three metrics:
# overall accuracy, the confusion matrix, and the per-class text report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print("\n=== MODEL RESULTS ===")
print(f"Accuracy: {accuracy:.2%}")
print("\nConfusion Matrix:", cm, sep="\n")
print("\nClassification Report:", report, sep="\n")

## Results Summary
- **Accuracy**: 95.91%
- The logistic regression model predicts the diagnosis with high accuracy on the held-out test set (30% of the data).
- Precision and recall are good for both the malignant and benign classes (see the classification report above for the per-class values).