<a href="https://colab.research.google.com/github/Neetu24/Breast-Cancer-Prediction/blob/main/Breast_Cancer_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 📦 Import Required Libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# 📥 1. Load Dataset
# Pull scikit-learn's bundled breast-cancer dataset and expose it as a single
# DataFrame: one column per feature, followed by a 'label' column (the target).
breast_cancer_data = load_breast_cancer()
df = pd.DataFrame(
    data=breast_cancer_data.data,
    columns=breast_cancer_data.feature_names,
).assign(label=breast_cancer_data.target)

# 🔍 2. EDA
# Quick look at the data: sample rows, dimensions, column dtypes, missing
# values, summary statistics and class balance.
print("🔹 First 5 Rows:")
print(df.head())

print("\n🔹 Last 5 Rows (with labels):")
print(df.tail())

print("\n🔹 Shape of the Dataset:", df.shape)
print("\n🔹 Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()

print("\n🔹 Missing Values:")
print(df.isnull().sum())

print("\n🔹 Summary Statistics:")
print(df.describe())

print("\n🔹 Target Value Distribution:")
print(df['label'].value_counts())  # 0 = malignant, 1 = benign

# 🔄 3. Preprocessing
# The target is the 'label' column; the features are every other column.
Y = df['label']
X = df.drop(columns=['label'])

# 🔀 4. Train-Test Split
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

# 🤖 5. Train Logistic Regression Model
# NOTE(review): max_iter=10000 is presumably there to give the solver room to
# converge on the unscaled features — confirm before lowering it.
model = LogisticRegression(max_iter=10000).fit(X_train, Y_train)

# 📊 6. Model Evaluation
# Predict on both splits first, then score each against its true labels so the
# train/test accuracies can be compared side by side.
X_train_prediction = model.predict(X_train)
X_test_prediction = model.predict(X_test)

train_accuracy = accuracy_score(Y_train, X_train_prediction)
test_accuracy = accuracy_score(Y_test, X_test_prediction)

print("\n✅ Accuracy on Training Data:", train_accuracy)
print("✅ Accuracy on Test Data:", test_accuracy)

# 🔮 7. Predictive System
# Feature values for a single sample (30 numbers). They are expected to be in
# the same order as the columns of X, since they are matched to those columns
# by position below.
input_data = (13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,
              0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,
              15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259)

# Convert to NumPy array and reshape into a one-row, 2-D batch
input_array = np.asarray(input_data).reshape(1, -1)

# The model was fitted on a DataFrame, so label the sample with the same
# column names: predicting on a bare array makes scikit-learn warn that X has
# no valid feature names. The DataFrame constructor also raises if the number
# of values does not match the number of training columns.
input_df = pd.DataFrame(input_array, columns=X.columns)

# Predict
prediction = model.predict(input_df)

print("\n🔮 Predictive System Result:")
# Label encoding used throughout this script: 0 = malignant, 1 = benign.
if prediction[0] == 0:
    print("🔴 The Breast Cancer is **Malignant**")
else:
    print("🟢 The Breast Cancer is **Benign**")


🔹 First 5 Rows:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  worst area  

