<a href="https://colab.research.google.com/github/Prajwal-Deotare/Breast_Cancer_Prediction/blob/main/Cancer_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# train_model.py
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import os
import shutil

# Define the path to the dataset
DATASET_DIR = "../dataset"
DATASET_FILE = "breast_cancer_dataset_100_samples.csv"
DATASET_PATH = os.path.join(DATASET_DIR, DATASET_FILE)

# Ensure the dataset directory exists
if not os.path.exists(DATASET_DIR):
    os.makedirs(DATASET_DIR)
    print(f"Created directory: {DATASET_DIR}")

# Check if the dataset file exists in the root and move it if necessary
if not os.path.exists(DATASET_PATH) and os.path.exists(f'/content/{DATASET_FILE}'):
    shutil.move(f'/content/{DATASET_FILE}', DATASET_PATH)
    print(f"Moved '{DATASET_FILE}' to '{DATASET_DIR}'")

print("üìå Loading dataset...")
df = pd.read_csv(DATASET_PATH)

# Target and features
y = df["Cancer_Stage"]
X = df.drop("Cancer_Stage", axis=1)

# Encoding target
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Columns
categorical_cols = X.select_dtypes(include=["object"]).columns
numerical_cols = X.select_dtypes(include=["int64", "float64"]).columns

# Preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
    ]
)

# Model pipeline
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(
        n_estimators=300,
        max_depth=None,
        random_state=42
    ))
])

print("üìå Training model...")
model.fit(X, y_encoded)

# Save model and encoder
# Ensure the model directory exists
MODEL_DIR = "../model"
if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

joblib.dump(model, os.path.join(MODEL_DIR, "cancer_stage_model.pkl"))
joblib.dump(le, os.path.join(MODEL_DIR, "label_encoder.pkl"))

print("\n‚úÖ Model saved successfully!")
print("üìÅ Saved as `/model/cancer_stage_model.pkl`")
print("üìÅ Label encoder saved as `/model/label_encoder.pkl`")

Created directory: ../dataset
Moved 'breast_cancer_dataset_100_samples.csv' to '../dataset'
üìå Loading dataset...
üìå Training model...

‚úÖ Model saved successfully!
üìÅ Saved as `/model/cancer_stage_model.pkl`
üìÅ Label encoder saved as `/model/label_encoder.pkl`


In [4]:
# app.py - Streamlit Cancer Prediction App

import streamlit as st
import pandas as pd
import joblib
import plotly.express as px

st.set_page_config(page_title="Breast Cancer Stage Predictor", layout="wide")

# ---------------------------------------------------
# Load model + encoder
# ---------------------------------------------------
model = joblib.load("../model/cancer_stage_model.pkl")
label_encoder = joblib.load("../model/label_encoder.pkl")

# ------------------------------
# Title
# ------------------------------
st.markdown("""
<div style='text-align:center;'>
    <h1 style='color:#D81B60;'>Breast Cancer Stage Prediction Dashboard</h1>
    <p style='color:#6c757d;'>AI-Based Clinical Decision Support System</p>
</div>
<br>
""", unsafe_allow_html=True)

# ------------------------------
# Sidebar Inputs
# ------------------------------
st.sidebar.header("Patient Clinical Information")

def input_field(label, type="number", default=0, minVal=0, maxVal=100):
    if type == "number":
        # Ensure min_value, max_value, and value are of the same type
        if isinstance(default, float):
            minVal = float(minVal)
            maxVal = float(maxVal)
        else: # Assume int if not float
            minVal = int(minVal)
            maxVal = int(maxVal)
        return st.sidebar.number_input(label, min_value=minVal, max_value=maxVal, value=default)
    else:
        return st.sidebar.selectbox(label, default)

Age = input_field("Age", default=45, minVal=20, maxVal=90)
Family_History = st.sidebar.selectbox("Family History", [0, 1])
Lump_Duration_Months = input_field("Lump Duration (Months)", default=3, minVal=1, maxVal=24)
Pain = st.sidebar.selectbox("Pain Level", ["None", "Mild", "Moderate"])
Breast_Density = st.sidebar.selectbox("Breast Density", ["A", "B", "C", "D"])
Mass_Size_cm = input_field("Mass Size (cm)", default=2.8, minVal=0.1, maxVal=10)
Mass_Shape = st.sidebar.selectbox("Mass Shape", ["Round", "Oval", "Irregular"])
Mass_Margins = st.sidebar.selectbox("Mass Margins", ["Smooth", "Lobulated", "Spiculated"])
Calcifications = st.sidebar.selectbox("Calcifications", [0, 1])
Echotexture = st.sidebar.selectbox("Echotexture", ["Hypoechoic", "Complex", "Cystic"])
Lymph_Node_Size_cm = input_field("Lymph Node Size (cm)", default=1.5, minVal=0.1, maxVal=10)
Tumor_Type = st.sidebar.selectbox("Tumor Type", ["IDC", "ILC", "Benign"])
Tumor_Grade = st.sidebar.selectbox("Tumor Grade", [1, 2, 3])
ER_Status = input_field("ER Status (%)", default=85)
PR_Status = input_field("PR Status (%)", default=65)
HER2_Status = st.sidebar.selectbox("HER2 Status", ["Positive", "Negative"])
Ki67_Index = input_field("Ki-67 Index (%)", default=18)

input_data = pd.DataFrame([locals()])

# ------------------------------
# Prediction
# ------------------------------
st.subheader("üéØ Predict Cancer Stage")

if st.button("Predict Stage"):
    prediction = model.predict(input_data)
    result = label_encoder.inverse_transform(prediction)[0]

    st.success(f"Predicted Stage: {result}")

# ------------------------------
# Analytics & Visuals
# ------------------------------
st.subheader("üìä Dataset Insights")

df = pd.read_csv("../dataset/breast_cancer_dataset_100_samples.csv")

col1, col2 = st.columns(2)

with col1:
    st.plotly_chart(px.histogram(df, x="Cancer_Stage", title="Stage Distribution"))

with col2:
    st.plotly_chart(px.box(df, x="Cancer_Stage", y="Mass_Size_cm", title="Mass Size by Stage"))

st.plotly_chart(px.scatter(df, x="Age", y="Ki67_Index", color="Cancer_Stage",
                           title="Age vs Ki-67 Index") )

2025-12-04 11:40:23.518 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-12-04 11:40:23.536 Session state does not function when running a script without `streamlit run`


DeltaGenerator()

In [5]:
!pip install streamlit pyngrok
from pyngrok import ngrok

ngrok.set_auth_token("36HvIcq5OpX7WdqwkPlc0heVwDf_3eA6ZwWVkr8DXGXa5jHNZ")
ngrok.kill()

get_ipython().system_raw("streamlit run app/app.py --server.port 8501 &")

public_url = ngrok.connect(8501)
public_url




<NgrokTunnel: "https://lawana-bacciferous-lowly.ngrok-free.dev" -> "http://localhost:8501">