<a href="https://colab.research.google.com/github/2320030402/Breast-Cancer-Detection/blob/main/Breast_Cancer_Detection_Using_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the Dependencies

In [1]:
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

Data Collection & Processing

In [2]:
# loading the data from sklearn
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [3]:
print(breast_cancer_dataset)

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [4]:
# loading the data to a data frame
data_frame = pd.DataFrame(breast_cancer_dataset.data, columns = breast_cancer_dataset.feature_names)

In [5]:
# print the first 5 rows of the dataframe
data_frame.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [6]:
# adding the 'target' column to the data frame
data_frame['label'] = breast_cancer_dataset.target

In [7]:
# print last 5 rows of the dataframe
data_frame.tail()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,0
565,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,0.1752,0.05533,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,0
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,0
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,0
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,1


In [8]:
# number of rows and columns in the dataset
data_frame.shape

(569, 31)

In [9]:
# getting some information about the data
data_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

In [10]:
# checking for missing values
data_frame.isnull().sum()

Unnamed: 0,0
mean radius,0
mean texture,0
mean perimeter,0
mean area,0
mean smoothness,0
mean compactness,0
mean concavity,0
mean concave points,0
mean symmetry,0
mean fractal dimension,0


In [11]:
# statistical measures about the data
data_frame.describe()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0
mean,14.127292,19.289649,91.969033,654.889104,0.09636,0.104341,0.088799,0.048919,0.181162,0.062798,...,25.677223,107.261213,880.583128,0.132369,0.254265,0.272188,0.114606,0.290076,0.083946,0.627417
std,3.524049,4.301036,24.298981,351.914129,0.014064,0.052813,0.07972,0.038803,0.027414,0.00706,...,6.146258,33.602542,569.356993,0.022832,0.157336,0.208624,0.065732,0.061867,0.018061,0.483918
min,6.981,9.71,43.79,143.5,0.05263,0.01938,0.0,0.0,0.106,0.04996,...,12.02,50.41,185.2,0.07117,0.02729,0.0,0.0,0.1565,0.05504,0.0
25%,11.7,16.17,75.17,420.3,0.08637,0.06492,0.02956,0.02031,0.1619,0.0577,...,21.08,84.11,515.3,0.1166,0.1472,0.1145,0.06493,0.2504,0.07146,0.0
50%,13.37,18.84,86.24,551.1,0.09587,0.09263,0.06154,0.0335,0.1792,0.06154,...,25.41,97.66,686.5,0.1313,0.2119,0.2267,0.09993,0.2822,0.08004,1.0
75%,15.78,21.8,104.1,782.7,0.1053,0.1304,0.1307,0.074,0.1957,0.06612,...,29.72,125.4,1084.0,0.146,0.3391,0.3829,0.1614,0.3179,0.09208,1.0
max,28.11,39.28,188.5,2501.0,0.1634,0.3454,0.4268,0.2012,0.304,0.09744,...,49.54,251.2,4254.0,0.2226,1.058,1.252,0.291,0.6638,0.2075,1.0


In [12]:
# checking the distribution of Target Varibale
data_frame['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,357
0,212


1 --> Benign

0 --> Malignant

In [13]:
data_frame.groupby('label').mean()

Unnamed: 0_level_0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,17.46283,21.604906,115.365377,978.376415,0.102898,0.145188,0.160775,0.08799,0.192909,0.06268,...,21.134811,29.318208,141.37033,1422.286321,0.144845,0.374824,0.450606,0.182237,0.323468,0.09153
1,12.146524,17.914762,78.075406,462.790196,0.092478,0.080085,0.046058,0.025717,0.174186,0.062867,...,13.379801,23.51507,87.005938,558.89944,0.124959,0.182673,0.166238,0.074444,0.270246,0.079442


Separating the features and target

In [14]:
X = data_frame.drop(columns='label', axis=1)
Y = data_frame['label']

In [15]:
print(X)

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mea

In [16]:
print(Y)

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: label, Length: 569, dtype: int64


Splitting the data into training data & Testing data

In [17]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

In [18]:
print(X.shape, X_train.shape, X_test.shape)

(569, 30) (455, 30) (114, 30)


Model Training

Logistic Regression

In [19]:
model = LogisticRegression()

In [20]:
# training the Logistic Regression model using Training data

model.fit(X_train, Y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model Evaluation

Accuracy Score

In [21]:
# accuracy on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [22]:
print('Accuracy on training data = ', training_data_accuracy)

Accuracy on training data =  0.9494505494505494


In [23]:
# accuracy on test data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [24]:
print('Accuracy on test data = ', test_data_accuracy)

Accuracy on test data =  0.9298245614035088


Building a Predictive System

In [25]:
input_data = (13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259)

# change the input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array as we are predicting for one datapoint
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

prediction = model.predict(input_data_reshaped)
print(prediction)

if (prediction[0] == 0):
  print('The Breast cancer is Malignant')

else:
  print('The Breast Cancer is Benign')



[1]
The Breast Cancer is Benign




In [26]:
# Cell 1: Install Dependencies
!pip install flask joblib scikit-learn numpy pandas
# Install pyngrok to expose the local Flask server to the internet
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.4.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.4.0


In [27]:
# Cell 2: Data Processing, Scaling, and Model Training
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler # NEW
import joblib # NEW

# Data Collection & Processing
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()
data_frame = pd.DataFrame(breast_cancer_dataset.data, columns = breast_cancer_dataset.feature_names)
data_frame['label'] = breast_cancer_dataset.target

# Separating the features and target
X = data_frame.drop(columns='label', axis=1)
Y = data_frame['label']

# Splitting the data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

# --- NEW: SCALING THE DATA ---
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Training - Logistic Regression (Increased max_iter and using scaled data)
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, Y_train)

# Model Evaluation (on scaled data)
training_data_accuracy = accuracy_score(Y_train, model.predict(X_train_scaled))
test_data_accuracy = accuracy_score(Y_test, model.predict(X_test_scaled))

print(f'Accuracy on training data (scaled): {training_data_accuracy:.4f}')
print(f'Accuracy on test data (scaled): {test_data_accuracy:.4f}')

# --- NEW: SAVE THE MODEL AND SCALER ---
joblib.dump(model, 'logistic_regression_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
print("\nModel and Scaler saved as 'logistic_regression_model.pkl' and 'scaler.pkl'.")

# Define feature names globally for the Flask app
feature_names = list(X.columns)

Accuracy on training data (scaled): 0.9890
Accuracy on test data (scaled): 0.9737

Model and Scaler saved as 'logistic_regression_model.pkl' and 'scaler.pkl'.


In [28]:
# Cell 3: Create HTML Templates (Updated with Detailed Introduction)
import os

# Create the templates directory
if not os.path.exists('templates'):
    os.makedirs('templates')

# --- 1. INTRO.HTML (Page 1: Introduction) ---
intro_html_content = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Breast Cancer Predictor - Welcome</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background-color: #f4f4f9; }
        .container { max-width: 800px; margin: auto; background: white; padding: 40px; border-radius: 8px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); text-align: center; }
        h1 { color: #007bff; margin-bottom: 20px; }
        h2 { margin-bottom: 20px; }
        p { margin-bottom: 25px; font-size: 1.0em; line-height: 1.5; text-align: justify; }
        .action-button { display: inline-block; padding: 10px 20px; text-decoration: none; border-radius: 8px; font-weight: bold; margin: 5px; cursor: pointer; }

        .start-button { background-color: #28a745; color: white; border: none; font-size: 1.2em; }
        .start-button:hover { background-color: #218838; }

        .back-button-js { background-color: #6c757d; color: white; border: none; font-size: 1.2em; }
        .back-button-js:hover { background-color: #5a6268; }

        /* Footer Styling */
        footer {
            margin-top: 40px;
            padding: 15px 0;
            border-top: 1px solid #eee;
            text-align: center;
            font-size: 0.9em;
            color: #666;
        }
        footer strong { color: #333; display: block; margin-bottom: 5px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>🏥 Breast Cancer Diagnosis Tool</h1>
        <h2>Page 1: Introduction to the Predictive System</h2>

        <p>This web application utilizes a machine learning model to assist in the diagnosis of breast masses. The system is built upon the widely-used **Wisconsin Diagnostic Breast Cancer (WDBC)** dataset, which contains features computed from digitized images of fine needle aspirates (FNA) of breast masses.</p>

        <p>The core of this system is a **Logistic Regression** classifier. This algorithm is highly effective for binary classification tasks, making it ideal for distinguishing between two classes: **Malignant (Cancerous)** and **Benign (Non-cancerous)**.</p>

        <p>The model requires 30 distinct input features—including measurements like mean radius, mean texture, and worst area—to make a prediction. To ensure high accuracy and stability, all input data is **Standard Scaled** before being fed to the model, matching the process used during training.</p>

        <p>Click "Start Prediction" to proceed to the data input page, where you can submit patient measurements via manual entry or batch CSV upload.</p>

        <button class="action-button back-button-js" onclick="window.history.back()">← Go Back</button>
        <a href="/input" class="action-button start-button">Start Prediction →</a>
    </div>

    <footer>
        <strong>Project Team:</strong>
        K. DILIP<br>
        PLV. ABHIRAM<br>
        G. SAIABHIRAM REDDY
        <p>&copy; Copyright 2025</p>
    </footer>
</body>
</html>
"""

# --- 2. PREDICT_INPUT.HTML (Page 2: Input Forms - REMAINS THE SAME) ---
predict_input_html_content = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Breast Cancer Predictor - Input</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background-color: #f4f4f9; }
        .container { max-width: 800px; margin: auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); }
        h1 { text-align: center; color: #333; }
        h2 { text-align: center; margin-top: 20px; border-bottom: 2px solid #eee; padding-bottom: 10px;}
        .section-separator { text-align: center; margin: 30px 0; font-weight: bold; }

        /* Input Styles */
        .manual-form { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-top: 20px; }
        label { font-weight: bold; }
        input[type="number"] { width: 95%; padding: 8px; border: 1px solid #ccc; border-radius: 4px; box-sizing: border-box; }
        .submit-btn { grid-column: 1 / span 2; text-align: center; }
        .upload-form { text-align: center; padding: 20px; border: 1px dashed #ccc; margin-top: 20px; }

        .action-button { display: inline-block; padding: 10px 20px; text-decoration: none; border-radius: 4px; font-weight: bold; margin: 5px; cursor: pointer; }

        .submit-button { background-color: #007bff; color: white; border: none; font-size: 16px; margin-top: 10px; }
        .submit-button:hover { background-color: #0056b3; }

        .back-button { background-color: #6c757d; color: white; border: none; }

        /* Footer Styling */
        footer {
            margin-top: 40px;
            padding: 15px 0;
            border-top: 1px solid #eee;
            text-align: center;
            font-size: 0.9em;
            color: #666;
        }
        footer strong { color: #333; display: block; margin-bottom: 5px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Prediction Input</h1>
        <h2>Page 2: Input Forms</h2>

        <a href="/" class="action-button back-button">← Back to Introduction</a>

        <h2>1. Predict using Dataset Upload</h2>
        <div class="upload-form">
            <form action="/predict" method="post" enctype="multipart/form-data">
                <input type="file" name="file" accept=".csv" required>
                <input type="submit" value="Upload & Predict Batch" class="submit-button">
                <p style="font-size: smaller; color: gray;">(Upload a CSV file with feature columns)</p>
            </form>
        </div>

        <div class="section-separator">--- OR ---</div>

        <h2>2. Predict using Manual Entry</h2>
        <form action="/predict" method="post" class="manual-form">
            {% for name in feature_names %}
            <div>
                <label for="{{ name }}">{{ name.replace('_', ' ').title() }}:</label>
                <input type="number" id="{{ name }}" name="{{ name }}" step="any" required>
            </div>
            {% endfor %}
            <div class="submit-btn">
                <input type="submit" value="Predict Single Case" class="submit-button">
            </div>
        </form>
    </div>

    <footer>
        <strong>Project Team:</strong>
        K. DILIP<br>
        PLV. ABHIRAM<br>
        G. SAIABHIRAM REDDY
        <p>&copy; Copyright 2025</p>
    </footer>
</body>
</html>
"""

# --- 3. RESULTS.HTML (Page 3: Prediction Output Page - REMAINS THE SAME) ---
results_html_content = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Prediction Results</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background-color: #f4f4f9; }
        .container { max-width: 800px; margin: auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); }
        h1 { text-align: center; color: #333; }
        h2 { text-align: center; margin-top: 20px; border-bottom: 2px solid #eee; padding-bottom: 10px;}
        .result { margin-top: 30px; padding: 15px; border-radius: 4px; text-align: center; }
        .malignant { background-color: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; }
        .benign { background-color: #d4edda; color: #155724; border: 1px solid #c3e6cb; }
        .download-link { display: block; margin-top: 15px; font-weight: bold; }

        .action-button { display: inline-block; margin-top: 20px; padding: 10px 15px; background-color: #6c757d; color: white; text-decoration: none; border-radius: 4px; margin: 5px; }
        .action-button-main { background-color: #007bff; }

        /* Footer Styling */
        footer {
            margin-top: 40px;
            padding: 15px 0;
            border-top: 1px solid #eee;
            text-align: center;
            font-size: 0.9em;
            color: #666;
        }
        footer strong { color: #333; display: block; margin-bottom: 5px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Prediction Results and Analysis</h1>
        <h2>Page 3: Prediction and Results</h2>

        {% if prediction_text %}
        <div class="result {% if 'Malignant' in prediction_text %}malignant{% else %}benign{% endif %}">
            <h2>{{ prediction_text }}</h2>
            <p>{{ full_message }}</p>
            {% if download_link %}
                <a class="download-link" href="{{ download_link }}">Download Predicted Results CSV</a>
            {% endif %}
            <p><strong>Disclaimer: This is a Machine Learning prediction and should NOT replace a professional medical diagnosis.</strong></p>
        </div>
        {% endif %}

        <div style="text-align: center;">
            <a href="/" class="action-button action-button-main">← Go Back to Introduction Page</a>

            <a href="/input" class="action-button">← Go Back to Input Forms</a>
        </div>
    </div>

    <footer>
        <strong>Project Team:</strong>
        K. DILIP<br>
        PLV. ABHIRAM<br>
        G. SAIABHIRAM REDDY
        <p>&copy; Copyright 2025</p>
    </footer>
</body>
</html>
"""

# Write all three files
with open('templates/intro.html', 'w') as f:
    f.write(intro_html_content)
with open('templates/predict_input.html', 'w') as f:
    f.write(predict_input_html_content)
with open('templates/results.html', 'w') as f:
    f.write(results_html_content)

print("HTML templates updated with detailed introduction.")

HTML templates updated with detailed introduction.


In [29]:
# NEW CELL: ngrok Authorization

from pyngrok import ngrok

# --- IMPORTANT ---
# REPLACE 'YOUR_AUTHTOKEN_HERE' with the actual token you copied from ngrok dashboard.
ngrok.set_auth_token("31SCm7VlV4Fx1s14hittTpRzc7G_7X9cVbE49r5bJr1dk9niA")

print("ngrok authentication token set successfully.")

ngrok authentication token set successfully.


In [30]:
# Cell X: File Inspection

import pandas as pd
from google.colab import files
import io

print("Please select the 'data.csv' file you want to use for prediction:")
uploaded = files.upload()

# Assuming you uploaded 'data.csv'
file_name = next(iter(uploaded))

try:
    # Read the file content
    df_check = pd.read_csv(io.StringIO(uploaded[file_name].decode('utf-8')))

    # Print the first few rows and the columns
    print("\n--- First 5 Rows of the Uploaded File ---")
    print(df_check.head())

    print("\n--- Actual Column Names in the Uploaded File ---")
    print(list(df_check.columns))

except Exception as e:
    print(f"An error occurred while reading the file: {e}")

# The feature names your model expects (as generated in Cell 2)
# You can uncomment and run this if you need to remind yourself of the expected names
# print("\n--- Expected Column Names (feature_names from Cell 2) ---")
# print(feature_names)

Please select the 'data.csv' file you want to use for prediction:


Saving data.csv to data.csv

--- First 5 Rows of the Uploaded File ---
         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    842302         M        17.99         10.38          122.80     1001.0   
1    842517         M        20.57         17.77          132.90     1326.0   
2  84300903         M        19.69         21.25          130.00     1203.0   
3  84348301         M        11.42         20.38           77.58      386.1   
4  84358402         M        20.29         14.34          135.10     1297.0   

   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \
0          0.11840           0.27760          0.3001              0.14710   
1          0.08474           0.07864          0.0869              0.07017   
2          0.10960           0.15990          0.1974              0.12790   
3          0.14250           0.28390          0.2414              0.10520   
4          0.10030           0.13280          0.1980              0.1

In [None]:
# Cell 4: Deploy Web Application with Flask and ngrok (Final Robust Deployment)
from flask import Flask, request, render_template, send_file
from pyngrok import ngrok
from IPython.display import HTML, display
import joblib
import numpy as np
import pandas as pd
import io
import os
import sys

# --- DEFINITION OF ALL TEMPLATE CONTENT (Required to prevent TemplateNotFound) ---
# NOTE: This section is included here to make the cell standalone and avoid the previous error,
# but you should ensure the actual Cell 3 is also run.

def ensure_templates_exist():
    """Defines and writes all necessary HTML files to the templates directory."""
    template_dir = 'templates'
    if not os.path.exists(template_dir):
        os.makedirs(template_dir)

    # --- 1. INTRO.HTML (Page 1) ---
    intro_html_content = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Breast Cancer Predictor - Welcome</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background-color: #f4f4f9; }
        .container { max-width: 800px; margin: auto; background: white; padding: 40px; border-radius: 8px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); text-align: center; }
        h1 { color: #007bff; margin-bottom: 20px; }
        p { margin-bottom: 25px; font-size: 1.1em; }
        .start-button { display: inline-block; padding: 15px 30px; background-color: #28a745; color: white; text-decoration: none; border-radius: 8px; font-size: 1.2em; font-weight: bold; transition: background-color 0.3s; }
        .start-button:hover { background-color: #218838; }
        footer { margin-top: 40px; padding: 15px 0; border-top: 1px solid #eee; text-align: center; font-size: 0.9em; color: #666; }
        footer strong { color: #333; display: block; margin-bottom: 5px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>🏥 Breast Cancer Diagnosis Tool</h1>
        <h2>Page 1: Introduction</h2>
        <p>This web application utilizes a machine learning model to assist in the diagnosis of breast masses. The system is built upon the widely-used Wisconsin Diagnostic Breast Cancer (WDBC) dataset, which contains features computed from digitized images of fine needle aspirates (FNA) of breast masses.</p>
        <p>The core of this system is a Logistic Regression classifier. This algorithm is highly effective for binary classification tasks, making it ideal for distinguishing between two critical classes: Malignant (Cancerous) and Benign (Non-cancerous).</p>
        <p>The model requires 30 distinct input features—including geometric and textural measurements like mean radius, mean texture, and worst area—to make a prediction. To ensure high accuracy and stability, all input data is Standard Scaled before being fed to the model, matching the process used during training.</p>
        <p>Click "Start Prediction" to proceed to the data input page, where you can submit patient measurements via manual entry or batch CSV upload.</p>
        <a href="/input" class="start-button">Start Prediction</a>
    </div>
    <footer>
        <strong>Project Team:</strong>
        K. DILIP<br>
        PLV. ABHIRAM<br>
        G. SAIABHIRAM REDDY
        <p>&copy; Copyright 2025</p>
    </footer>
</body>
</html>
"""
    # --- 2. PREDICT_INPUT.HTML (Page 2: Input Forms) ---
    predict_input_html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Breast Cancer Predictor - Input</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f4f4f9; }}
        .container {{ max-width: 800px; margin: auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); }}
        h1 {{ text-align: center; color: #333; }}
        h2 {{ text-align: center; margin-top: 20px; border-bottom: 2px solid #eee; padding-bottom: 10px;}}
        .section-separator {{ text-align: center; margin: 30px 0; font-weight: bold; }}
        .manual-form {{ display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-top: 20px; }}
        label {{ font-weight: bold; }}
        input[type="number"] {{ width: 95%; padding: 8px; border: 1px solid #ccc; border-radius: 4px; box-sizing: border-box; }}
        .submit-btn {{ grid-column: 1 / span 2; text-align: center; }}
        .upload-form {{ text-align: center; padding: 20px; border: 1px dashed #ccc; margin-top: 20px; }}
        input[type="submit"] {{ background-color: #007bff; color: white; padding: 10px 15px; border: none; border-radius: 4px; cursor: pointer; font-size: 16px; margin-top: 10px; }}
        input[type="submit"]:hover {{ background-color: #0056b3; }}
        footer {{ margin-top: 40px; padding: 15px 0; border-top: 1px solid #eee; text-align: center; font-size: 0.9em; color: #666; }}
        footer strong {{ color: #333; display: block; margin-bottom: 5px; }}
    </style>
</head>
<body>
    <div class="container">
        <h1>Prediction Input</h1>
        <h2>Page 2: Input Forms</h2>

        <h2>1. Predict using Dataset Upload</h2>
        <div class="upload-form">
            <form action="/predict" method="post" enctype="multipart/form-data">
                <input type="file" name="file" accept=".csv" required>
                <input type="submit" value="Upload & Predict Batch">
                <p style="font-size: smaller; color: gray;">(Upload a CSV file with feature columns)</p>
            </form>
        </div>

        <div class="section-separator">--- OR ---</div>

        <h2>2. Predict using Manual Entry</h2>
        <form action="/predict" method="post" class="manual-form">
            {''.join([f'''
            <div>
                <label for="{name}">{name.replace('_', ' ').title()}:</label>
                <input type="number" id="{name}" name="{name}" step="any" required>
            </div>
            ''' for name in feature_names])}
            <div class="submit-btn">
                <input type="submit" value="Predict Single Case">
            </div>
        </form>
    </div>
    <footer>
        <strong>Project Team:</strong>
        K. DILIP<br>
        PLV. ABHIRAM<br>
        G. SAIABHIRAM REDDY
        <p>&copy; Copyright 2025</p>
    </footer>
</body>
</html>
"""
    # --- 3. RESULTS.HTML (Page 3) ---
    results_html_content = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Prediction Results</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background-color: #f4f4f9; }
        .container { max-width: 800px; margin: auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); }
        h1 { text-align: center; color: #333; }
        h2 { text-align: center; margin-top: 20px; border-bottom: 2px solid #eee; padding-bottom: 10px;}
        .result { margin-top: 30px; padding: 15px; border-radius: 4px; text-align: center; }
        .malignant { background-color: #f8d7da; color: #721c24; border: 1px solid #f5c6cb; }
        .benign { background-color: #d4edda; color: #155724; border: 1px solid #c3e6cb; }
        .download-link { display: block; margin-top: 15px; font-weight: bold; }
        .back-button { display: inline-block; margin-top: 20px; padding: 10px 15px; background-color: #6c757d; color: white; text-decoration: none; border-radius: 4px; }

        /* Footer Styling */
        footer {
            margin-top: 40px;
            padding: 15px 0;
            border-top: 1px solid #eee;
            text-align: center;
            font-size: 0.9em;
            color: #666;
        }
        footer strong { color: #333; display: block; margin-bottom: 5px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Prediction Results and Analysis</h1>
        <h2>Page 3: Prediction and Results</h2>

        {% if prediction_text %}
        <div class="result {% if 'Malignant' in prediction_text %}malignant{% else %}benign{% endif %}">
            <h2>{{ prediction_text }}</h2>
            <p>{{ full_message }}</p>
            {% if download_link %}
                <a class="download-link" href="{{ download_link }}">Download Predicted Results CSV</a>
            {% endif %}
            <p><strong>Disclaimer: This is a Machine Learning prediction and should NOT replace a professional medical diagnosis.</strong></p>
        </div>
        {% endif %}

        <div style="text-align: center;">
            <a href="/input" class="back-button">← Go Back to Input Page</a>
        </div>
    </div>

    <footer>
        <strong>Project Team:</strong>
        K. DILIP<br>
        PLV. ABHIRAM<br>
        G. SAIABHIRAM REDDY
        <p>&copy; Copyright 2025</p>
    </footer>
</body>
</html>
"""
    with open(os.path.join(template_dir, 'index.html'), 'w') as f:
        f.write(intro_html_content)
    with open(os.path.join(template_dir, 'predict_input.html'), 'w') as f:
        f.write(predict_input_html_content)
    with open(os.path.join(template_dir, 'results.html'), 'w') as f:
        f.write(results_html_content)

# Call the function to ensure templates are written before starting the app
ensure_templates_exist()


# Load the model and scaler
try:
    model = joblib.load('logistic_regression_model.pkl')
    scaler = joblib.load('scaler.pkl')
    # Assume feature_names is already defined in a previous cell
    if 'feature_names' not in globals():
        print("Error: 'feature_names' not found. Please run the data processing cell first.")
        sys.exit(1)
except FileNotFoundError:
    print("Error: Model or scaler file not found. Please run the model training cell first.")
    sys.exit(1)
except Exception as e:
    print(f"Error loading model or scaler: {e}")
    sys.exit(1)

# Flask App
app = Flask(__name__, template_folder='templates')
PORT = 5000

# Define a mapping for inconsistent column names
RENAME_MAP = {
    'radius_mean': 'mean radius',
    'texture_mean': 'mean texture',
    'perimeter_mean': 'mean perimeter',
    'area_mean': 'mean area',
    'smoothness_mean': 'mean smoothness',
    'compactness_mean': 'mean compactness',
    'concavity_mean': 'mean concavity',
    'concave points_mean': 'mean concave points',
    'symmetry_mean': 'mean symmetry',
    'fractal_dimension_mean': 'mean fractal dimension',
    'radius_se': 'radius error',
    'texture_se': 'texture error',
    'perimeter_se': 'perimeter error',
    'area_se': 'area error',
    'smoothness_se': 'smoothness error',
    'compactness_se': 'compactness error',
    'concavity_se': 'concavity error',
    'concave points_se': 'concave points error',
    'symmetry_se': 'symmetry error',
    'fractal_dimension_se': 'fractal dimension error',
    'radius_worst': 'worst radius',
    'texture_worst': 'worst texture',
    'perimeter_worst': 'worst perimeter',
    'area_worst': 'worst area',
    'smoothness_worst': 'worst smoothness',
    'compactness_worst': 'worst compactness',
    'concavity_worst': 'worst concavity',
    'concave points_worst': 'worst concave points',
    'symmetry_worst': 'worst symmetry',
    'fractal_dimension_worst': 'worst fractal dimension'
}


@app.route('/')
def home():
    return render_template('index.html')

@app.route('/input')
def input_page():
    return render_template('predict_input.html', feature_names=feature_names)


@app.route('/predict', methods=['POST'])
def predict():
    if 'file' in request.files:
        # Handle file upload
        file = request.files['file']
        if file.filename == '':
            return render_template('results.html', prediction_text="Error: No selected file.")

        try:
            df_upload = pd.read_csv(io.StringIO(file.stream.read().decode('utf-8')))

            # Clean column names (remove leading/trailing spaces)
            df_upload.columns = df_upload.columns.str.strip()

            # Rename columns using the defined map, ignoring errors for columns not in the map
            df_upload.rename(columns=RENAME_MAP, inplace=True)

            # Drop 'id' and 'diagnosis' columns if they exist in the uploaded data
            columns_to_drop = ['id', 'diagnosis', 'Unnamed: 32'] # Added 'Unnamed: 32'
            for col in columns_to_drop:
                if col in df_upload.columns:
                    df_upload = df_upload.drop(columns=col)

            # Ensure columns are in the same order as the training data features
            try:
                 df_upload = df_upload[feature_names]
            except KeyError as e:
                 missing_col = e.args[0]
                 return render_template('results.html', prediction_text=f"Error: Missing expected feature column '{missing_col}' in the uploaded file after renaming/dropping.")


            # Scale the uploaded data
            X_upload_scaled = scaler.transform(df_upload)

            # Make predictions
            predictions = model.predict(X_upload_scaled)

            # Add predictions to the DataFrame
            df_upload['Prediction'] = predictions
            df_upload['Predicted_Label'] = df_upload['Prediction'].apply(lambda x: 'Benign' if x == 1 else 'Malignant')


            # Save predictions to a temporary CSV file
            predictions_csv_path = 'predictions.csv'
            df_upload.to_csv(predictions_csv_path, index=False)

            full_message = f"Batch prediction completed for {len(df_upload)} entries."
            download_link = '/download-predictions' # Link to the download route

            return render_template(
                'results.html',
                prediction_text="Batch Prediction Results",
                full_message=full_message,
                download_link=download_link
            )


        except pd.errors.EmptyDataError:
             return render_template('results.html', prediction_text="Error: Uploaded file is empty.")
        except Exception as e:
            return render_template('results.html', prediction_text=f"An error occurred during file processing: {e}")


    else:
        # Handle manual input
        try:
            # Get input data from the form, ensuring the correct order
            input_data = [float(request.form[name]) for name in feature_names]
            input_data_as_numpy_array = np.asarray(input_data)
            input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

            # Scale the input data
            input_data_scaled = scaler.transform(input_data_reshaped)

            # Make prediction
            prediction = model.predict(input_data_scaled)

            if (prediction[0] == 0):
                prediction_text = 'Malignant'
                full_message = 'Based on the input features, the model predicts that the breast mass is Malignant.'
            else:
                prediction_text = 'Benign'
                full_message = 'Based on the input features, the model predicts that the breast mass is Benign.'

            return render_template('results.html', prediction_text=f'Predicted Diagnosis: {prediction_text}', full_message=full_message)

        except ValueError:
            return render_template('results.html', prediction_text="Error: Invalid input. Please ensure all fields are numbers.")
        except Exception as e:
            return render_template('results.html', prediction_text=f"An error occurred during prediction: {e}")

@app.route('/download-predictions')
def download_predictions():
    predictions_csv_path = 'predictions.csv'
    if os.path.exists(predictions_csv_path):
        return send_file(predictions_csv_path, as_attachment=True, download_name='predicted_breast_cancer_data.csv')
    else:
        return "Error: Predictions file not found.", 404


# Setup ngrok tunnel and display the public URL
try:
    # Disconnect previous tunnels if any
    ngrok.kill()
    # Open a new tunnel
    public_url = ngrok.connect(PORT).public_url
    print(f" * ngrok tunnel established at {public_url}")
    # Display the link in the Colab output
    display(HTML(f'<h3>Open your ngrok tunnel here: <a href="{public_url}" target="_blank">{public_url}</a></h3>'))

    # Run the Flask app
    # Use a separate thread or process if needed, or run in the main thread for simplicity in Colab
    # Note: Running directly with app.run() might block the notebook.
    # A common pattern is to run it in a separate thread or use a tool like `waitress`.
    # For this example, we'll use a simple workaround for demonstration in Colab.
    # You might need to manually stop the cell execution if it blocks.

    # A simple way to run Flask in a Colab cell without blocking forever is to
    # use run_simple from werkzeug. This is still somewhat blocking but might be manageable
    # for quick testing. For persistent apps, consider alternative deployment methods.
    from werkzeug.serving import run_simple
    print(f" * Running Flask app on http://127.0.0.1:{PORT}/")
    # Note: This run_simple call is somewhat blocking. You may need to
    # interrupt the cell execution to stop the server.
    run_simple('127.0.0.1', PORT, app, use_reloader=False, use_debugger=False)

except Exception as e:
    print(f"An error occurred setting up ngrok or running the Flask app: {e}")

 * ngrok tunnel established at https://7c88d0fcda7e.ngrok-free.app


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


 * Running Flask app on http://127.0.0.1:5000/


INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 07:36:14] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 07:36:15] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 07:36:33] "GET /input HTTP/1.1" 200 -
