## Table of Contents:
<font size="4"><a href="#1">1.) Import Necessary Packages and Libraries</a></font><br>
<font size="4"><a href="#2">2.) Data Preprocessing and Cleaning</a></font><br>
<font size="4"><a href="#3">3.) Model Building</a></font><br>
<font size="4"><a href="#4">4.) Tooling</a></font><br>

In [1]:
import warnings
# Install a blanket 'ignore' filter: every Python warning raised after this
# point is suppressed for the rest of the session.
# NOTE(review): this also hides deprecation and data-quality warnings emitted
# by the libraries used below -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## 1.) Import Necessary Packages and Libraries <a class="anchor" id="1"></a>

In [2]:
#Import necessary libraries
import pandas as pd
import numpy as np
import dash
import dash_core_components as dcc
import dash_html_components as html
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.ensemble import RandomForestClassifier
from dash.dependencies import Output
from dash.dependencies import Input
from dash.dependencies import State

## 2.) Data Preprocessing and Cleaning <a class="anchor" id="2"></a>

In [3]:
# Load the dataset
df = pd.read_csv('LINK_TO_DATASET/survey lung cancer.csv')

# Preprocess the data: label-encode every column except 'AGE', which is a
# continuous numerical attribute and is therefore left untouched.
for categorical_column in df:
    if categorical_column == 'AGE':
        continue
    df[categorical_column] = LabelEncoder().fit_transform(df[categorical_column])

# Symptom columns used as model inputs. The order matters: the prediction
# callback builds its feature vector in exactly this order.
# NOTE(review): "ALLERGY " is written with a trailing space -- presumably this
# matches the CSV header; verify against the dataset.
X = df[["YELLOW_FINGERS", "CHEST PAIN", "PEER_PRESSURE", "ALLERGY ", "WHEEZING", "COUGHING", "SWALLOWING DIFFICULTY"]]
y = df['LUNG_CANCER']

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Oversample the minority class in the TRAINING split only. The test split is
# deliberately left as-is: resampling it would duplicate held-out rows and
# distort any metric computed on it, since it would no longer reflect the
# real class distribution. The sampler is seeded so that Restart & Run All
# reproduces the same training set.
X_train, y_train = RandomOverSampler(random_state=42).fit_resample(X_train, y_train)

## 3.) Model Building <a class="anchor" id="3"></a>

In [4]:
# Train the model
# random_state is fixed so the fitted forest (and therefore the probabilities
# shown by the app) is the same on every Restart & Run All.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

RandomForestClassifier()

## 4.) Tooling <a class="anchor" id="4"></a>

In [None]:
# Create the app
app = dash.Dash(__name__)

# (component id, visible heading) for each symptom input. The ids are the ones
# the prediction callback reads via State(...), so they must not be renamed.
SYMPTOM_INPUTS = [
    ('yellow-fingers', 'Yellow Fingers:'),
    ('chest-pain', 'Chest Pain:'),
    ('peer-pressure', 'Peer Pressure:'),
    ('allergy', 'Allergy:'),
    ('wheezing', 'Wheezing:'),
    ('coughing', 'Coughing:'),
    ('swallowing-difficulty', 'Swallowing Difficulty:'),
]


def _yes_no_dropdown(component_id, heading):
    """Build one labelled Yes/No dropdown cell of the input grid.

    Args:
        component_id: Dash id assigned to the dropdown.
        heading: Text shown in the H4 heading above the dropdown.

    Returns:
        An ``html.Div`` holding the heading and a dropdown whose options are
        Yes (value 1) and No (value 0), defaulting to No.
    """
    return html.Div([
        html.H4(heading),
        dcc.Dropdown(
            id=component_id,
            options=[
                {'label': 'Yes', 'value': 1},
                {'label': 'No', 'value': 0}
            ],
            value=0,
            # A cleared dropdown reports value None, which cannot be fed to
            # the model; forbid clearing so the value is always 0 or 1.
            clearable=False
        ),
    ], style={'width': '25%', 'display': 'inline-block'})


# Define the layout of the app: title, one dropdown per symptom, the Predict
# button, and the element that receives the prediction text.
app.layout = html.Div(
    [html.H1('Lung Cancer Prediction')]
    + [_yes_no_dropdown(component_id, heading) for component_id, heading in SYMPTOM_INPUTS]
    + [
        html.Div([
            html.Button('Predict', id='predict-button', n_clicks=0)
        ], style={'display': 'inline-block'}),
        html.Div([
            html.H2(id='result')
        ]),
    ]
)

# Define the callback
@app.callback(
    Output('result', 'children'),
    Input('predict-button', 'n_clicks'),
    State('yellow-fingers', 'value'),
    State('chest-pain', 'value'),
    State('peer-pressure', 'value'),
    State('allergy', 'value'),
    State('wheezing', 'value'),
    State('coughing', 'value'),
    State('swallowing-difficulty', 'value')
)
def predict_lung_cancer(n_clicks, yellow_fingers, chest_pain, peer_pressure, allergy, wheezing, coughing, swallowing_difficulty):
    """Return the prediction text shown in the 'result' element.

    Args:
        n_clicks: Number of times the Predict button has been clicked.
        yellow_fingers, chest_pain, peer_pressure, allergy, wheezing,
        coughing, swallowing_difficulty: Current dropdown values
            (1 for Yes, 0 for No, or None if a dropdown has been cleared).

    Returns:
        An empty string before the first click, a prompt if any answer is
        missing, otherwise a sentence with the predicted probability
        (two decimals) and a Yes/No prediction using a 0.5 threshold.
    """
    # Dash also invokes the callback once when the page loads (n_clicks is 0
    # then); do not show a prediction the user has not asked for.
    if not n_clicks:
        return ""

    # Keep the same order as the columns the model was trained on.
    answers = [yellow_fingers, chest_pain, peer_pressure, allergy, wheezing, coughing, swallowing_difficulty]

    # A cleared dropdown yields None, which the model cannot consume.
    if any(answer is None for answer in answers):
        return "Please select Yes or No for every symptom before predicting."

    # Make a prediction using the model
    features = np.array(answers).reshape(1, -1)
    # Second probability column of the single input row, taken as a plain
    # float. NOTE(review): assumes column 1 is the positive (cancer) class --
    # confirm with model.classes_.
    probability = float(model.predict_proba(features)[0, 1])
    prediction = 'Yes' if probability > 0.5 else 'No'
    return f"The probability of having lung cancer is {probability:.2f}. Prediction: {prediction}"

# Run the app
# Started only when this code is executed as the main module; debug mode is
# explicitly switched off.
if __name__ == '__main__':
    app.run_server(debug=False)