# Interactive Dashboard for Lung Cancer Survival Analysis
### DSA 2040A Group Project - Data Mining and Visualization
### Team Members: Calvin, Tanveer, Samantha, Patricia, Susan and Arlen

## Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import joblib

## Load Data and Model

In [None]:
# Locations of the two inputs, given relative to the current working directory
DATA_PATH = '../data/transformed.csv'
MODEL_PATH = '../models/survival_predictor.pkl'

# Read the dataset into a DataFrame used by every later cell
df = pd.read_csv(DATA_PATH)

# Deserialize the fitted predictor.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model = joblib.load(MODEL_PATH)

# Quick confirmation that both loads completed, plus the (rows, columns) shape
print("Data and model loaded successfully!")
print(f"Dataset shape: {df.shape}")

## 1. Survival Rate Analysis by Demographics

In [None]:
# Mean of `survived` for each (age_group, gender) pair; this reads as a
# survival rate provided `survived` is coded 0/1 -- TODO confirm the encoding.
survival_by_age_gender = (
    df.groupby(['age_group', 'gender'])['survived']
    .mean()
    .reset_index()
)

# Human-readable axis titles for the raw column names
axis_labels = {'survived': 'Survival Rate', 'age_group': 'Age Group'}

# Grouped bars: one cluster per age group, one bar per gender
fig = px.bar(
    survival_by_age_gender,
    x='age_group',
    y='survived',
    color='gender',
    barmode='group',
    title='Survival Rate by Age Group and Gender',
    labels=axis_labels,
)
fig.show()

## 2. Treatment Effectiveness Analysis

In [None]:
# For every (treatment_type, cancer_stage) pair compute the mean of `survived`
# and the number of non-null `survived` entries, then give the two aggregate
# columns descriptive names.
treatment_effectiveness = (
    df.groupby(['treatment_type', 'cancer_stage'])['survived']
    .agg(['mean', 'count'])
    .reset_index()
    .rename(columns={'mean': 'survival_rate', 'count': 'patient_count'})
)

# Bubble chart: stage on x, survival rate on y, bubble area scaled by the
# number of patients, one colour per treatment type.
fig = px.scatter(
    treatment_effectiveness,
    x='cancer_stage',
    y='survival_rate',
    size='patient_count',
    color='treatment_type',
    title='Treatment Effectiveness by Cancer Stage',
    labels={'survival_rate': 'Survival Rate'},
    hover_data=['patient_count'],
)
fig.show()

## 3. Risk Factor Analysis

In [None]:
# Pairwise correlations between the selected numerical features and the
# `survived` column (DataFrame.corr defaults to Pearson correlation).
numerical_cols = ['treatment_duration', 'comorbidities_count']
correlation_matrix = df[numerical_cols + ['survived']].corr()

# Heatmap of the correlation matrix.
# The colour range is pinned to [-1, 1] so the neutral midpoint of the
# diverging RdBu scale corresponds to zero correlation; with the default
# auto-range the midpoint drifts to the middle of the observed values and the
# 1.0 diagonal dominates the scale.
fig = px.imshow(correlation_matrix,
                title='Correlation Heatmap of Risk Factors',
                color_continuous_scale='RdBu',
                zmin=-1,
                zmax=1)

fig.show()

## 4. Survival Prediction Dashboard

In [None]:
from ipywidgets import widgets
from IPython.display import display

def _dropdown_for(column, label):
    """Return a Dropdown offering each distinct value found in ``df[column]``."""
    return widgets.Dropdown(options=df[column].unique(), description=label)


# One selector per input column used by the prediction handler
age_group = _dropdown_for('age_group', 'Age Group:')
gender = _dropdown_for('gender', 'Gender:')
cancer_stage = _dropdown_for('cancer_stage', 'Stage:')

# Render the three selectors
display(age_group, gender, cancer_stage)

# Button that triggers a prediction, and the area where the result is printed
button = widgets.Button(description='Predict Survival')
output = widgets.Output()
display(button, output)

def on_button_clicked(b):
    """Predict survival for the current dropdown selection and print it.

    Reads the selected values of the ``age_group``, ``gender`` and
    ``cancer_stage`` widgets, passes them to ``model.predict_proba`` as a
    one-row DataFrame, and prints the result inside the ``output`` widget.

    Args:
        b: The button instance supplied by the click callback; unused.
    """
    # Drop the result of any earlier click; otherwise every click appends
    # another line below the previous ones. wait=True defers the clear until
    # the new text arrives, which avoids flicker.
    output.clear_output(wait=True)

    with output:
        # One-row frame built from the current widget selections.
        # NOTE(review): assumes the model (or its pipeline) accepts exactly
        # these three raw columns -- confirm against how it was trained.
        sample_input = pd.DataFrame({
            'age_group': [age_group.value],
            'gender': [gender.value],
            'cancer_stage': [cancer_stage.value]
        })

        # Class probabilities for the single input row
        prediction = model.predict_proba(sample_input)[0]

        # NOTE(review): index 1 is presumably the "survived" class -- verify
        # the order in model.classes_.
        print(f"Survival Probability: {prediction[1]:.2%}")


# Run the handler every time the button is clicked
button.on_click(on_button_clicked)