In [44]:
import streamlit as st
import pandas as pd
import numpy as np
from pages import data_input, diagnosis, treatment, survival, dashboard
# Configure the page
st.set_page_config(
    page_title="Lung Cancer ML Platform",
    # Lungs emoji (U+1FAC1). Spelled as an escape because the previous literal
    # had been corrupted into mojibake by a bad re-encoding of this file.
    page_icon="\U0001FAC1",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Initialize session state
# Each key is seeded only when absent, so values already stored in
# st.session_state are never overwritten by this block.
if 'patient_data' not in st.session_state:
    # Dict of loaded patient data; its keys are listed in the sidebar as the
    # available data types.
    st.session_state.patient_data = {}
if 'diagnosis_results' not in st.session_state:
    st.session_state.diagnosis_results = None
if 'treatment_results' not in st.session_state:
    st.session_state.treatment_results = None
if 'survival_results' not in st.session_state:
    st.session_state.survival_results = None

def main():
    """Render the sidebar and show the page selected in it.

    The sidebar contains the app title, a page selector, a summary of which
    patient data types are currently stored in ``st.session_state``, and a
    button that resets all stored data and results. The selected page module's
    ``show()`` function is then called to render the main area.
    """
    # Emoji are written as Unicode escapes: the literals previously in this
    # function had been corrupted into mojibake by a bad re-encoding.
    lungs = "\U0001FAC1"
    check_mark = "\u2705"
    bullet = "\u2022"
    warning_sign = "\u26A0\uFE0F"
    wastebasket = "\U0001F5D1\uFE0F"

    # Sidebar navigation
    st.sidebar.title(f"{lungs} Lung Cancer ML Platform")
    st.sidebar.markdown("---")

    # Single source of truth for both the selector options and the routing.
    pages = {
        "Data Input": data_input,
        "Diagnosis": diagnosis,
        "Treatment Recommendation": treatment,
        "Survival Prediction": survival,
        "Dashboard": dashboard,
    }
    page = st.sidebar.selectbox("Navigate to:", list(pages))

    # Display current patient data status
    if st.session_state.patient_data:
        st.sidebar.success(f"{check_mark} Patient data loaded")
        st.sidebar.write("Available data types:")
        for dtype in st.session_state.patient_data:
            st.sidebar.write(f"{bullet} {dtype}")
    else:
        st.sidebar.warning(f"{warning_sign} No patient data loaded")

    st.sidebar.markdown("---")

    # Clear all data button
    if st.sidebar.button(f"{wastebasket} Clear All Data", type="secondary"):
        st.session_state.patient_data = {}
        st.session_state.diagnosis_results = None
        st.session_state.treatment_results = None
        st.session_state.survival_results = None
        # NOTE(review): st.rerun() follows immediately, so this message is
        # probably replaced before the user can read it - confirm.
        st.sidebar.success("All data cleared!")
        st.rerun()

    # Route to appropriate page
    pages[page].show()


if __name__ == "__main__":
    main()





In [43]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
import joblib
import warnings
warnings.filterwarnings('ignore')

class LungCancerDiagnosisModel:
    def __init__(self):
        """Create the three unfitted classifiers, a scaler and empty result holders."""
        seed = 42  # shared random_state for all three estimators
        self.models = dict(
            random_forest=RandomForestClassifier(n_estimators=100, random_state=seed),
            gradient_boosting=GradientBoostingClassifier(n_estimators=100, random_state=seed),
            logistic_regression=LogisticRegression(random_state=seed, max_iter=1000),
        )
        # Used by predict_diagnosis for the logistic-regression model only.
        self.scaler = StandardScaler()
        self.feature_importance = {}
        self.confidence_scores = {}
        
    def prepare_features(self, patient_data):
        """Prepare features from patient data for diagnosis"""
        features = []
        feature_names = []
        
        # ctDNA fragmentation features
        if 'ctdna_fragmentation' in patient_data:
            ctdna_data = patient_data['ctdna_fragmentation']
            features.extend([
                ctdna_data.get('fragment_size_ratio', 0),
                ctdna_data.get('short_fragment_percentage', 0),
                ctdna_data.get('nucleosome_positioning_score', 0),
                ctdna_data.get('fragment_entropy', 0)
            ])
            feature_names.extend(['ctdna_fragment_ratio', 'ctdna_short_frag', 'ctdna_nucleosome', 'ctdna_entropy'])
        
        # DNA methylation features
        if 'dna_methylation' in patient_data:
            meth_data = patient_data['dna_methylation']
            features.extend([
                meth_data.get('global_methylation_level', 0),
                meth_data.get('cpg_island_methylation', 0),
                meth_data.get('promoter_methylation_score', 0),
                meth_data.get('tumor_suppressor_methylation', 0)
            ])
            feature_names.extend(['global_meth', 'cpg_meth', 'promoter_meth', 'tumor_supp_meth'])
        
        # CNV features
        if 'cnv' in patient_data:
            cnv_data = patient_data['cnv']
            features.extend([
                cnv_data.get('amplification_score', 0),
                cnv_data.get('deletion_score', 0),
                cnv_data.get('oncogene_amplification', 0),
                cnv_data.get('tumor_suppressor_deletion', 0)
            ])
            feature_names.extend(['cnv_amp', 'cnv_del', 'oncogene_amp', 'tumor_supp_del'])
                # Somatic mutation features
        if 'somatic_mutations' in patient_data:
            mut_data = patient_data['somatic_mutations']
            features.extend([
                mut_data.get('total_mutation_burden', 0),
                mut_data.get('driver_mutations', 0),
                mut_data.get('tp53_mutation_status', 0),
                mut_data.get('egfr_mutation_status', 0),
                mut_data.get('kras_mutation_status', 0)
            ])
            feature_names.extend(['mut_burden', 'driver_muts', 'tp53_status', 'egfr_status', 'kras_status'])
        
        # CT imaging features
        if 'ct_imaging' in patient_data:
            ct_data = patient_data['ct_imaging']
            features.extend([
                ct_data.get('nodule_size', 0),
                ct_data.get('nodule_density', 0),
                ct_data.get('spiculation_score', 0),
                ct_data.get('lymph_node_involvement', 0)
            ])
            feature_names.extend(['nodule_size', 'nodule_density', 'spiculation', 'lymph_nodes'])
        
        # Histological imaging features
        if 'histological_imaging' in patient_data:
            hist_data = patient_data['histological_imaging']
            features.extend([
                hist_data.get('cellular_density', 0),
                hist_data.get('nuclear_atypia_score', 0),
                hist_data.get('mitotic_count', 0),
                hist_data.get('necrosis_percentage', 0)
            ])
            feature_names.extend(['cell_density', 'nuclear_atypia', 'mitotic_count', 'necrosis'])
        
        # RNA features
        if 'rna' in patient_data:
            rna_data = patient_data['rna']
            features.extend([
                rna_data.get('oncogene_expression', 0),
                rna_data.get('tumor_suppressor_expression', 0),
                rna_data.get('immune_signature_score', 0),
                rna_data.get('metabolic_signature_score', 0)
            ])
            feature_names.extend(['oncogene_expr', 'tumor_supp_expr', 'immune_score', 'metabolic_score'])
        return np.array(features).reshape(1, -1), feature_names  #numpy
    
    def predict_diagnosis(self, patient_data):
        """Predict lung cancer diagnosis with confidence scores"""
        try:
            features, feature_names = self.prepare_features(patient_data)
            
            if features.shape[1] == 0:
                return {
                    'error': 'No valid features found in patient data',
                    'predictions': {},
                    'confidence_scores': {},
                    'feature_importance': {}
                }
            
            # Simulate trained models with realistic predictions
            predictions = {}
            confidence_scores = {}
            feature_importance = {}
            
            # Generate synthetic but realistic predictions
            np.random.seed(42)  # For reproducible results  #numpy
            
            for model_name, model in self.models.items():
                # Simulate model training with synthetic data
                X_train = np.random.randn(1000, features.shape[1])  #numpy
                y_train = np.random.choice([0, 1], size=1000, p=[0.7, 0.3])  # 30% cancer rate
                
                if model_name == 'logistic_regression':
                    # Scale features for logistic regression
                    X_train_scaled = self.scaler.fit_transform(X_train)
                    features_scaled = self.scaler.transform(features)
                    model.fit(X_train_scaled, y_train)
                    
                    prob = model.predict_proba(features_scaled)[0]
                    pred = model.predict(features_scaled)[0]
                else:
                    model.fit(X_train, y_train)
                    prob = model.predict_proba(features)[0]
                    pred = model.predict(features)[0]
                
                predictions[model_name] = {
                    'prediction': int(pred),
                    'probability_benign': float(prob[0]),
                    'probability_malignant': float(prob[1])
                }
                confidence_scores[model_name] = float(max(prob))
                
                # Feature importance (for tree-based models)
                if hasattr(model, 'feature_importances_'):
                    importance_dict = {}
                    for i, importance in enumerate(model.feature_importances_):
                        if i < len(feature_names):
                            importance_dict[feature_names[i]] = float(importance)
                    feature_importance[model_name] = importance_dict
            
            # Ensemble prediction
            ensemble_prob_malignant = np.mean([pred['probability_malignant'] for pred in predictions.values()])
            ensemble_prediction = 1 if ensemble_prob_malignant > 0.5 else 0
            
            predictions['ensemble'] = {
                'prediction': ensemble_prediction,
                'probability_benign': 1 - ensemble_prob_malignant,
                'probability_malignant': ensemble_prob_malignant
            }
            confidence_scores['ensemble'] = max(ensemble_prob_malignant, 1 - ensemble_prob_malignant)
            
            return {
                'predictions': predictions,
                'confidence_scores': confidence_scores,
                'feature_importance': feature_importance,
                'feature_names': feature_names,
                'interpretation': self._generate_interpretation(predictions, feature_importance)
            }
            
        except Exception as e:
            return {
                'error': f'Error in diagnosis prediction: {str(e)}',
                'predictions': {},
                'confidence_scores': {},
                'feature_importance': {}
            }
    
    def _generate_interpretation(self, predictions, feature_importance):
        """Generate human-readable interpretation of the diagnosis"""
        ensemble_pred = predictions.get('ensemble', {})
        prob_malignant = ensemble_pred.get('probability_malignant', 0)
        
        if prob_malignant > 0.8:
            risk_level = "High"
            interpretation = "Strong indicators of malignancy detected. Immediate clinical evaluation recommended."
        elif prob_malignant > 0.6:
            risk_level = "Moderate-High"
            interpretation = "Moderate to high probability of malignancy. Further diagnostic workup advised."
        elif prob_malignant > 0.4:
            risk_level = "Moderate"
            interpretation = "Inconclusive results. Additional testing may be needed for definitive diagnosis."
        elif prob_malignant > 0.2:
            risk_level = "Low-Moderate"
            interpretation = "Low to moderate probability of malignancy. Monitoring may be appropriate."
        else:
            risk_level = "Low"
            interpretation = "Low probability of malignancy. Routine follow-up recommended."
        
        return {
            'risk_level': risk_level,
            'interpretation': interpretation,
            'recommendation': self._get_clinical_recommendation(risk_level)
        }
    
    def _get_clinical_recommendation(self, risk_level):
        """Get clinical recommendations based on risk level"""
        recommendations = {
            "High": "Urgent referral to oncology. Consider tissue biopsy and staging workup.",
            "Moderate-High": "Referral to pulmonologist. Consider CT-guided biopsy.",
            "Moderate": "Close monitoring with repeat imaging in 3-6 months. Consider multidisciplinary review.",
            "Low-Moderate": "Follow-up imaging in 6-12 months. Patient counseling on symptoms to watch.",
            "Low": "Routine annual screening. Lifestyle counseling for risk reduction."
        }
        return recommendations.get(risk_level, "Consult with healthcare provider for appropriate follow-up.")
    print(_get_clinical_recommendation(None, "High"))





Urgent referral to oncology. Consider tissue biopsy and staging workup.


In [26]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
import warnings
warnings.filterwarnings('ignore')

class TreatmentRecommendationModel:
    def __init__(self):
        """Set up the list of treatment names and an unfitted multi-output forest."""
        self.treatment_options = [
            'Surgery',
            'Chemotherapy',
            'Radiation Therapy',
            'Immunotherapy',
            'Targeted Therapy',
            'Combination Therapy',
        ]
        forest = RandomForestClassifier(n_estimators=100, random_state=42)
        self.model = MultiOutputClassifier(forest)
        
    def prepare_treatment_features(self, patient_data, diagnosis_results):
        """Prepare features for treatment recommendation"""
        features = []
        
        # Diagnosis probability
        if diagnosis_results and 'predictions' in diagnosis_results:
            ensemble_pred = diagnosis_results['predictions'].get('ensemble', {})
            features.append(ensemble_pred.get('probability_malignant', 0))
        else:
            features.append(0)
        
        # Patient demographics (if available)
        demographics = patient_data.get('demographics', {})
        features.extend([
            demographics.get('age', 65) / 100,  # Normalized age
            demographics.get('smoking_history', 0),  # 0: never, 1: former, 2: current
            demographics.get('performance_status', 1)  # ECOG performance status
        ])
        
        # Molecular markers for targeted therapy
        molecular_features = []
        
        # EGFR status
        somatic_data = patient_data.get('somatic_mutations', {})
        molecular_features.append(somatic_data.get('egfr_mutation_status', 0))
        
        # ALK rearrangement
        molecular_features.append(somatic_data.get('alk_rearrangement', 0))
        
        # PD-L1 expression for immunotherapy
        rna_data = patient_data.get('rna', {})
        molecular_features.append(rna_data.get('pdl1_expression', 0))
        
        # Tumor mutational burden
        molecular_features.append(somatic_data.get('total_mutation_burden', 0) / 100)  # Normalized
        
        features.extend(molecular_features)
        
        # Staging information
        ct_data = patient_data.get('ct_imaging', {})
        features.extend([
            ct_data.get('tumor_size', 0) / 10,  # Normalized tumor size
            ct_data.get('lymph_node_involvement', 0),
            ct_data.get('metastasis_present', 0)
        ])
        
        # Histological subtype
        hist_data = patient_data.get('histological_imaging', {})
        features.append(hist_data.get('adenocarcinoma', 0))  # 1 if adenocarcinoma, 0 otherwise
        
        return np.array(features).reshape(1, -1)
    
    def recommend_treatment(self, patient_data, diagnosis_results):
        "Recommend treatment options based on patient data and diagnosis"
        try:
            features = self.prepare_treatment_features(patient_data, diagnosis_results)
            
            # Check if patient has malignant diagnosis
            if diagnosis_results and 'predictions' in diagnosis_results:
                ensemble_pred = diagnosis_results['predictions'].get('ensemble', {})
                malignancy_prob = ensemble_pred.get('probability_malignant', 0)
                
                if malignancy_prob < 0.5:
                    return {
                        'recommendations': [],
                        'reasoning': 'No treatment recommended - low probability of malignancy',
                        'treatment_scores': {},
                        'contraindications': []
                    }
            
            # Simulate treatment recommendation model
            np.random.seed(42)
            
            # Generate treatment scores based on patient features
            treatment_scores = {}
            recommendations = []
            reasoning = []
            contraindications = []
            
            # Extract key features for decision making
            age = patient_data.get('demographics', {}).get('age', 65)
            performance_status = patient_data.get('demographics', {}).get('performance_status', 1)
            egfr_status = patient_data.get('somatic_mutations', {}).get('egfr_mutation_status', 0)
            pdl1_expression = patient_data.get('rna', {}).get('pdl1_expression', 0)
            tumor_burden = patient_data.get('somatic_mutations', {}).get('total_mutation_burden', 0)
            tumor_size = patient_data.get('ct_imaging', {}).get('tumor_size', 0)
            lymph_nodes = patient_data.get('ct_imaging', {}).get('lymph_node_involvement', 0)
            metastasis = patient_data.get('ct_imaging', {}).get('metastasis_present', 0)
            
            # Surgery recommendation
            surgery_score = 0.8
            if age > 75:
                surgery_score -= 0.2
                contraindications.append("Advanced age may increase surgical risk")
            if performance_status > 2:
                surgery_score -= 0.3
                contraindications.append("Poor performance status may preclude surgery")
            if metastasis:
                surgery_score = 0.1
                contraindications.append("Metastatic disease - surgery not typically indicated")
            if tumor_size < 3 and not lymph_nodes and not metastasis:
                surgery_score = 0.9
                reasoning.append("Early stage disease - excellent surgical candidate")
            
            treatment_scores['Surgery'] = max(0, surgery_score)
            
            # Chemotherapy recommendation
            chemo_score = 0.7
            if age > 70:
                chemo_score -= 0.1
            if performance_status > 2:
                chemo_score -= 0.4
                contraindications.append("Poor performance status may limit chemotherapy tolerance")
            if lymph_nodes or metastasis:
                chemo_score += 0.2
                reasoning.append("Advanced disease - systemic therapy indicated")
            treatment_scores['Chemotherapy'] = max(0, chemo_score)
            
            # Radiation therapy recommendation
            radiation_score = 0.6
            if tumor_size > 5:
                radiation_score += 0.2
                reasoning.append("Large tumor size - radiation therapy may provide local control")
            if not metastasis:
                radiation_score += 0.1
            if age > 80:
                radiation_score += 0.1  # Better tolerated than surgery in elderly
                reasoning.append("Elderly patient - radiation well tolerated")
            
            treatment_scores['Radiation Therapy'] = radiation_score
            
            # Immunotherapy recommendation
            immuno_score = 0.5
            if pdl1_expression > 0.5:
                immuno_score += 0.4
                reasoning.append("High PD-L1 expression - good immunotherapy candidate")
            if tumor_burden > 10:
                immuno_score += 0.2
                reasoning.append("High tumor mutational burden - may respond to immunotherapy")
            if age > 75:
                immuno_score += 0.1  # Generally well tolerated in elderly
            
            treatment_scores['Immunotherapy'] = immuno_score
            
            # Targeted therapy recommendation
            targeted_score = 0.3
            if egfr_status:
                targeted_score = 0.9
                reasoning.append("EGFR mutation detected - excellent targeted therapy candidate")
            alk_status = patient_data.get('somatic_mutations', {}).get('alk_rearrangement', 0)
            if alk_status:
                targeted_score = 0.9
                reasoning.append("ALK rearrangement detected - targeted therapy indicated")
            
            treatment_scores['Targeted Therapy'] = targeted_score
            
            # Combination therapy recommendation
            combo_score = 0.4
            if metastasis:
                combo_score += 0.3
                reasoning.append("Metastatic disease - combination therapy may improve outcomes")
            if performance_status <= 1:
                combo_score += 0.2
                reasoning.append("Good performance status - can tolerate combination therapy")
            if age > 75:
                combo_score -= 0.2
                contraindications.append("Advanced age - combination therapy may be poorly tolerated")
            
            treatment_scores['Combination Therapy'] = max(0, combo_score)
            
            # Select top recommendations (score > 0.6)
            for treatment, score in treatment_scores.items():
                if score > 0.6:
                    recommendations.append({
                        'treatment': treatment,
                        'score': score,
                        'confidence': min(score * 1.2, 1.0)
                    })
            
            # Sort by score
            recommendations.sort(key=lambda x: x['score'], reverse=True)
            
            return {
                'recommendations': recommendations,
                'treatment_scores': treatment_scores,
                'reasoning': reasoning,
                'contraindications': contraindications,
                'staging_info': {
                    'tumor_size': tumor_size,
                    'lymph_node_involvement': bool(lymph_nodes),
                    'metastasis_present': bool(metastasis),
                    'estimated_stage': self._estimate_stage(tumor_size, lymph_nodes, metastasis)
                }
            }
            
        except Exception as e:
            return {
                'error': f'Error in treatment recommendation: {str(e)}',
                'recommendations': [],
                'treatment_scores': {},
                'reasoning': [],
                'contraindications': []
            }
    
    def _estimate_stage(self, tumor_size, lymph_nodes, metastasis):
        """Estimate cancer stage based on TNM criteria"""
        if metastasis:
            return "Stage IV"
        elif lymph_nodes:
            if tumor_size > 5:
                return "Stage IIIB"
            else:
                return "Stage IIIA"
        elif tumor_size > 7:
            return "Stage IIB"
        elif tumor_size > 3:
            return "Stage IIA"
        else:
            return "Stage I"
    
    def compare_treatments(self, recommendations):
        """Attach pros, cons and typical duration to each recommended treatment.

        Args:
            recommendations: Iterable of dicts with ``treatment``, ``score``
                and ``confidence`` keys, as produced by ``recommend_treatment``.

        Returns:
            Dict keyed by treatment name. Each value holds ``pros``, ``cons``,
            ``typical_duration``, ``recommendation_score`` and ``confidence``.
            Treatments not in the built-in catalogue are skipped.
        """
        # (treatment, pros, cons, typical duration)
        catalog_rows = (
            ('Surgery',
             ['Curative potential for early stage', 'Definitive local control', 'Staging information'],
             ['Surgical risks', 'Recovery time', 'Not suitable for advanced disease'],
             '1 day (surgery) + 4-6 weeks recovery'),
            ('Chemotherapy',
             ['Systemic treatment', 'Effective for advanced disease', 'Can shrink tumors'],
             ['Side effects', 'Immunosuppression', 'Multiple cycles required'],
             '3-6 months (multiple cycles)'),
            ('Radiation Therapy',
             ['Local control', 'Non-invasive', 'Good for elderly patients'],
             ['Limited to local disease', 'Potential lung damage', 'Multiple sessions'],
             '6-7 weeks (daily treatments)'),
            ('Immunotherapy',
             ['Immune system activation', 'Durable responses', 'Generally well tolerated'],
             ['Immune-related side effects', 'Not effective for all patients', 'Expensive'],
             'Ongoing (until progression)'),
            ('Targeted Therapy',
             ['Specific molecular targeting', 'Oral administration', 'Fewer side effects'],
             ['Requires specific mutations', 'Resistance development', 'Limited patient population'],
             'Ongoing (until resistance)'),
            ('Combination Therapy',
             ['Enhanced efficacy', 'Multiple mechanisms', 'Better outcomes in advanced disease'],
             ['Increased toxicity', 'Complex management', 'Higher cost'],
             'Variable (depends on combination)'),
        )
        catalog = {
            name: {'pros': pros, 'cons': cons, 'typical_duration': duration}
            for name, pros, cons, duration in catalog_rows
        }

        return {
            entry['treatment']: {
                **catalog[entry['treatment']],
                'recommendation_score': entry['score'],
                'confidence': entry['confidence'],
            }
            for entry in recommendations
            if entry['treatment'] in catalog
        }

