In [None]:
import pandas as pd
import numpy as np
import pickle
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the raw table and discard every row that has a missing value in any column.
df = pd.read_csv('lung_cancer_data.csv')
df = df.dropna()

# Columns holding the per-patient comorbidity flags; they are converted to 0/1 below.
comorbidity_cols = [
    'Comorbidity_Diabetes',
    'Comorbidity_Heart_Disease',
    'Comorbidity_Chronic_Lung_Disease'
]
# Accepts either the textual ('Yes'/'No') or the already-numeric (1/0) form.
comorbidity_flag_map = {'Yes': 1, 'No': 0, 1: 1, 0: 0}
for col in comorbidity_cols:
    mapped = df[col].map(comorbidity_flag_map)
    # Series.map yields NaN for any value absent from the mapping. Because the
    # dropna() above has already run, such NaNs would survive and be skipped by
    # the row-wise sum below (i.e. silently counted as 0), so fail loudly instead.
    unmapped = mapped.isna()
    if unmapped.any():
        unexpected = sorted(df.loc[unmapped, col].astype(str).unique())
        raise ValueError(
            f"Unexpected values in column {col!r}: {unexpected}; "
            "expected 'Yes'/'No' or 1/0"
        )
    df[col] = mapped.astype('int64')

# Integer-encode the categorical inputs and keep each fitted encoder so the
# same mapping can be reused later (they are stored alongside the model).
label_encoders = {}
categorical_cols = ['Smoking_History', 'Stage']
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Number of comorbidities flagged for each row (0 to len(comorbidity_cols)).
df['Comorbidity_Score'] = df[comorbidity_cols].sum(axis=1)

def simulate_treatment(row):
    """Assign a rule-based treatment label to one record.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexed by 'Stage'
            and 'Comorbidity_Score'. 'Stage' is compared against the integer
            codes 0-3; which clinical stage each code stands for depends on
            the upstream encoding -- TODO confirm.

    Returns:
        'Surgery' for stage code 0 or 1 with a score of at most 1,
        'Radiation Therapy' for stage code 2 with a score of at most 2,
        'Chemotherapy' for stage code 3 or any score above 2,
        and None for every remaining combination (for example stage code
        0 or 1 with a score of exactly 2).
    """
    stage_code = row['Stage']

    # Stage code 3 maps to chemotherapy no matter what the score is.
    if stage_code == 3:
        return 'Chemotherapy'

    burden = row['Comorbidity_Score']

    if stage_code in (0, 1) and burden <= 1:
        return 'Surgery'
    if stage_code == 2 and burden <= 2:
        return 'Radiation Therapy'
    if burden > 2:
        return 'Chemotherapy'

    # No rule matched; the caller decides what to do with unlabeled rows.
    return None

# Label every row with the rule-based treatment and keep only rows that
# received a label. The .copy() makes the filtered frame independent, so the
# column assignment below never writes into a slice of the previous frame.
df['Simulated_Treatment'] = df.apply(simulate_treatment, axis=1)
df = df[df['Simulated_Treatment'].notnull()].copy()

# Encode the treatment names as integer class ids; the fitted encoder is
# saved with the model so predictions can be mapped back to names.
le_treatment = LabelEncoder()
df['Simulated_Treatment'] = le_treatment.fit_transform(df['Simulated_Treatment'])

selected_features = [
    'Tumor_Size_mm',
    'Smoking_History',
    'Stage',
    'Blood_Pressure_Systolic',
    'White_Blood_Cell_Count',
    'LDH_Level',
    'Comorbidity_Score'
]

# NOTE(review): the target is produced by simulate_treatment from 'Stage' and
# 'Comorbidity_Score' alone, and both columns are in selected_features. The
# accuracy reported below therefore measures how well the model recovers that
# rule, not predictive performance on real treatment decisions.
X = df[selected_features]
y = df['Simulated_Treatment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Take the class count from the fitted encoder instead of hard-coding 3, so
# the model stays consistent if the labeling rules produce a different number
# of classes.
n_classes = len(le_treatment.classes_)

# use_label_encoder is intentionally not passed: the installed XGBoost reports
# it as an unused parameter and emits a warning when it is supplied.
model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=n_classes,
    eval_metric='mlogloss',
    n_estimators=300,
    max_depth=6,
    learning_rate=0.1,
    random_state=42
)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\n✅ Accuracy: {accuracy * 100:.2f}%")
print("\n📋 Classification Report:")
# Passing labels explicitly keeps target_names aligned with the encoder's
# classes even if some class does not occur in the test split.
print(classification_report(
    y_test,
    y_pred,
    labels=list(range(n_classes)),
    target_names=le_treatment.classes_
))

# Persist everything needed at inference time in a single pickle: the model,
# the input encoders, the target encoder, and the feature column order.
with open('model.pkl', 'wb') as f:
    pickle.dump({
        'model': model,
        'label_encoders': label_encoders,
        'le_treatment': le_treatment,
        'selected_features': selected_features
    }, f)

print("\n✅ Model, encoders, and features saved to model.pkl")


Parameters: { "use_label_encoder" } are not used.




✅ Accuracy: 100.00%

📋 Classification Report:
                   precision    recall  f1-score   support

     Chemotherapy       1.00      1.00      1.00      1631
Radiation Therapy       1.00      1.00      1.00      1052
          Surgery       1.00      1.00      1.00      1171

         accuracy                           1.00      3854
        macro avg       1.00      1.00      1.00      3854
     weighted avg       1.00      1.00      1.00      3854


✅ Model, encoders, and features saved to model.pkl
