<a href="https://colab.research.google.com/github/BrunoRobertoGoncalves/EyeCancerSurvivalEstimator/blob/main/eye_cancer_survival_estimator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training Process


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import shap


In [None]:
import io

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

try:
    from google.colab import files
except ImportError:
    # Not running on Colab: skip the upload dialog and read the CSV from the
    # working directory instead.
    files = None

sns.set(style="whitegrid")

# Name the dataset is expected to have, both in the upload dialog and on disk.
DATA_FILE = 'eye_cancer_patients.csv'

# On Colab, files.upload() opens a file picker and returns a dict that maps
# each uploaded file name to its raw bytes.
uploaded = files.upload() if files is not None else {}

if uploaded:
    # Parse the bytes that were just uploaded rather than re-opening a fixed
    # path: the previous version ignored `uploaded`, so a re-upload stored
    # under a different name was never read and a stale copy (or nothing)
    # was loaded instead.
    chosen = DATA_FILE if DATA_FILE in uploaded else next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[chosen]))
else:
    # Nothing uploaded (dialog cancelled, or not on Colab): fall back to a
    # copy already present in the working directory.
    df = pd.read_csv(DATA_FILE)

df.head()


In [None]:
from IPython.display import display

# Column dtypes and non-null counts; info() prints its report directly.
df.info()

# Only the last expression of a cell is rendered automatically, so the
# per-column missing-value counts must be displayed explicitly -- as a bare
# mid-cell expression their result was computed and then thrown away.
display(df.isnull().sum())

# Summary statistics for every column, numeric and non-numeric alike.
df.describe(include='all')


In [None]:
# Build the modelling frame from the raw data without touching `df` itself.

# Categorical columns that get one-hot encoded (first level dropped).
# NOTE(review): 'Outcome_Status' is encoded as a feature here -- confirm it
# does not leak information about the Survival_Time_Months target.
cat_cols = ['Gender', 'Cancer_Type', 'Laterality', 'Stage_at_Diagnosis', 'Treatment_Type',
            'Genetic_Markers', 'Country', 'Outcome_Status']

# Columns cast to plain integers after encoding (the original notes describe
# them as booleans).
flag_cols = ['Surgery_Status', 'Radiation_Therapy', 'Family_History']

# Elapsed time is measured against the moment the cell runs, so this feature
# changes from one run date to the next.
reference_date = pd.Timestamp('today')
diagnosis_dates = pd.to_datetime(df['Date_of_Diagnosis'])

df_model = (
    df
    # The identifier and the raw date are not used as features.
    .drop(columns=['Patient_ID', 'Date_of_Diagnosis'])
    .assign(
        # Years elapsed since diagnosis.
        Years_Since_Diagnosis=(reference_date - diagnosis_dates).dt.days / 365.25,
        # Missing genetic markers become their own explicit category.
        Genetic_Markers=df['Genetic_Markers'].fillna('Unknown'),
    )
)

df_model = pd.get_dummies(df_model, columns=cat_cols, drop_first=True)
df_model = df_model.astype({name: int for name in flag_cols})

# Preview the final columns.
df_model.head(5)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Separate the regression target from the feature matrix.
target_col = 'Survival_Time_Months'
X = df_model.drop(columns=[target_col])
y = df_model[target_col]

# 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Gradient-boosted trees with a fixed seed.
xgb_params = dict(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=4,
    random_state=42,
)
xgb_model = XGBRegressor(**xgb_params)
xgb_model.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = xgb_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R²: {r2:.2f}",
    sep="\n",
)


In [None]:
# Make every column of X numeric.
# NOTE(review): this cell rewrites X after the train/test split and the model
# fit in the previous cell, so X_train / X_test keep the dtypes they were
# created with -- confirm that ordering is intended.

def _recode_columns(frame, dtype, convert):
    """Apply `convert` in place to every column of `frame` with the given dtype.

    Returns the index of column names that were converted.
    """
    selected = frame.select_dtypes(include=dtype).columns
    for name in selected:
        frame[name] = convert(frame[name])
    return selected


# Any remaining string columns are replaced by their integer category codes.
obj_cols = _recode_columns(X, 'object', lambda s: s.astype('category').cat.codes)

print("Tipos restantes:", X.dtypes.value_counts())

# Boolean columns become integers.
bool_cols = _recode_columns(X, 'bool', lambda s: s.astype(int))

print("Tipos finais:", X.dtypes.value_counts())


In [None]:
# Build a SHAP explainer for the fitted model, passing the full feature matrix
# X as its second argument.
# NOTE(review): X contains the held-out rows as well -- confirm that using it
# (rather than X_train) is intended.
explainer = shap.Explainer(xgb_model, X)

# Attributions for the held-out rows.
shap_values = explainer(X_test)

# Bar plot first, then the beeswarm plot, both over the same attributions.
for draw_plot in (shap.plots.bar, shap.plots.beeswarm):
    draw_plot(shap_values)


## 🧠 Eye Cancer Survival Estimator

This interactive tool allows you to simulate a patient with ocular cancer and estimate their expected survival time (in months) using a machine learning model trained on clinical data.  

You can enter values such as age, treatment history, and time since diagnosis. The system will generate a personalized survival prediction and explain the key factors influencing the outcome using SHAP values.

⚠️ **Note**: This model is predictive, not causal. It reflects patterns in historical data and does not constitute medical advice.


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Numeric inputs: age, chemotherapy cycles and years since diagnosis.
idade = widgets.IntText(value=50, description='Idade:')
quimio = widgets.IntText(value=20, description='Ciclos de Quimio:')
anos_diagnostico = widgets.FloatText(value=2.5, description='Anos desde Dx:')

# Yes/no inputs: surgery, radiotherapy and family history.
cirurgia, radiacao, historico = (
    widgets.Checkbox(value=default, description=label)
    for label, default in [
        ('Fez Cirurgia?', True),
        ('Fez Radioterapia?', False),
        ('Hist. Familiar?', True),
    ]
)

# Trigger button and the output area the prediction is rendered into.
botao = widgets.Button(button_style='success', description='Prever Sobrevida')
saida = widgets.Output()

def ao_clicar(b):
    """Button callback: predict survival for the values entered in the form.

    Builds a one-row frame with the same columns as ``X``, sets every column
    to zero, fills in the form values, then prints the model prediction and
    draws a SHAP waterfall plot for that row inside the ``saida`` output area.

    Args:
        b: the argument supplied by the button click; not used.
    """
    # Form values keyed by the feature column they overwrite.
    valores_formulario = {
        'Age': idade.value,
        'Chemotherapy': quimio.value,
        'Surgery_Status': int(cirurgia.value),
        'Radiation_Therapy': int(radiacao.value),
        'Family_History': int(historico.value),
        'Years_Since_Diagnosis': anos_diagnostico.value,
    }

    with saida:
        clear_output()

        # Start from one row of X so the column layout matches, then zero
        # every column before applying the form values.
        nova_entrada = X.iloc[0:1].copy()
        nova_entrada[:] = 0
        for coluna, valor in valores_formulario.items():
            nova_entrada[coluna] = valor

        pred = xgb_model.predict(nova_entrada)[0]
        print(f"Previsão de Sobrevida: {pred:.1f} meses")

        # Explain this single prediction.
        shap_values_single = explainer(nova_entrada)
        shap.plots.waterfall(shap_values_single[0])


# Run the prediction callback whenever the button is clicked.
botao.on_click(ao_clicar)

# Stack the inputs, the button and the output area vertically, in this order.
controles = [idade, quimio, cirurgia, radiacao, historico, anos_diagnostico, botao, saida]
ui = widgets.VBox(controles)

display(ui)


In [None]:
# Rewrite the notebook file in place with the clear-output and clear-metadata
# preprocessors enabled.
!jupyter nbconvert --to notebook --inplace \
                   --ClearOutputPreprocessor.enabled=True \
                   --ClearMetadataPreprocessor.enabled=True \
                   eye_cancer_survival_estimator.ipynb
