<a href="https://colab.research.google.com/github/Officialwax/Officialwax/blob/main/Base_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files
import pandas as pd
uploaded = files.upload()
for filename in uploaded.keys():
    df = pd.read_csv(filename)

df.head(2)

Saving eye_cancer_patients.csv to eye_cancer_patients.csv


Unnamed: 0,Patient_ID,Age,Gender,Cancer_Type,Laterality,Date_of_Diagnosis,Stage_at_Diagnosis,Treatment_Type,Surgery_Status,Radiation_Therapy,Chemotherapy,Outcome_Status,Survival_Time_Months,Genetic_Markers,Family_History,Country
0,PID00001,58,F,Retinoblastoma,Left,2019-01-25,Stage IV,Radiation,False,15,3,Deceased,85,,True,UK
1,PID00002,15,Other,Retinoblastoma,Right,2021-10-21,Stage III,Chemotherapy,True,69,6,In Remission,10,,True,Japan


In [None]:
# IMPORTS
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from catboost import CatBoostClassifier
from imblearn.combine import SMOTEENN


In [None]:
# Drop non-feature columns
df_proc = df.drop(columns=[
    'Patient_ID', 'Date_of_Diagnosis', 'Surgery_Status',
    'Radiation_Therapy', 'Chemotherapy', 'Country'
])

# Encode categorical features
df_proc['Gender'] = df_proc['Gender'].map({'M':0, 'F':1, 'Other':2})
df_proc['Cancer_Type'] = df_proc['Cancer_Type'].astype('category').cat.codes
df_proc['Laterality'] = df_proc['Laterality'].map({'Left':0, 'Right':1, 'Bilateral':2})
df_proc['Stage_at_Diagnosis'] = df_proc['Stage_at_Diagnosis'].map({
    'Stage I':1, 'Stage II':2, 'Stage III':3, 'Stage IV':4
})
df_proc['Genetic_Markers'] = (
    df_proc['Genetic_Markers']
    .fillna('None')
    .map({'None':0, 'BRAF Mutation':1})
)
df_proc['Family_History'] = df_proc['Family_History'].astype(int)

# Feature engineering
df_proc['Stage_Genetic'] = df_proc['Stage_at_Diagnosis'] * df_proc['Genetic_Markers']
df_proc['Type_Stage'] = df_proc['Cancer_Type'] * df_proc['Stage_at_Diagnosis']
df_proc['Family_Genetic'] = df_proc['Family_History'] * df_proc['Genetic_Markers']

# Encode target
le = LabelEncoder()
df_proc['Treatment_Label'] = le.fit_transform(df_proc['Treatment_Type'])

# Define X and y  âœ…
X = df_proc.drop(columns=[
    'Treatment_Type',
    'Treatment_Label',
    'Survival_Time_Months',
    'Outcome_Status'
])
y = df_proc['Treatment_Label'].values

class_names = le.classes_


In [None]:
rf = RandomForestClassifier(
    n_estimators=1800,
    max_depth=None,
    min_samples_split=4,
    min_samples_leaf=2,
    max_features="sqrt",
    class_weight="balanced_subsample",
    n_jobs=-1,
    random_state=42
)

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

rf_preds = np.zeros(len(y))
rf_true = np.zeros(len(y))

for train_idx, val_idx in kf.split(X, y):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    X_res, y_res = SMOTEENN(random_state=42).fit_resample(X_train, y_train)
    rf.fit(X_res, y_res)

    rf_preds[val_idx] = rf.predict(X_val)
    rf_true[val_idx] = y_val

print("\n=== Random Forest ===")
print("Accuracy:", accuracy_score(rf_true, rf_preds))
print(classification_report(rf_true, rf_preds, target_names=class_names))



=== Random Forest ===
Accuracy: 0.3388
              precision    recall  f1-score   support

Chemotherapy       0.34      0.36      0.35      1665
   Radiation       0.34      0.39      0.36      1656
     Surgery       0.34      0.26      0.29      1679

    accuracy                           0.34      5000
   macro avg       0.34      0.34      0.34      5000
weighted avg       0.34      0.34      0.34      5000



In [None]:
cat = CatBoostClassifier(
    iterations=1500,
    learning_rate=0.03,
    depth=7,
    l2_leaf_reg=5,
    random_state=42,
    verbose=0
)

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

cat_preds = np.zeros(len(y), dtype=int)
cat_true = np.zeros(len(y), dtype=int)

for train_idx, val_idx in kf.split(X, y):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    X_res, y_res = SMOTEENN(random_state=42).fit_resample(X_train, y_train)

    cat.fit(X_res, y_res)

    preds = cat.predict(X_val).ravel()   # ðŸ”‘ FIX
    cat_preds[val_idx] = preds
    cat_true[val_idx] = y_val

print("\n=== CatBoost Standalone Results ===")
print("Accuracy:", accuracy_score(cat_true, cat_preds))
print(classification_report(cat_true, cat_preds, target_names=class_names))



=== CatBoost Standalone Results ===
Accuracy: 0.3372
              precision    recall  f1-score   support

Chemotherapy       0.34      0.37      0.35      1665
   Radiation       0.34      0.41      0.37      1656
     Surgery       0.33      0.24      0.28      1679

    accuracy                           0.34      5000
   macro avg       0.34      0.34      0.33      5000
weighted avg       0.34      0.34      0.33      5000



In [None]:
mlp = MLPClassifier(
    hidden_layer_sizes=(256,128),
    activation="relu",
    solver="adam",
    max_iter=800,
    random_state=42
)

mlp_preds = np.zeros(len(y))
mlp_true = np.zeros(len(y))

for train_idx, val_idx in kf.split(X, y):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    X_res, y_res = SMOTEENN(random_state=42).fit_resample(X_train, y_train)
    mlp.fit(X_res, y_res)

    mlp_preds[val_idx] = mlp.predict(X_val)
    mlp_true[val_idx] = y_val

print("\n=== MLP ===")
print("Accuracy:", accuracy_score(mlp_true, mlp_preds))
print(classification_report(mlp_true, mlp_preds, target_names=class_names))



=== MLP ===
Accuracy: 0.3404
              precision    recall  f1-score   support

Chemotherapy       0.34      0.42      0.37      1665
   Radiation       0.35      0.30      0.32      1656
     Surgery       0.34      0.30      0.32      1679

    accuracy                           0.34      5000
   macro avg       0.34      0.34      0.34      5000
weighted avg       0.34      0.34      0.34      5000

