# In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import f1_score, precision_recall_curve, classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from imblearn.combine import SMOTETomek
import tensorflow as tf

# Load the dataset
file_path = '/content/Tongue Data.xlsx'  # File path
df = pd.read_excel(file_path)

# Separate features and target
X = df.drop(columns=['Survived >5 Years'])
y = df['Survived >5 Years']

# Keep `y` as an (n_samples, 1) column vector; code that needs a 1-D array
# flattens it explicitly.
y = y.values.reshape(-1, 1)

# Identify categorical and numerical columns
categorical_columns = ['Sex(Female)', 'Sex(Male)', 'Diagnostic mapping',
                       'Marital status mapping', 'Race maping',
                       'Radiation Data', 'Toungue Site ', 'Surgery mapping']
numerical_columns = ['Age Range', 'Tumour Size']

# Split the RAW data first. Fitting the scaler/encoder before the split would
# let test-set statistics and categories leak into the preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Preprocessing: robust scaling for numerical columns and one-hot encoding for
# categorical columns.
# - handle_unknown='ignore': a category that only occurs in the test split is
#   encoded as all zeros instead of raising at transform time (combining this
#   with drop='first' needs a reasonably recent scikit-learn -- TODO confirm
#   the deployed version).
# - sparse_threshold=0: always return a dense array, which is what the
#   downstream NumPy/Keras code expects.
preprocessor = ColumnTransformer(
    [
        ('num', RobustScaler(), numerical_columns),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns),
    ],
    sparse_threshold=0,
)
X_train = preprocessor.fit_transform(X_train_raw)  # fit on training rows only
X_test = preprocessor.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted preprocessor; kept so
# any later code that refers to `X_processed` keeps working.
X_processed = preprocessor.transform(X)

# Address class imbalance using SMOTE and Tomek Links (training data only).
# A 1-D target is passed to avoid the column-vector conversion warning.
smote_tomek = SMOTETomek(random_state=42)
X_train_resampled, y_train_resampled = smote_tomek.fit_resample(X_train, y_train.ravel())

# Compute class weights from the ORIGINAL (pre-resampling) training labels and
# key them by the actual class label rather than by position.
# NOTE(review): these weights are later applied to SMOTE-balanced data, so the
# minority class is compensated twice -- confirm this is intended.
y_train_flat = y_train.ravel()
class_labels = np.unique(y_train_flat)
balanced_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=y_train_flat)
class_weights = {
    int(label): float(weight)
    for label, weight in zip(class_labels, balanced_weights)
}

# Define focal loss function
def focal_loss(gamma=1.5, alpha=0.5):
    """Build a binary focal-loss function for use with `model.compile`.

    The returned callable computes
    ``mean(-alpha_t * (1 - p_t) ** gamma * log(p_t))`` where ``p_t`` is the
    predicted probability of the true class and ``alpha_t`` is ``alpha`` for
    positive targets and ``1 - alpha`` for negative targets.

    Args:
        gamma: Exponent applied to ``(1 - p_t)``; larger values shrink the
            contribution of well-classified samples.
        alpha: Weight given to positive (``y_true == 1``) samples; negatives
            receive ``1 - alpha``.

    Returns:
        A function ``(y_true, y_pred) -> scalar tensor``.
    """
    @tf.function
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        # Match the prediction dtype (not a hard-coded float32) and shape.
        # Without the reshape, a (batch,) target against (batch, 1) predictions
        # would silently broadcast to (batch, batch) and corrupt the mean.
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))

        # Clip so that log(p_t) stays finite.
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)

        alpha_t = y_true * alpha + (1 - y_true) * (1 - alpha)
        p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
        per_sample_loss = -alpha_t * tf.math.pow(1. - p_t, gamma) * tf.math.log(p_t)
        return tf.reduce_mean(per_sample_loss)
    return focal_loss_fixed

# Define the neural network model
def build_model(input_shape):
    """Create and compile the feed-forward binary classifier.

    Args:
        input_shape: Number of input features; used as the single dimension
            of the input layer's shape.

    Returns:
        A compiled `tf.keras.Sequential` model with a sigmoid output,
        trained with `focal_loss()` and reporting accuracy and PR-AUC
        (metric name ``auc_pr``).
    """
    # (units, L2 strength) per hidden block; None means no kernel regularizer.
    hidden_spec = [(128, 0.001), (64, 0.001), (32, None)]

    network = tf.keras.Sequential()
    network.add(tf.keras.layers.InputLayer(shape=(input_shape,)))

    for units, l2_strength in hidden_spec:
        if l2_strength is None:
            regularizer = None
        else:
            regularizer = tf.keras.regularizers.l2(l2_strength)
        # Each hidden block is Dense -> BatchNormalization -> Dropout.
        network.add(tf.keras.layers.Dense(units, activation='relu', kernel_regularizer=regularizer))
        network.add(tf.keras.layers.BatchNormalization())
        network.add(tf.keras.layers.Dropout(0.3))

    network.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Binary classification

    adam = tf.keras.optimizers.Adam(learning_rate=0.0005)
    pr_auc = tf.keras.metrics.AUC(name='auc_pr', curve='PR')
    network.compile(optimizer=adam, loss=focal_loss(), metrics=['accuracy', pr_auc])
    return network

# Convert to dense NumPy arrays for compatibility with Keras
def _to_dense(features):
    """Return `features` as a dense NumPy array, expanding SciPy sparse input."""
    if hasattr(features, "toarray"):
        # np.array() on a sparse matrix yields a 0-d object array, so expand it.
        return features.toarray()
    return np.asarray(features)


X_train_resampled_np = _to_dense(X_train_resampled)
y_train_resampled_np = np.asarray(y_train_resampled).ravel()
X_train_np = _to_dense(X_train)
y_train_np = np.asarray(y_train).ravel()
X_test_np = _to_dense(X_test)

# Cross-validate on the REAL training samples. Resampling happens inside each
# fold, on the training part only; splitting already-resampled data would put
# synthetic neighbours of training samples into the validation folds.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
history_list = []
oof_probs = np.zeros(len(y_train_np), dtype=float)  # out-of-fold predictions

for train_index, val_index in kfold.split(X_train_np, y_train_np):
    X_fold_train, y_fold_train = SMOTETomek(random_state=42).fit_resample(
        X_train_np[train_index], y_train_np[train_index]
    )
    X_fold_val, y_fold_val = X_train_np[val_index], y_train_np[val_index]

    model = build_model(X_train_np.shape[1])
    history = model.fit(
        X_fold_train, y_fold_train,
        epochs=200,
        batch_size=128,
        validation_data=(X_fold_val, y_fold_val),
        class_weight=class_weights,
        verbose=1
    )
    history_list.append(history)
    oof_probs[val_index] = model.predict(X_fold_val, verbose=0).flatten()

# Choose the decision threshold from out-of-fold predictions, so the test set
# is never used for tuning. `precision`/`recall` have one more entry than
# `thresholds` (the final (1, 0) point), hence the [:-1] slices.
precision, recall, thresholds = precision_recall_curve(y_train_np, oof_probs)
f1_scores = 2 * (precision[:-1] * recall[:-1]) / (precision[:-1] + recall[:-1] + 1e-7)
optimal_threshold = thresholds[np.argmax(f1_scores)]

# Fit the final model on the full resampled training set. The fold models
# above each saw only 4/5 of the data and serve for validation only.
model = build_model(X_train_resampled_np.shape[1])
model.fit(
    X_train_resampled_np, y_train_resampled_np,
    epochs=200,
    batch_size=128,
    class_weight=class_weights,
    verbose=1
)

# Evaluate the model on test data
y_test = np.asarray(y_test).flatten()
evaluation_results = model.evaluate(X_test_np, y_test, verbose=0)
test_loss, test_accuracy, test_auc_pr = evaluation_results

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test AUC-PR: {test_auc_pr}")

# Predict probabilities and calculate F1 score.
# precision_recall_curve treats a sample as positive when score >= threshold,
# so the same inclusive comparison is used here.
y_pred_probs = model.predict(X_test_np).flatten()
y_pred = (y_pred_probs >= optimal_threshold).astype(int)
f1 = f1_score(y_test, y_pred)
print(f"F1 Score: {f1}")

# Display classification report and confusion matrix
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Plot Training Metrics
def _plot_metric(fold_history, metric_key, metric_label):
    """Draw the train and validation curves of one metric on the current axes."""
    plt.plot(fold_history[metric_key], label=f'Train {metric_label}')
    plt.plot(fold_history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    plt.title(f'Training & Validation {metric_label}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_label)
    plt.legend()


# Only the first fold's history is visualised.
first_fold_history = history_list[0].history

# Loss and accuracy side by side
plt.figure(figsize=(12, 6))
for panel_index, (metric_key, metric_label) in enumerate(
    [('loss', 'Loss'), ('accuracy', 'Accuracy')], start=1
):
    plt.subplot(1, 2, panel_index)
    _plot_metric(first_fold_history, metric_key, metric_label)

plt.tight_layout()
plt.show()

# Plot AUC-PR
plt.figure(figsize=(12, 6))
_plot_metric(first_fold_history, 'auc_pr', 'AUC-PR')
plt.show()
