In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from tensorflow.keras import layers, models

# Load the two embedding matrices (one row per sample).
embeddings_1 = np.load('embeddings_1.npy')
embeddings_2 = np.load('embeddings_2.npy')

# Read the label files inside `with` blocks so the file handles are closed
# deterministically instead of being left open until garbage collection.
with open('icd_codes_1.txt') as labels_file_1:
    labels_1 = labels_file_1.read().splitlines()
with open('icd_codes_2.txt') as labels_file_2:
    labels_2 = labels_file_2.read().splitlines()

# Combine embeddings and labels, keeping part 1 before part 2 in both.
embeddings = np.concatenate([embeddings_1, embeddings_2], axis=0)
labels = labels_1 + labels_2

# Each embedding row must have exactly one label line; fail here with a clear
# message rather than later inside the train/validation split.
if len(labels) != embeddings.shape[0]:
    raise ValueError(
        f"Got {embeddings.shape[0]} embedding rows but {len(labels)} label lines"
    )

# scaler=StandardScaler()
# embeddings=scaler.fit_transform(embeddings)

# Each label line is a ';'-separated list of ICD10 codes: turn every line into
# a set of codes and multi-hot encode them (one column per distinct code).
all_labels = [set(label_line.split(';')) for label_line in labels]
mlb = MultiLabelBinarizer()
multi_hot_labels = mlb.fit_transform(all_labels)

# Sanity check: the encoded width equals the number of learned classes
# (the dataset is expected to have roughly 1400 distinct codes).
assert len(mlb.classes_) == multi_hot_labels.shape[1]

# Hold out 20% of the samples for validation; the seed fixes the split.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
    embeddings,
    multi_hot_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Quick sanity check of the training split: (n_train_samples, embedding_dim).
print(X_train.shape)
# Embeddings to predict on; every submission cell below reads `test_data`.
test_data = np.load('test_data.npy')


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

# Common input shape and output classes
input_shape = (1024,)
num_classes = len(mlb.classes_)
# Registry of trained models keyed by name. It is re-created empty right
# before the training loop further down in this cell.
fitted_models = {}

# Every model below ends in a sigmoid layer with one unit per ICD10 code and is
# compiled with binary cross-entropy and a multi-label AUC metric.

# Model 17: 2048-1024-512 swish MLP, L2 (1e-4) on the first two Dense layers,
# dropout 0.3, Adam with learning rate 5e-5.
# Not listed in this cell's `models_list`, so the loop below does not train it.
model17 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='swish', kernel_regularizer=regularizers.l2(1e-4)),
    layers.Dropout(0.3),
    layers.Dense(1024, activation='swish', kernel_regularizer=regularizers.l2(1e-4)),
    layers.Dropout(0.3),
    layers.Dense(512, activation='swish'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model17.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00005), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 18: 2048-1024-512 SELU MLP with AlphaDropout 0.2 and the default 'adam'.
# Not listed in this cell's `models_list`, so the loop below does not train it.
# NOTE(review): SELU is usually paired with kernel_initializer='lecun_normal';
# no initializer is passed here, so the Keras default is used - confirm intent.
model18 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='selu'),
    layers.AlphaDropout(0.2),
    layers.Dense(1024, activation='selu'),
    layers.AlphaDropout(0.2),
    layers.Dense(512, activation='selu'),
    layers.AlphaDropout(0.2),
    layers.Dense(num_classes, activation='sigmoid')
])
model18.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 19: 2048-1024-512 ReLU MLP, dropout 0.2, RMSprop (lr 1e-4, decay 1e-6).
# Its entry in this cell's `models_list` is commented out.
# NOTE(review): `decay` is a legacy optimizer argument; newer Keras optimizers
# may reject it - confirm against the installed TensorFlow version.
model19 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='sigmoid')
])
model19.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 20: three Dense -> BatchNormalization -> LeakyReLU(0.1) blocks
# (2048, 1024, 512); dropout 0.3 follows only the first two blocks. Uses 'nadam'.
# NOTE(review): Keras 3 renamed LeakyReLU's `alpha` to `negative_slope` -
# confirm against the installed version.
model20 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048),
    layers.BatchNormalization(),
    layers.LeakyReLU(alpha=0.1),
    layers.Dropout(0.3),
    layers.Dense(1024),
    layers.BatchNormalization(),
    layers.LeakyReLU(alpha=0.1),
    layers.Dropout(0.3),
    layers.Dense(512),
    layers.BatchNormalization(),
    layers.LeakyReLU(alpha=0.1),
    layers.Dense(num_classes, activation='sigmoid')
])
model20.compile(optimizer='nadam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 21: same layer stack as Model 19, trained with Adagrad (lr 1e-4).
model21 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='sigmoid')
])
model21.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.0001), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 22: 2048-1024-512 ReLU MLP with L2 (1e-4) and dropout 0.3 on each of
# those layers, plus an extra 256-unit ReLU layer without regularisation or
# dropout. Adam with learning rate 5e-5.
model22 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
    layers.Dropout(0.3),
    layers.Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
    layers.Dropout(0.3),
    layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(1e-4)),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])
model22.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00005), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 23: mixed activations (swish, relu, swish) on 2048-1024-512,
# dropout 0.2, default 'adam'.
model23 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='swish'),
    layers.Dropout(0.2),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(512, activation='swish'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='sigmoid')
])
model23.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 24: 2048-1024-512 ReLU MLP with heavier dropout (0.4), 'nadam'.
model24 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(num_classes, activation='sigmoid')
])
model24.compile(optimizer='nadam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Models trained in this run, paired with the name used for their submission file.
models_list = [
    # (model19, 'model19'),
    (model20, 'model20'), (model21, 'model21'), (model22, 'model22'),
    (model23, 'model23'), (model24, 'model24')
]

# Dictionary to store fitted models, keyed by the names above.
fitted_models = {}

# Train each model, keep it in `fitted_models`, and write one submission per model.
for model, name in models_list:
    print(f"Training {name}")
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=64)
    fitted_models[name] = model  # Store the fitted model

    # Predict probabilities on the test data and binarize at 0.5.
    preds = model.predict(test_data)
    pred_labels = (preds >= 0.5).astype(int)

    # Decode multi-hot rows back to ';'-joined ICD10 codes. Codes are
    # upper-cased BEFORE sorting so the emitted string is guaranteed to be in
    # lexicographic order even if a class label contains lowercase letters.
    classes = np.asarray(mlb.classes_)
    submission = [
        ';'.join(sorted(code.upper() for code in classes[row == 1]))
        for row in pred_labels
    ]

    # Generate sequential IDs (1 to number of test samples)
    num_test_samples = len(pred_labels)
    ids = range(1, num_test_samples + 1)

    # Create the submission DataFrame
    submission_df = pd.DataFrame({'id': ids, 'labels': submission})

    # Save the submission file
    submission_filename = f'submission_{name}.csv'
    submission_df.to_csv(submission_filename, index=False)
    print(f"Saved {submission_filename}")
    # The model itself is only kept in memory (in `fitted_models`), not written to disk.
    print(f"Fitted model {name} saved.")

# Only the models listed in `models_list` are available afterwards,
# e.g. fitted_models['model20'] ... fitted_models['model24'].


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Common input shape and output classes
input_shape = (1024,)
num_classes = len(mlb.classes_)


def build_grid_model(learning_rate, dropout, dense_size, optimizer_cls):
    """Build and compile one MLP of the hyper-parameter grid.

    The network has three ReLU hidden layers of width ``dense_size``,
    ``dense_size // 2`` and ``dense_size // 4``, each followed by
    ``Dropout(dropout)``, and a sigmoid output with ``num_classes`` units.

    Args:
        learning_rate: Learning rate passed to the optimizer.
        dropout: Dropout rate applied after every hidden layer.
        dense_size: Width of the first hidden layer (2048 or 1024 in the grid).
        optimizer_cls: Optimizer class, called as ``optimizer_cls(learning_rate=...)``.

    Returns:
        The compiled Keras model (binary cross-entropy loss, multi-label AUC metric).
    """
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Dense(dense_size, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(dense_size // 2, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(dense_size // 4, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(num_classes, activation='sigmoid')
    ])
    model.compile(
        optimizer=optimizer_cls(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)]
    )
    return model


# The 16 configurations, in the order that reproduces the model numbering:
# the optimizer varies fastest, then dense_size, then dropout, then learning rate.
#   model25: lr=0.00005, dropout=0.2, dense_size=2048, optimizer=adam
#   model26: lr=0.00005, dropout=0.2, dense_size=2048, optimizer=nadam
#   ...
#   model40: lr=0.0001,  dropout=0.3, dense_size=1024, optimizer=nadam
grid_configs = [
    (learning_rate, dropout, dense_size, optimizer_cls)
    for learning_rate in (0.00005, 0.0001)
    for dropout in (0.2, 0.3)
    for dense_size in (2048, 1024)
    for optimizer_cls in (tf.keras.optimizers.Adam, tf.keras.optimizers.Nadam)
]

# Unpacking into explicit names keeps `model25` ... `model40` available to the
# rest of the notebook and fails loudly if the grid size ever changes.
(model25, model26, model27, model28, model29, model30, model31, model32,
 model33, model34, model35, model36, model37, model38, model39, model40) = (
    build_grid_model(*config) for config in grid_configs
)


# Models trained in this run, paired with the name used for their submission
# file. Entries for model25-model34 are disabled.
models_list = [
    # (model25, 'model25'), (model26, 'model26'), (model27, 'model27'),
    # (model28, 'model28'), (model29, 'model29'), (model30, 'model30'),
    # (model31, 'model31'), (model32, 'model32'), (model33, 'model33'),
    # (model34, 'model34'),
    (model35, 'model35'), (model36, 'model36'),
    (model37, 'model37'), (model38, 'model38'), (model39, 'model39'),
    (model40, 'model40')
]
# Fitted models keyed by name; replaces any registry left by an earlier cell.
fitted_models = {}

# Train each model, keep it in `fitted_models`, and write one submission per model.
for model, name in models_list:
    print(f"Training {name}")
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=64)
    fitted_models[name] = model  # Store the fitted model

    # Predict probabilities on the test data and binarize at 0.5.
    preds = model.predict(test_data)
    pred_labels = (preds >= 0.5).astype(int)

    # Decode multi-hot rows back to ';'-joined ICD10 codes. Codes are
    # upper-cased BEFORE sorting so the emitted string is guaranteed to be in
    # lexicographic order even if a class label contains lowercase letters.
    classes = np.asarray(mlb.classes_)
    submission = [
        ';'.join(sorted(code.upper() for code in classes[row == 1]))
        for row in pred_labels
    ]

    # Generate sequential IDs (1 to number of test samples)
    num_test_samples = len(pred_labels)
    ids = range(1, num_test_samples + 1)

    # Create the submission DataFrame
    submission_df = pd.DataFrame({'id': ids, 'labels': submission})

    # Save the submission file
    submission_filename = f'submission_{name}.csv'
    submission_df.to_csv(submission_filename, index=False)
    print(f"Saved {submission_filename}")
    # The model itself is only kept in memory (in `fitted_models`), not written to disk.
    print(f"Fitted model {name} saved.")



In [None]:
# Re-score the test set with the already fitted model37, using a stricter
# decision threshold (0.6) than the 0.5 used in the training loop.
name = "model37"
model = fitted_models[name]
preds = model.predict(test_data)
pred_labels = (preds >= 0.6).astype(int)

# Decode multi-hot rows back to ';'-joined ICD10 codes. Codes are upper-cased
# BEFORE sorting so the emitted string is guaranteed to be in lexicographic
# order even if a class label contains lowercase letters.
classes = np.asarray(mlb.classes_)
submission = [
    ';'.join(sorted(code.upper() for code in classes[row == 1]))
    for row in pred_labels
]

# Generate sequential IDs (1 to number of test samples)
num_test_samples = len(pred_labels)
ids = range(1, num_test_samples + 1)

# Create the submission DataFrame
submission_df = pd.DataFrame({'id': ids, 'labels': submission})

# Save under the "submission2_" prefix so the 0.5-threshold file is not overwritten.
submission_filename = f'submission2_{name}.csv'
submission_df.to_csv(submission_filename, index=False)
print(f"Saved {submission_filename}")
# No training or model serialisation happens in this cell; only the CSV is written.
print(f"Fitted model {name} saved.")

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

# Define a common input shape
input_shape = (1024,)
# One sigmoid output unit per distinct ICD10 code learned by the binarizer.
num_classes = len(mlb.classes_)

# Model 1: Baseline Dense Network with Dropout and L2 Regularization
# 512 -> BatchNorm -> 256, L2 (0.001) on both hidden layers, dropout 0.2.
model1 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.Dropout(0.2),
    layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='sigmoid')
])
model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 2: Deep Feedforward Network with Skip Connections
# Functional-API model: the raw input is concatenated with the 256-unit hidden
# representation (the "skip") before the final 128-unit layer.
input_layer = layers.Input(shape=input_shape)
x = layers.Dense(512, activation='relu')(input_layer)
x = layers.Dropout(0.3)(x)
x = layers.Dense(256, activation='relu')(x)
skip = layers.Concatenate()([input_layer, x])
x = layers.Dense(128, activation='relu')(skip)
x = layers.Dropout(0.3)(x)
output_layer = layers.Dense(num_classes, activation='sigmoid')(x)
model2 = models.Model(inputs=input_layer, outputs=output_layer)
model2.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 3: Wide and Deep Network
# A shallow "wide" branch (one 256-unit layer) and a "deep" branch
# (512 -> dropout -> 256) both read the input; their outputs are concatenated
# and passed through a 128-unit layer before the sigmoid output.
input_layer = layers.Input(shape=input_shape)
wide = layers.Dense(256, activation='relu')(input_layer)
deep = layers.Dense(512, activation='relu')(input_layer)
deep = layers.Dropout(0.3)(deep)
deep = layers.Dense(256, activation='relu')(deep)
combined = layers.Concatenate()([wide, deep])
x = layers.Dense(128, activation='relu')(combined)
output_layer = layers.Dense(num_classes, activation='sigmoid')(x)
model3 = models.Model(inputs=input_layer, outputs=output_layer)
# 'focal_loss' is not a loss identifier Keras can resolve, so the model could
# not be trained. 'binary_focal_crossentropy' is the built-in focal loss for
# sigmoid / multi-label outputs (the same identifier Model 5 uses).
model3.compile(optimizer='adam', loss='binary_focal_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 4: LSTM-based Network
# The flat 1024-d input is reshaped to (32, 32) so the LSTMs see it as a
# sequence of 32 steps with 32 features each.
model4 = models.Sequential([
    layers.Reshape((32, 32), input_shape=input_shape),
    layers.LSTM(128, return_sequences=True),
    layers.LSTM(64),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model4.compile(optimizer='nadam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 5: Transformer-based Model
# Same (32, 32) reshape, one self-attention layer (query and value are both
# `x`), average pooling over the 32 positions, then a small dense head.
# Trained with the built-in binary focal loss and the 'adamw' optimizer.
input_layer = layers.Input(shape=input_shape)
x = layers.Reshape((32, 32))(input_layer)
transformer_block = layers.MultiHeadAttention(num_heads=4, key_dim=32)(x, x)
x = layers.GlobalAveragePooling1D()(transformer_block)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.3)(x)
output_layer = layers.Dense(num_classes, activation='sigmoid')(x)
model5 = models.Model(inputs=input_layer, outputs=output_layer)
model5.compile(optimizer='adamw', loss='binary_focal_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 6: 1D CNN-based Network
# Two Conv1D layers over the (32, 32) reshape with max pooling in between,
# global max pooling, then a dense head.
# NOTE(review): hinge loss is normally used with outputs in [-1, 1]; here it is
# combined with a sigmoid output - confirm this is intended.
model6 = models.Sequential([
    layers.Reshape((32, 32), input_shape=input_shape),
    layers.Conv1D(64, kernel_size=3, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(128, kernel_size=3, activation='relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model6.compile(optimizer='sgd', loss='hinge', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 7: Shallow Network with RMSProp Optimizer
# Unlike the other models it reports precision at 0.8 recall instead of AUC.
model7 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])
model7.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=[tf.keras.metrics.PrecisionAtRecall(0.8, name="PrecisionAtRecall")])

# Model 8: Deep Neural Network with Learning Rate Decay
# 1024-512-256 ReLU MLP with dropout 0.3; a fourth 128-unit block is disabled.
# NOTE(review): `decay` is a legacy optimizer argument; newer Keras optimizers
# may reject it - confirm against the installed TensorFlow version.
model8 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    # layers.Dense(128, activation='relu'),
    # layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model8.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, decay=1e-6), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Experiment notes: layers were initially 3 relu, metric doesn't matter;
# things still to explore: learning rate, decay, preprocessing of labels.

# Model 9: Ensemble (Voting Classifier using Averaged Predictions)
# NOTE: a later cell defines another function with the same name; whichever
# cell ran last determines what `ensemble_predict` refers to.
def ensemble_predict(X, ensemble_models=None):
    """Average the predicted probabilities of several fitted models.

    Args:
        X: Input batch forwarded unchanged to every model's ``predict``.
        ensemble_models: Optional sequence of objects exposing ``predict(X)``.
            Defaults to the eight notebook models ``model1`` ... ``model8``,
            which is what the original hard-coded version averaged.

    Returns:
        The element-wise mean of the models' predictions (same shape as a
        single model's ``predict`` output).

    Raises:
        ValueError: If ``ensemble_models`` is an empty sequence.
    """
    if ensemble_models is None:
        # Resolved at call time so the notebook models only need to exist
        # when the default ensemble is actually used.
        ensemble_models = [model1, model2, model3, model4,
                           model5, model6, model7, model8]
    if len(ensemble_models) == 0:
        raise ValueError("ensemble_predict needs at least one model")

    total = None
    for member in ensemble_models:
        member_preds = member.predict(X)
        # Build a new array instead of using `+=` so the first model's output
        # is never modified in place.
        total = member_preds if total is None else total + member_preds
    # Divide by the actual ensemble size rather than a hard-coded 8.
    return total / len(ensemble_models)

# The eight models of this cell and their matching names, in the same order.
models_list = [
    model1, model2, model3, model4,
    model5, model6, model7, model8,
]
model_names = [f"model{index}" for index in range(1, 9)]


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

# Common input shape and output classes
input_shape = (1024,)
num_classes = len(mlb.classes_)

# Every model below ends in a sigmoid layer with one unit per ICD10 code and is
# compiled with binary cross-entropy and a multi-label AUC metric.

# Model 9: Deeper Network with Increased Dropout
# 1024-512-256 with dropout 0.4, then a 128-unit layer without dropout.
model9 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])
model9.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# An earlier draft of Model 10 added the 1024-unit tensor to a 512-unit
# residual branch; the two widths differ, so the version below uses a
# 1024-unit residual branch instead.

# Model 10: Residual Connections with Shape Matching
input_layer = layers.Input(shape=input_shape)
x = layers.Dense(1024, activation='relu')(input_layer)
x = layers.Dropout(0.3)(x)

# Residual branch
residual = layers.Dense(1024, activation='relu')(x)  # Match shape to 1024

# Add residual connection
x = layers.Add()([x, residual])
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.3)(x)

# Output layer
output_layer = layers.Dense(num_classes, activation='sigmoid')(x)
model10 = models.Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model10.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])


# Model 11: Using LeakyReLU for Better Handling of Negative Activations
# The Dense layers have no activation of their own; LeakyReLU(0.1) is applied
# as a separate layer after each one.
# NOTE(review): Keras 3 renamed LeakyReLU's `alpha` to `negative_slope` -
# confirm against the installed version.
model11 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(1024),
    layers.LeakyReLU(alpha=0.1),
    layers.Dropout(0.3),
    layers.Dense(512),
    layers.LeakyReLU(alpha=0.1),
    layers.Dropout(0.3),
    layers.Dense(256),
    layers.LeakyReLU(alpha=0.1),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model11.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 12: Learning Rate Scheduler with Adam Optimizer
model12 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
# Learning rate starts at 1e-4 and is multiplied by 0.9 per 1000 optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.0001,
    decay_steps=1000,
    decay_rate=0.9
)
model12.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 13: Weight Regularization (L1 and L2 Regularization)
# Combined L1 (1e-5) + L2 (1e-4) penalty on all three hidden layers.
model13 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(1024, activation='relu', kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4)),
    layers.Dropout(0.3),
    layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4)),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4)),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model13.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 14: Swish Activation Function for Smoother Gradient Flow
model14 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(1024, activation='swish'),
    layers.Dropout(0.3),
    layers.Dense(512, activation='swish'),
    layers.Dropout(0.3),
    layers.Dense(256, activation='swish'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model14.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 15: Using Nadam Optimizer with Batch Normalization
# BatchNormalization is applied after each ReLU Dense layer, before dropout.
model15 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(1024, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='sigmoid')
])
model15.compile(optimizer='nadam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# Model 16: wider 2048-1024-512 ReLU MLP with lighter dropout (0.2) and a
# smaller Adam learning rate (5e-5).
model16 = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Dense(2048, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='sigmoid')
])
model16.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00005), loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC(name="AUC", multi_label=True)])

# List of new models, in numbering order (model9 first).
new_models = [model9, model10, model11, model12, model13, model14, model15, model16]

# # Training loop for new models
# for i, model in enumerate(new_models, 9):
#     print(f"Training Model {i}")
#     history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=64)


In [None]:
# Load test data (the same file is also loaded into `test_data` by an earlier cell).
test_data = np.load('test_data.npy')

# Decision threshold applied to predicted probabilities by cells that read `threshold`.
threshold = 0.5  # Adjust threshold if needed based on validation performance


In [None]:
# Models trained by this cell, paired one-to-one with their names.
# The original paired `models_list = [model8]` with names starting at
# "model9", so zip() silently labelled model8 as "model9".
models_list = [model8]
model_names = ["model8"]

# Later experiments; listed here for reference, not trained by this cell.
new_models = [model9, model10, model11, model12, model13, model14, model15, model16]
new_model_names = ["model9", "model10", "model11", "model12", "model13", "model14", "model15", "model16"]

# zip() truncates to the shorter input without warning, so make a length
# mismatch between models and names an explicit error.
assert len(models_list) == len(model_names), "models_list and model_names must line up"

# Training loop: fit each model, then binarize its test predictions at 0.3.
for model, model_name in zip(models_list, model_names):
    print(f"Training {model_name}")

    # Train the model
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=64)

    # Generate predictions on the test data
    preds = model.predict(test_data)
    pred_labels = (preds >= 0.3).astype(int)

In [None]:
# Model trained in this cell and the name used for its submission file.
models_list = [model16]
model_names = ["model16"]

model = model16
# Set the name explicitly. Previously `model_name` was never assigned in this
# cell, so the output file was named after whatever an earlier cell's loop had
# left behind (or the cell failed on a fresh kernel).
model_name = model_names[0]

# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=64)

# Generate predictions on the test data and binarize at 0.4.
preds = model.predict(test_data)
pred_labels = (preds >= 0.4).astype(int)

# Experiment log (presumably decision threshold --- resulting score):
# 0.45 --- 0.438
# 0.475 --- 0.440
# 0.49 --- 0.441
# 0.495 --- 0.442
#  0.5 --- 0.442
#  0.55 --- 0.442
#  0.57 --- 0.442
# 0.6 --- 0.441

# Decode multi-hot rows back to ';'-joined ICD10 codes. Codes are upper-cased
# BEFORE sorting so the emitted string is guaranteed to be in lexicographic
# order even if a class label contains lowercase letters.
classes = np.asarray(mlb.classes_)
submission = [
    ';'.join(sorted(code.upper() for code in classes[row == 1]))
    for row in pred_labels
]

# Generate sequential IDs (1 to number of test samples)
num_test_samples = len(pred_labels)
ids = range(1, num_test_samples + 1)

# Create the submission DataFrame
submission_df = pd.DataFrame({'id': ids, 'labels': submission})

# Save the submission file
submission_filename = f'submission2_{model_name}.csv'
submission_df.to_csv(submission_filename, index=False)
print(f"Saved {submission_filename}")

# Ensemble prediction example
# ensemble_predictions = ensemble_predict(test_data)


In [None]:
model_name = "model16"
# Re-score the test set at a different threshold (0.44) without retraining.
# NOTE: `model` is not assigned in this cell; it is whatever model an earlier
# cell left bound to that name, so run the model16 training cell first.
preds = model.predict(test_data)
pred_labels = (preds >= 0.44).astype(int)

# Experiment log (presumably decision threshold --- resulting score):
# 0.45 --- 0.438
# 0.475 --- 0.440
# 0.49 --- 0.441
# 0.495 --- 0.442
#  0.5 --- 0.442
#  0.55 --- 0.442
#  0.57 --- 0.442
# 0.6 --- 0.441

# for new model16, 0.45 - 0.47 yielded 0.460

# Decode multi-hot rows back to ';'-joined ICD10 codes. Codes are upper-cased
# BEFORE sorting so the emitted string is guaranteed to be in lexicographic
# order even if a class label contains lowercase letters.
classes = np.asarray(mlb.classes_)
submission = [
    ';'.join(sorted(code.upper() for code in classes[row == 1]))
    for row in pred_labels
]

# Generate sequential IDs (1 to number of test samples)
num_test_samples = len(pred_labels)
ids = range(1, num_test_samples + 1)

# Create the submission DataFrame
submission_df = pd.DataFrame({'id': ids, 'labels': submission})

# Save the submission file
submission_filename = f'submission2_{model_name}.csv'
submission_df.to_csv(submission_filename, index=False)
print(f"Saved {submission_filename}")

In [None]:
def ensemble_predict(X, models=None, threshold=0.5):
    """Union-ensemble the multi-label predictions of several models.

    Each model scores ``X`` with ``.predict``; a label counts as predicted by
    a model when its score is ``>= threshold``. The ensemble predicts a label
    for a sample when at least one model predicts it.

    The previous implementation built ``set(preds[i])`` from the raw score
    vectors, which yields the set of distinct score *values* and loses the
    label positions entirely, so its result could not be decoded into labels.

    Args:
        X: Input passed unchanged to every model's ``predict`` method.
        models: Iterable of objects exposing ``predict(X)``; every model must
            return an array-like of the same shape. Defaults to the
            notebook-level ``model1`` ... ``model8``.
        threshold: Score cut-off applied to every model's output.

    Returns:
        A list with one entry per sample; each entry is a list of 0/1 ints
        (multi-hot), in the same layout as the thresholded ``pred_labels``
        rows decoded elsewhere in this notebook.

    Raises:
        ValueError: If ``models`` is given but empty.
    """
    if models is None:
        # Resolved at call time so the eight notebook-level models are only
        # required when the caller does not pass an explicit list.
        models = [model1, model2, model3, model4,
                  model5, model6, model7, model8]
    models = list(models)
    if not models:
        raise ValueError("ensemble_predict needs at least one model")

    union_mask = None
    for member in models:
        # Boolean mask of the labels this model predicts for every sample.
        member_mask = np.asarray(member.predict(X)) >= threshold
        union_mask = member_mask if union_mask is None else (union_mask | member_mask)

    return union_mask.astype(int).tolist()


In [None]:
# Score the test set and binarise the outputs at `threshold`.
# NOTE(review): both `model` and `threshold` must already exist in the kernel;
# neither is bound in this cell. Although the output file is named
# 'submission_ensemble1.csv', the predictions here come from `model.predict`,
# not from `ensemble_predict` -- confirm that is intended.
preds = model.predict(test_data)
pred_labels = (preds >= threshold).astype(int)

# Turn every multi-hot prediction row back into the ';'-separated ICD10 string
# written to the submission file.
submission = []
for pred in pred_labels:
    # Class names whose flag is set, ordered lexicographically before joining.
    codes = sorted(mlb.classes_[j] for j, val in enumerate(pred) if val == 1)
    label_string = ';'.join(codes).upper()
    submission.append(label_string)

# IDs are simply 1..N in the order of the prediction rows.
num_test_samples = len(pred_labels)
ids = range(1, num_test_samples + 1)

# Assemble the two-column table (id, labels) and write it to a fixed file name.
submission_df = pd.DataFrame({'id': ids, 'labels': submission})
submission_filename = 'submission_ensemble1.csv'
submission_df.to_csv(submission_filename, index=False)
print(f"Saved {submission_filename}")

In [None]:
import pandas as pd

# Submission files whose per-row label sets are merged by set union.
# NOTE(review): 'ensemble_submission3.csv' is written by the voting cell that
# appears further down in this notebook, so on a fresh kernel that cell must
# run (or the file must already exist on disk) before this one.
submission_files = [
    'ensemble_submission3.csv',
    'submission2_model8.csv'
]

# Read all submissions into a list of DataFrames
submissions = [pd.read_csv(file) for file in submission_files]

# Rows are matched by position, so every file must have the same length.
row_counts = [len(frame) for frame in submissions]
assert len(set(row_counts)) == 1, f"submission files differ in length: {row_counts}"

ensemble_predictions = []
for i in range(len(submissions[0])):
    # Union of the labels predicted for row i across all files.
    combined_predictions = set()

    # The loop variable is deliberately not called `submission`/`labels`, so
    # the notebook-level objects with those names are not overwritten.
    for submission_frame in submissions:
        cell = submission_frame.loc[i, 'labels']
        # pandas reads an empty CSV cell as NaN; str(NaN) would be the literal
        # label 'nan', so rows without predictions are skipped explicitly.
        if pd.isna(cell):
            continue
        combined_predictions.update(
            token.strip() for token in str(cell).split(';') if token.strip()
        )

    # Sorted, ';'-joined string in the same format as the input files.
    ensemble_predictions.append(';'.join(sorted(combined_predictions)))

# IDs are taken from the first file; labels are the per-row unions.
ensemble_df = pd.DataFrame()
ensemble_df['id'] = submissions[0]['id']
ensemble_df['labels'] = ensemble_predictions

# Save the ensemble predictions to a CSV file
ensemble_df.to_csv('ensemble_submission4.csv', index=False)


In [None]:
import pandas as pd
from collections import Counter

# Earlier multi-file configuration, kept for reference:
# submission_files = [
#     'submission_model1.csv',
#     'submission_model2.csv',
#     'submission_model8.csv',
#     'submission_model4.csv',
#     'submission_model5.csv',
#     'submission_model6.csv',
#     'submission_model7.csv',
#     'submission_model8.csv'
# ]

submission_files = [
    'submission1_model8.csv',
    'submission2_model8.csv'
]

# Read all submissions into a list of DataFrames
submissions = [pd.read_csv(file) for file in submission_files]

# Rows are matched by position, so every file must have the same length.
row_counts = [len(frame) for frame in submissions]
assert len(set(row_counts)) == 1, f"submission files differ in length: {row_counts}"

# A label is kept when at least `min_votes` files predict it. The rule used to
# be the hard-coded "count > 2" (three or more votes), which can never be met
# when only two files are listed and therefore produced an empty prediction
# for every row. Capping at the number of files keeps the three-vote rule for
# larger ensembles and falls back to "all files agree" for smaller ones.
min_votes = min(3, len(submissions))

ensemble_predictions = []
for i in range(len(submissions[0])):
    # Number of files that predict each label for row i.
    label_counter = Counter()

    # The loop variable is deliberately not called `submission`/`labels`, so
    # the notebook-level objects with those names are not overwritten.
    for submission_frame in submissions:
        cell = submission_frame.loc[i, 'labels']
        # pandas reads an empty CSV cell as NaN; str(NaN) would be counted as
        # the literal label 'nan', so such rows are skipped explicitly.
        if pd.isna(cell):
            continue
        # A set, so a label repeated inside one file still counts as one vote.
        label_counter.update(
            {token.strip() for token in str(cell).split(';') if token.strip()}
        )

    selected_labels = [label for label, count in label_counter.items() if count >= min_votes]

    # Sorted, ';'-joined string in the same format as the input files.
    ensemble_predictions.append(';'.join(sorted(selected_labels)))

# IDs are taken from the first file; labels are the per-row vote results.
ensemble_df = pd.DataFrame()
ensemble_df['id'] = submissions[0]['id']
ensemble_df['labels'] = ensemble_predictions

# Save the ensemble predictions to a CSV file
ensemble_df.to_csv('ensemble_submission3.csv', index=False)
