In [1]:
import os
# Set environment variables
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
os.environ["TF_DISABLE_METAL"] = "1"  # Mac-specific

import tensorflow as tf
# Explicitly disable GPU devices
tf.config.set_visible_devices([], 'GPU')

# Verify
print("GPU devices:", tf.config.list_physical_devices('GPU'))
print("Note: Environment variables set for this entire notebook session")

GPU devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Note: Environment variables set for this entire notebook session


In [2]:
# Quick test to confirm CPU-only execution
with tf.device('/GPU:0'):
    a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    b = tf.constant([[5.0, 6.0], [7.0, 8.0]])
    c = tf.matmul(a, b)
    
print("Test matrix multiplication result:")
print(c)
print("Device used:", c.device)  # Should show CPU device

Test matrix multiplication result:
tf.Tensor(
[[19. 22.]
 [43. 50.]], shape=(2, 2), dtype=float32)
Device used: /job:localhost/replica:0/task:0/device:CPU:0


In [21]:
# Load from pkl files
import pandas as pd
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import time
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt


with open('./Input/8-mers/mic_dframe.pkl', 'rb') as file:
    mic_dframe = pickle.load(file)
with open('./Input/8-mers/suscep_classes.pkl', 'rb') as file:
    suscep_classes = pd.read_pickle(file)
    

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

antibiotics = mic_dframe.columns[-12:]

def prob_to_onehot(y):
    for i in range(0, y.shape[0]):
        greater = 0
        pos = 0
        for j in range(0, y.shape[1]):
            if y[i][j] >= greater:
                greater = y[i][j]
                pos = j
            y[i][j] = 0.0
        y[i][pos] = 1.0
    return y

def best_N_features(target_df, antibiotic, N):
    path = './input/8-mers/counts/'
    genome_ids = target_df['Genome ID'].loc[pd.notnull(target_df[antibiotic])].values
    X = np.array([np.load(path + genome_id + '.npy') for genome_id in genome_ids])
    y = target_df[antibiotic].loc[pd.notnull(target_df[antibiotic])].values
    n_best = SelectKBest(chi2, k=N)
    X_new = n_best.fit_transform(X, y*10**5)
    kmers = np.load('./input/8-mers/kmers_basis.npy')
    selected_kmers = [column[0]  for column in zip(kmers, n_best.get_support()) if column[1]]
    #scores = k_best.fit(X,y).scores_
    best_feature_df = pd.DataFrame(X_new, columns = selected_kmers)
    return best_feature_df

def class_weighting(df, antibiotic, cv):
    # Unique mic values
    mics = df[antibiotic].loc[pd.notnull(df[antibiotic])].unique()
    # Samples per class
    samples = {mic : len(df.loc[df[antibiotic]==mic]) for mic in mics}
    # Sorted classes
    mics = sorted([key for key in samples.keys()])
    # total data
    total = len(df.loc[pd.notnull(df[antibiotic])])
    # class weights
    class_weight = {i: (1 / samples[mic])*(total/len(mics))*(1/cv) for i, mic in enumerate(mics)}
    
    return class_weight

def prepare_training(antibiotic):
    kmer_dframe = best_N_features(target_df=mic_dframe, antibiotic=antibiotic, N=2000)

    # Features
    X = kmer_dframe.values

    # Standardize the input data
    scaler_X = StandardScaler().fit(X)
    X_scaled = scaler_X.transform(X)

    # Target
    # list of MIC values
    y = mic_dframe[antibiotic].loc[pd.notnull(mic_dframe[antibiotic])].values

    # reshape the list of mics
    y_reshape = np.reshape(y,(-1,1))

    # define encoder function
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

    # transform the target categorical data to onehot code
    y_onehot = encoder.fit_transform(y_reshape)

    # Split into the training and test data
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_onehot, test_size=0.1, random_state=0)
    
    return X_train, X_test, y_train, y_test, y_onehot, X



**ciprofloxacin**

In [4]:
antibiotic = 'ciprofloxacin'

model_dir = 'models'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)

    # Model building functionu
    def make_model_ciprofloxacin(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.7))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=150,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_ciprofloxacin()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=5000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
        
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")

    
else:
    print(f"Model already exists at {model_dir}")


No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 51.18 seconds
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.6736694677871149
Model successfully saved to models


**trimethoprim/sulfamethoxazole**

In [7]:

antibiotic = 'trimethoprim/sulfamethoxazole'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)


if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)

    # Model building function
    def make_model_trimethoprim_sulfamethoxazole(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.007),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=150,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_trimethoprim_sulfamethoxazole()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=2000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")

else:
    print(f"Model already exists at {model_dir}")


Model already exists at models


**ceftriaxone**

In [8]:

antibiotic = 'ceftriaxone'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)
    
    # Model building function
    def make_model_ceftriaxone(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=100,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_ceftriaxone()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=2000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")




No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 52.70 seconds
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 995us/step
Test set accuracy: 0.888243831640058
Model successfully saved to models


**gentamicin**

In [9]:

antibiotic = 'gentamicin'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)
    # Model building function
    def make_model_gentamicin(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=1000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.02),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.7))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=500,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_gentamicin()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=4000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")


No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 159.93 seconds
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.6232771822358346
Model successfully saved to models


**ceftiofur**

In [None]:

antibiotic = 'ceftiofur'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)

    # Model building function
    def make_model_ceftiofur(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.4))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=150,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_ceftiofur()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=1000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))


    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")

No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 32.65 seconds
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.7145061728395061
Model successfully saved to models


**Ampicillin**


In [11]:

antibiotic = 'ampicillin'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)
    
    # Model building function
    def make_model_ampicillin(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=100,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_ampicillin()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=1000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")




No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 46.06 seconds
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.8205882352941176
Model successfully saved to models


**amoxicillin/clavulanic acid**

In [12]:
antibiotic = 'amoxicillin/clavulanic acid'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)
    # Model building function
    def make_model_amoxicillin(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.02),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.9, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.7))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=600,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )


    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_amoxicillin()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=1000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")


No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 52.11 seconds
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.8695652173913043
Model successfully saved to models


**cefoxitin**

In [13]:
antibiotic = 'cefoxitin'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)

    # Model building function
    def make_model_cefoxitin(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=100000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=1000,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )


    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_cefoxitin()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=2500,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")


No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 89.47 seconds
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.7215384615384616
Model successfully saved to models


**nalidixic acid**

In [14]:
antibiotic = 'nalidixic acid'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)

    # Model building function
    def make_model_nalidixic_acid(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = 0.00001
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=500,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_nalidixic_acid()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=7000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")


No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 318.49 seconds
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.6816816816816816
Model successfully saved to models


**tetracycline**

In [15]:

antibiotic = 'tetracycline'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)

    # Model building function
    def make_model_tetracycline(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.02),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.7))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=100,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_tetracycline()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=1200,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))
    
    # Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")



No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 56.86 seconds
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.9373088685015291
Model successfully saved to models


**chloramphenicol**

In [16]:
antibiotic = 'chloramphenicol'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic)


    # Model building function
    def make_model_chloramphenicol(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.7))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=100,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_chloramphenicol()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=1000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))

# Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")


No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 41.20 seconds
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.7631578947368421
Model successfully saved to models


**sulfisoxazole**

In [17]:
antibiotic = 'sulfisoxazole'

model_filepath = os.path.join(model_dir, 'model_'+ antibiotic[0:5] + '.keras')
    
# Check if the model file exists
model_exists = os.path.exists(model_filepath)

if not model_exists:
    print(f"No model found at {model_dir}. Saving new model...")
    
    X_train, X_test, y_train, y_test, y_onehot, X = prepare_training(antibiotic) 

    # Model building function
    def make_model_sulfisoxazole(loss='categorical_crossentropy', metrics=['categorical_accuracy']):
        
        model = tf.keras.Sequential()
        initializer = tf.keras.initializers.HeNormal()
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(0.0001, decay_steps=10000, decay_rate=0.99, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        model.add(tf.keras.layers.Dense(64, activation='relu', kernel_initializer=initializer,
                                        kernel_regularizer=tf.keras.regularizers.l2(0.01),
                                        use_bias=False, input_shape=[X.shape[1]]))
        model.add(tf.keras.layers.BatchNormalization(axis=-1, momentum=0.9, epsilon=0.001,
                                                    center=True, scale=True, 
                                                    beta_initializer='zeros', 
                                                    gamma_initializer='ones', 
                                                    moving_mean_initializer='zeros', 
                                                    moving_variance_initializer='ones'))
        model.add(tf.keras.layers.Dropout(0.7))
        model.add(tf.keras.layers.Dense(y_onehot.shape[1], activation='softmax'))
        

        # Add the cross-entropy loss and accuracy metric for threshold probability
        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics,
        )
        
        return model

    # Include an early stopping callback for convenience
    early_stopping = tf.keras.callbacks.EarlyStopping(
        # monitor (loss or val_loss)
        monitor='val_loss',
        # how many epochs to wait before stopping (minimum epochs)
        patience=200,
        # minimium amount of change to count as an improvement
        min_delta=0.001,
        restore_best_weights=True,
    )

    starttime = time.time()

    class_weight = class_weighting(mic_dframe, antibiotic, cv=1)
    # Define the model
    model = make_model_sulfisoxazole()

    history = model.fit(
        X_train, y_train,
        shuffle=True,
        validation_split=0.3,
        #validation_data=(X_valid, y_valid),
        batch_size=512,
        epochs=1000,
        callbacks=[early_stopping],
        class_weight = class_weight,
        verbose=0, # hide the output because we have so many epochs
    )

    print('Training Time: {:0.2f} seconds'.format(time.time() - starttime))

    y_pred = model.predict(X_test)
    y_pred = prob_to_onehot(y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    print("Test set accuracy: " + str(accuracy))


# Create directory if it doesn't exist
    os.makedirs(model_dir, exist_ok=True)
    # Save the model
    model.save(model_filepath)
    print(f"Model successfully saved to {model_dir}")
    
else:
    print(f"Model already exists at {model_dir}")


No model found at models. Saving new model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Time: 27.41 seconds
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Test set accuracy: 0.6117455138662317
Model successfully saved to models


**Teste com novos dados**

In [48]:
mic_dframe = pd.read_csv('/Users/febagnatori/Documents/GitHub/ScientificResearchAtibiotics/Input/dados_teste/mic_modelo.csv')
#mic_dframe.to_excel('./Output/mic_modelo.xlsx')

suscep_classes = pd.read_excel('/Users/febagnatori/Documents/GitHub/ScientificResearchAtibiotics/Input/dados_teste/suscep_classes_new_alt.xlsx')

antibiotics_new = mic_dframe.columns[-18:]
antibiotics_new = antibiotics_new.to_list()
antibiotics_new.pop(2)
antibiotics_new.pop(4)
antibiotics_new.pop(5)
antibiotics_new.pop(9)
antibiotics_new.pop(12)

# print(antibiotics)
# print(antibiotics_new)

commom_antibiotics = []

for antibiotic_aux in antibiotics_new:
    if antibiotic_aux in antibiotics:
        commom_antibiotics.append(antibiotic_aux)

print(commom_antibiotics)

commom_index = []
for i in range(len(antibiotics_new)):
    if antibiotics_new[i] in antibiotics:
        commom_index.append(i)
        
print(commom_index)



['ciprofloxacin', 'gentamicin', 'trimethoprim/sulfamethoxazole']
[1, 4, 12]


In [49]:


def best_N_features_new(target_df, antibiotic, N):
    path = '/Users/febagnatori/Documents/GitHub/ScientificResearchAtibiotics/Input/dados_teste/Kmers 8/counts_k8_npy/counts_k8_npy/'
    genome_ids = target_df['Gene_ID'].loc[pd.notnull(target_df[antibiotic])].values
    
    # Modified approach to load text files
    X = []
    for genome_id in genome_ids:
        try:
            file_path = path + genome_id + '.npy'
            # Load as text file instead of numpy binary
            with open(file_path, 'r') as f:
                data = np.array([float(x) for x in f.read().split()])
            X.append(data)
        except Exception as e:
            print(f"Error loading {genome_id}.npy: {str(e)}")
            
    if not X:  # If no files loaded successfully
        raise ValueError("No files could be loaded successfully")
        
    X = np.array(X)
    y = target_df[antibiotic].loc[pd.notnull(target_df[antibiotic])].values
    
    n_best = SelectKBest(chi2, k=min(N, X.shape[1]))  # Ensure k is not larger than features count
    X_new = n_best.fit_transform(X, y*10**5)
    
    # Load kmers file - check if it's also a text file
    try:
        with open('/Users/febagnatori/Documents/GitHub/ScientificResearchAtibiotics/Input/dados_teste/Kmers 8/combinations_8.txt', 'r') as f:
            kmers = np.array(f.read().split())
    except Exception as e:
        print(f"Error loading kmers file: {str(e)}")
        # Fallback to creating generic column names
        kmers = np.array([f'feature_{i}' for i in range(X.shape[1])])
        
    # Get selected features
    selected_kmers = [column[0] for column in zip(kmers, n_best.get_support()) if column[1]]
    
    # Create dataframe with selected features
    best_feature_df = pd.DataFrame(X_new, columns=selected_kmers)
    return best_feature_df

In [54]:
antibiotic = antibiotics_new[1]

kmer_dframe = best_N_features_new(target_df=mic_dframe, antibiotic=antibiotic, N=2000)

X = kmer_dframe.values

# Standardize the input data
scaler_X = StandardScaler().fit(X)
X_scaled = scaler_X.transform(X)

# Target
# list of MIC values
y = mic_dframe[antibiotic].loc[pd.notnull(mic_dframe[antibiotic])].values

# reshape the list of mics
y_reshape = np.reshape(y,(-1,1))

# define encoder function
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

# transform the target categorical data to onehot code
y_onehot = encoder.fit_transform(y_reshape)

X_test = X_scaled
y_test = y_onehot

**Ciprofloxacin**

In [None]:
loaded_model = tf.keras.models.load_model('./models/model_cipro.keras')

y_pred = loaded_model.predict(X_test)
y_pred = prob_to_onehot(y_pred)

accuracy = accuracy_score(y_test, y_pred)

print("Test set accuracy: " + str(accuracy))

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


ValueError: inconsistent shapes