In [2]:
! pip install keras-tuner



# **Loading the data**

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [4]:
# Load the cleaned dataset
joined_df = pd.read_csv('joined_df.csv', index_col='respondent_id')
print("joined_df.shape", joined_df.shape)
joined_df.head()


joined_df.shape (26707, 35)


Unnamed: 0_level_0,h1n1_concern,h1n1_knowledge,behavioral_antiviral_meds,behavioral_avoidance,behavioral_face_mask,behavioral_wash_hands,behavioral_large_gatherings,behavioral_outside_home,behavioral_touch_face,doctor_recc_h1n1,...,income_poverty,marital_status,rent_or_own,employment_status,hhs_geo_region,census_msa,household_adults,household_children,h1n1_vaccine,seasonal_vaccine
respondent_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,Below Poverty,Not Married,Own,Not in Labor Force,oxchjgsf,Non-MSA,0.0,0.0,0,0
1,3.0,2.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,...,Below Poverty,Not Married,Rent,Employed,bhuqouqj,"MSA, Not Principle City",0.0,0.0,0,1
2,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,"<= $75,000, Above Poverty",Not Married,Own,Employed,qufhixun,"MSA, Not Principle City",2.0,0.0,0,0
3,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,...,Below Poverty,Not Married,Rent,Not in Labor Force,lrircsnp,"MSA, Principle City",0.0,0.0,0,1
4,2.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,...,"<= $75,000, Above Poverty",Married,Own,Employed,qufhixun,"MSA, Not Principle City",1.0,0.0,0,0


## **Preprocessing Categorical Data**

In [5]:
# Separate numeric and categorical columns
numeric_cols = joined_df.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = joined_df.select_dtypes(include=['object']).columns

In [6]:
# Display value counts for each categorical column
for col in categorical_cols:
    print(f"\nValue counts in column '{col}':")
    print(joined_df[col].value_counts())



Value counts in column 'age_group':
age_group
65+ Years        6843
55 - 64 Years    5563
45 - 54 Years    5238
18 - 34 Years    5215
35 - 44 Years    3848
Name: count, dtype: int64

Value counts in column 'education':
education
College Graduate    10097
Some College         7043
12 Years             5797
< 12 Years           2363
missing              1407
Name: count, dtype: int64

Value counts in column 'race':
race
White                21222
Black                 2118
Hispanic              1755
Other or Multiple     1612
Name: count, dtype: int64

Value counts in column 'sex':
sex
Female    15858
Male      10849
Name: count, dtype: int64

Value counts in column 'income_poverty':
income_poverty
<= $75,000, Above Poverty    12777
> $75,000                     6810
missing                       4423
Below Poverty                 2697
Name: count, dtype: int64

Value counts in column 'marital_status':
marital_status
Married        13555
Not Married    11744
missing         1408
Name: c

In [7]:
# One-hot encode categorical columns
new_joined_df = pd.get_dummies(joined_df, columns=categorical_cols, drop_first=True)


In [8]:
new_joined_df.head()

Unnamed: 0_level_0,h1n1_concern,h1n1_knowledge,behavioral_antiviral_meds,behavioral_avoidance,behavioral_face_mask,behavioral_wash_hands,behavioral_large_gatherings,behavioral_outside_home,behavioral_touch_face,doctor_recc_h1n1,...,hhs_geo_region_dqpwygqj,hhs_geo_region_fpwskwrf,hhs_geo_region_kbazzjca,hhs_geo_region_lrircsnp,hhs_geo_region_lzgpxyit,hhs_geo_region_mlyzmhmf,hhs_geo_region_oxchjgsf,hhs_geo_region_qufhixun,"census_msa_MSA, Principle City",census_msa_Non-MSA
respondent_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,False,False,False,False,False,False,True,False,False,True
1,3.0,2.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,...,False,False,False,False,False,False,False,False,False,False
2,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,False,False,False,False,False,False,False,True,False,False
3,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,...,False,False,False,True,False,False,False,False,True,False
4,2.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,...,False,False,False,False,False,False,False,True,False,False


In [9]:
new_joined_df.describe()

Unnamed: 0,h1n1_concern,h1n1_knowledge,behavioral_antiviral_meds,behavioral_avoidance,behavioral_face_mask,behavioral_wash_hands,behavioral_large_gatherings,behavioral_outside_home,behavioral_touch_face,doctor_recc_h1n1,...,opinion_h1n1_vacc_effective,opinion_h1n1_risk,opinion_h1n1_sick_from_vacc,opinion_seas_vacc_effective,opinion_seas_risk,opinion_seas_sick_from_vacc,household_adults,household_children,h1n1_vaccine,seasonal_vaccine
count,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,...,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0
mean,1.618486,1.262532,0.048714,0.727749,0.068933,0.825888,0.35864,0.337315,0.677264,0.202494,...,3.850623,2.342566,2.35767,4.025536,2.719162,2.118112,0.886499,0.529599,0.212454,0.465608
std,0.908741,0.616805,0.215273,0.445127,0.253345,0.379213,0.478828,0.472076,0.46641,0.401866,...,1.000034,1.276167,1.35265,1.077131,1.371662,1.31948,0.749901,0.925264,0.409052,0.498825
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,3.0,1.0,1.0,4.0,2.0,1.0,0.0,0.0,0.0,0.0
50%,2.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,...,4.0,2.0,2.0,4.0,2.0,2.0,1.0,0.0,0.0,0.0
75%,2.0,2.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,5.0,4.0,4.0,5.0,4.0,2.118112,1.0,1.0,0.0,1.0
max,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,5.0,5.0,5.0,5.0,5.0,5.0,3.0,3.0,1.0,1.0


In [10]:
data_type = new_joined_df.dtypes
print(data_type)

h1n1_concern                            float64
h1n1_knowledge                          float64
behavioral_antiviral_meds               float64
behavioral_avoidance                    float64
behavioral_face_mask                    float64
behavioral_wash_hands                   float64
behavioral_large_gatherings             float64
behavioral_outside_home                 float64
behavioral_touch_face                   float64
doctor_recc_h1n1                        float64
doctor_recc_seasonal                    float64
chronic_med_condition                   float64
child_under_6_months                    float64
health_worker                           float64
health_insurance                        float64
opinion_h1n1_vacc_effective             float64
opinion_h1n1_risk                       float64
opinion_h1n1_sick_from_vacc             float64
opinion_seas_vacc_effective             float64
opinion_seas_risk                       float64
opinion_seas_sick_from_vacc             

In [11]:
def identify_numerical_columns(df):
    """
    Identifies numerical columns in a DataFrame.

    Parameters:
    df (pd.DataFrame): The input DataFrame.

    Returns:
    list: A list of numerical column names.
    """
    # Select columns with numeric data types
    numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
    return numeric_cols

# Example usage:
numerical_columns = identify_numerical_columns(new_joined_df)
print("Numerical Columns:", numerical_columns)


Numerical Columns: ['h1n1_concern', 'h1n1_knowledge', 'behavioral_antiviral_meds', 'behavioral_avoidance', 'behavioral_face_mask', 'behavioral_wash_hands', 'behavioral_large_gatherings', 'behavioral_outside_home', 'behavioral_touch_face', 'doctor_recc_h1n1', 'doctor_recc_seasonal', 'chronic_med_condition', 'child_under_6_months', 'health_worker', 'health_insurance', 'opinion_h1n1_vacc_effective', 'opinion_h1n1_risk', 'opinion_h1n1_sick_from_vacc', 'opinion_seas_vacc_effective', 'opinion_seas_risk', 'opinion_seas_sick_from_vacc', 'household_adults', 'household_children', 'h1n1_vaccine', 'seasonal_vaccine']


In [12]:
from sklearn.preprocessing import MinMaxScaler

# Identify numerical columns
numeric_cols = ['h1n1_concern', 'h1n1_knowledge', 'behavioral_antiviral_meds',
                'behavioral_avoidance', 'behavioral_face_mask', 'behavioral_wash_hands',
                'behavioral_large_gatherings', 'behavioral_outside_home', 'behavioral_touch_face',
                'doctor_recc_h1n1', 'doctor_recc_seasonal', 'chronic_med_condition',
                'child_under_6_months', 'health_worker', 'health_insurance',
                'opinion_h1n1_vacc_effective', 'opinion_h1n1_risk',
                'opinion_h1n1_sick_from_vacc', 'opinion_seas_vacc_effective',
                'opinion_seas_risk', 'opinion_seas_sick_from_vacc', 'household_adults',
                'household_children']

In [13]:
# Initialize MinMaxScaler
scaler = MinMaxScaler()

# Scale only numeric columns
new_joined_df[numeric_cols] = scaler.fit_transform(new_joined_df[numeric_cols])


In [14]:
new_joined_df.describe()

Unnamed: 0,h1n1_concern,h1n1_knowledge,behavioral_antiviral_meds,behavioral_avoidance,behavioral_face_mask,behavioral_wash_hands,behavioral_large_gatherings,behavioral_outside_home,behavioral_touch_face,doctor_recc_h1n1,...,opinion_h1n1_vacc_effective,opinion_h1n1_risk,opinion_h1n1_sick_from_vacc,opinion_seas_vacc_effective,opinion_seas_risk,opinion_seas_sick_from_vacc,household_adults,household_children,h1n1_vaccine,seasonal_vaccine
count,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,...,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0,26707.0
mean,0.539495,0.631266,0.048714,0.727749,0.068933,0.825888,0.35864,0.337315,0.677264,0.202494,...,0.712656,0.335642,0.339417,0.756384,0.42979,0.279528,0.2955,0.176533,0.212454,0.465608
std,0.302914,0.308403,0.215273,0.445127,0.253345,0.379213,0.478828,0.472076,0.46641,0.401866,...,0.250008,0.319042,0.338163,0.269283,0.342915,0.32987,0.249967,0.308421,0.409052,0.498825
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.333333,0.5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.75,0.25,0.0,0.0,0.0,0.0,0.0
50%,0.666667,0.5,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.75,0.25,0.25,0.75,0.25,0.25,0.333333,0.0,0.0,0.0
75%,0.666667,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,1.0,0.75,0.75,1.0,0.75,0.279528,0.333333,0.333333,0.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Training Data



In [15]:
# Separate features (X) and targets (y)
X = new_joined_df.drop(['h1n1_vaccine', 'seasonal_vaccine'], axis=1)
y_h1n1 = new_joined_df['h1n1_vaccine']  # H1N1 vaccine
y_seasonal = new_joined_df['seasonal_vaccine']  # Seasonal vaccine


In [16]:
from sklearn.model_selection import train_test_split

# Split for H1N1 vaccine prediction
X_train, X_test, y_h1n1_train, y_h1n1_test = train_test_split(X, y_h1n1, test_size=0.2, random_state=42)

# Split for Seasonal vaccine prediction
X_train, X_test, y_seasonal_train, y_seasonal_test = train_test_split(X, y_seasonal, test_size=0.2, random_state=42)

print("H1N1 Training Data:", X_train.shape, y_h1n1_train.shape)
print("Seasonal Training Data:", X_train.shape, y_seasonal_train.shape)

H1N1 Training Data: (21365, 56) (21365,)
Seasonal Training Data: (21365, 56) (21365,)


# **Build the Neural Network Model**


In [46]:
import tensorflow as tf
import sklearn as skl
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [47]:
import keras_tuner as kt

In [48]:
# Create the Keras Sequential model
model = tf.keras.models.Sequential()

## Single Prediction Model for H1N1

In [68]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','softmax'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=30,
        step=5), activation=activation, input_dim=56))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=30,
            step=5),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])

    return nn_model

In [69]:
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

Reloading Tuner from ./untitled_project/tuner0.json


In [72]:
tuner.search(
    X_train, y_h1n1_train,
    epochs=20,
    validation_data=(X_test, y_h1n1_test)
)

Trial 56 Complete [00h 00m 53s]
val_accuracy: 0.8390116095542908

Best val_accuracy So Far: 0.8401347994804382
Total elapsed time: 00h 33m 58s


In [74]:
# Get top model hyperparameters and print the values
top_hyper = tuner.get_best_hyperparameters(1)
for param in top_hyper:
    print(param.values)

{'activation': 'relu', 'first_units': 21, 'num_layers': 2, 'units_0': 6, 'units_1': 21, 'units_2': 26, 'units_3': 21, 'units_4': 16, 'units_5': 26, 'units_6': 26, 'units_7': 11, 'units_8': 6, 'units_9': 21, 'tuner/epochs': 20, 'tuner/initial_epoch': 0, 'tuner/bracket': 0, 'tuner/round': 0}


In [80]:
best_H1N1_model = tuner.get_best_models(num_models=1)[0]
best_H1N1_model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


<Sequential name=sequential, built=True>

In [81]:
best_H1N1_model.summary()

In [82]:
loss, accuracy, auc = best_H1N1_model.evaluate(X_test, y_h1n1_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}, Test AUC: {auc}')

[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8475 - auc: 0.8433 - loss: 0.3687
Test Loss: 0.38107654452323914, Test Accuracy: 0.8401347994804382, Test AUC: 0.8313204050064087


## Single Prediction Model for Seasonal

In [49]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','softmax'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=30,
        step=5), activation=activation, input_dim=56))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=30,
            step=5),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])

    return nn_model

In [51]:
tuner = kt.RandomSearch(
    create_model,
    objective="val_accuracy",
    max_trials=25,                   # Specify the number of trials explicitly
    executions_per_trial=1,          # Number of times each trial is executed
    directory="my_tuner_logs",
    project_name="random_search_tuning"
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [53]:
tuner.search(
    X_train, y_seasonal_train,
    epochs=10,
    validation_data=(X_test, y_seasonal_test)
)

Trial 25 Complete [00h 00m 32s]
val_accuracy: 0.5411830544471741

Best val_accuracy So Far: 0.7839760184288025
Total elapsed time: 00h 14m 27s


In [54]:
# Get top model hyperparameters and print the values
top_hyper2 = tuner.get_best_hyperparameters(1)
for param in top_hyper2:
    print(param.values)

{'activation': 'softmax', 'first_units': 16, 'num_layers': 3, 'units_0': 16, 'units_1': 11, 'units_2': 26, 'units_3': 21, 'units_4': 11, 'units_5': 11, 'units_6': 1, 'units_7': 26, 'units_8': 11, 'units_9': 1}


In [55]:
best_seasonal_model = tuner.get_best_models(num_models=1)[0]
best_seasonal_model

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


<Sequential name=sequential, built=True>

In [56]:
best_seasonal_model.summary()

In [57]:
loss, accuracy, auc = best_seasonal_model.evaluate(X_test, y_seasonal_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}, Test AUC: {auc}')

[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7920 - auc: 0.8560 - loss: 0.4753
Test Loss: 0.4833662509918213, Test Accuracy: 0.7839760184288025, Test AUC: 0.8508289456367493


## Multilabel Prediction Model- Unfinished

In [20]:
import tensorflow as tf

def create_multi_output_model(hp):
    # Input layer
    inputs = tf.keras.Input(shape=(X_train.shape[1],))  # Use the number of features in your data

    # First layer
    x = tf.keras.layers.Dense(
        units=hp.Int('first_units', min_value=10, max_value=50, step=10),
        activation=hp.Choice('activation', ['relu', 'tanh'])
    )(inputs)

    # Add hidden layers dynamically
    for i in range(hp.Int('num_layers', 1, 5)):  # Dynamically determine the number of hidden layers
        x = tf.keras.layers.Dense(
            units=hp.Int(f'units_{i}', min_value=10, max_value=50, step=10),
            activation=hp.Choice('activation', ['relu', 'tanh'])
        )(x)

    # Two output layers
    output_h1n1 = tf.keras.layers.Dense(1, activation='sigmoid', name='h1n1_output')(x)
    output_seasonal = tf.keras.layers.Dense(1, activation='sigmoid', name='seasonal_output')(x)

    # Build the model
    multi_output_model = tf.keras.Model(inputs=inputs, outputs=[output_h1n1, output_seasonal])

    # Compile the model
    multi_output_model.compile(
        optimizer='adam',
        loss={'h1n1_output': 'binary_crossentropy', 'seasonal_output': 'binary_crossentropy'},
        metrics={'h1n1_output': 'accuracy', 'seasonal_output': 'accuracy'}
    )

    return multi_output_model


In [21]:
# Set up the tuner
tuner = kt.RandomSearch(
    create_multi_output_model,
    objective=kt.Objective("val_h1n1_output_accuracy", direction="max"),  # Optimize for maximum accuracy
    max_trials=25,                                                      # Limit to 25 trials
    executions_per_trial=1,                                             # Run each trial only once
    directory="my_tuner_logs",                                          # Log directory
    project_name="multilabel_tuning"                                    # Project name
)


Reloading Tuner from my_tuner_logs/multilabel_tuning/tuner0.json


In [23]:
# Start hyperparameter tuning
tuner.search(
    X_train,  # Input features
    {'h1n1_output': y_h1n1_train, 'seasonal_output': y_seasonal_train},  # Multi-output labels
    validation_data=(X_test, {'h1n1_output': y_h1n1_test, 'seasonal_output': y_seasonal_test}),
    epochs=20 # Number of epochs per trial
)

Trial 25 Complete [00h 01m 04s]
val_h1n1_output_accuracy: 0.8367652297019958

Best val_h1n1_output_accuracy So Far: 0.8427554965019226
Total elapsed time: 00h 35m 36s


In [26]:
# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Print the best hyperparameters
print("Best Hyperparameters for Multilabel Prediction Model:", best_hps.values)


Best Hyperparameters for Multilabel Prediction Model: {'first_units': 20, 'activation': 'tanh', 'num_layers': 2, 'units_0': 20, 'units_1': 10, 'units_2': 40, 'units_3': 10, 'units_4': 50}


In [29]:
# Build the best model using the retrieved hyperparameters
best_multilabel_model = tuner.hypermodel.build(best_hps)


In [37]:
from sklearn.utils.class_weight import compute_class_weight

# Compute class weights for H1N1
class_weights_h1n1 = compute_class_weight(
    class_weight="balanced",
    classes=np.array([0, 1]),  # Convert list to numpy array
    y=y_h1n1_train
)

# Compute class weights for Seasonal
class_weights_seasonal = compute_class_weight(
    class_weight="balanced",
    classes=np.array([0, 1]),  # Convert list to numpy array
    y=y_seasonal_train
)

# Print the computed class weights
print("H1N1 Class Weights:", dict(zip([0, 1], class_weights_h1n1)))
print("Seasonal Class Weights:", dict(zip([0, 1], class_weights_seasonal)))


H1N1 Class Weights: {0: 0.6350692586647643, 1: 2.3509022887323945}
Seasonal Class Weights: {0: 0.9386257798084526, 1: 1.069961939102564}


In [39]:

# Weighted binary cross-entropy for H1N1
def weighted_binary_crossentropy_h1n1(y_true, y_pred):
    weights = tf.where(tf.equal(y_true, 1), 2.351, 0.635)  # Use H1N1 weights
    return tf.keras.losses.binary_crossentropy(y_true, y_pred) * weights

# Weighted binary cross-entropy for Seasonal
def weighted_binary_crossentropy_seasonal(y_true, y_pred):
    weights = tf.where(tf.equal(y_true, 1), 1.070, 0.939)  # Use Seasonal weights
    return tf.keras.losses.binary_crossentropy(y_true, y_pred) * weights


In [41]:
best_multilabel_model.compile(
    optimizer='adam',
    loss={
        'h1n1_output': weighted_binary_crossentropy_h1n1,
        'seasonal_output': weighted_binary_crossentropy_seasonal
    },
    metrics={
        'h1n1_output': 'accuracy',
        'seasonal_output': 'accuracy'
    }
)


In [42]:
history = best_multilabel_model.fit(
    X_train,
    {'h1n1_output': y_h1n1_train, 'seasonal_output': y_seasonal_train},
    validation_data=(X_test, {'h1n1_output': y_h1n1_test, 'seasonal_output': y_seasonal_test}),
    epochs=20
)


Epoch 1/20
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - h1n1_output_accuracy: 0.8459 - h1n1_output_loss: 0.3737 - loss: 0.8470 - seasonal_output_accuracy: 0.7772 - seasonal_output_loss: 0.4734 - val_h1n1_output_accuracy: 0.8392 - val_h1n1_output_loss: 0.3873 - val_loss: 0.8650 - val_seasonal_output_accuracy: 0.7821 - val_seasonal_output_loss: 0.4776
Epoch 2/20
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - h1n1_output_accuracy: 0.8422 - h1n1_output_loss: 0.3762 - loss: 0.8520 - seasonal_output_accuracy: 0.7752 - seasonal_output_loss: 0.4758 - val_h1n1_output_accuracy: 0.8371 - val_h1n1_output_loss: 0.3904 - val_loss: 0.8684 - val_seasonal_output_accuracy: 0.7814 - val_seasonal_output_loss: 0.4780
Epoch 3/20
[1m668/668[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - h1n1_output_accuracy: 0.8425 - h1n1_output_loss: 0.3791 - loss: 0.8526 - seasonal_output_accuracy: 0.7785 - seasonal_output_loss: 0.4734 - val_h

In [43]:
# Evaluate the final model
evaluation = best_multilabel_model.evaluate(
    X_test,
    {'h1n1_output': y_h1n1_test, 'seasonal_output': y_seasonal_test}
)

# Print test performance
print("Evaluation Results:")
print(f"Test Loss: {evaluation[0]:.4f}")
print(f"H1N1 Test Accuracy: {evaluation[1]:.4f}")
print(f"Seasonal Test Accuracy: {evaluation[2]:.4f}")


[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - h1n1_output_accuracy: 0.8443 - h1n1_output_loss: 0.3776 - loss: 0.8500 - seasonal_output_accuracy: 0.7898 - seasonal_output_loss: 0.4725
Evaluation Results:
Test Loss: 0.8710
H1N1 Test Accuracy: 0.3901
Seasonal Test Accuracy: 0.4808
