In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras_tuner import RandomSearch
from keras_tuner import Objective
from tensorflow.keras.regularizers import L2

# Load Data

In [2]:
# Read both competition splits, using the `id` column as the row index.
train, test = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in ('train.csv', 'test.csv')
)

In [3]:
# Display the (rows, columns) dimensions of the training frame.
train.shape

(414, 7)

In [4]:
# Preview the first rows of the training frame (features plus `target`).
train.head()

Unnamed: 0_level_0,gravity,ph,osmo,cond,urea,calc,target
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1.013,6.19,443,14.8,124,1.45,0
1,1.025,5.4,703,23.6,394,4.18,0
2,1.009,6.13,371,24.5,159,9.04,0
3,1.021,4.91,442,20.8,398,6.63,1
4,1.021,5.53,874,17.8,385,2.21,1


In [5]:
# Count the rows per class in `target` to check the class balance.
train.target.value_counts()

0    230
1    184
Name: target, dtype: int64

In [6]:
def create_new_features(data):
    """Return a new frame with engineered features added to ``data``.

    The input frame is left untouched; all work happens on a copy.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``gravity``, ``ph``, ``osmo``, ``cond``,
        ``urea`` and ``calc``. Any other columns are passed through unchanged.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with ``ph`` removed and these columns appended:
        ``ion_product``, ``calcium_to_urea_ratio``, ``electrolyte_balance``,
        ``osmolality_to_sg_ratio``, ``osmo_density`` and ``acidic``
        (indicator for ``4.5 < ph <= 6.5``).
    """
    # Work on a copy so the caller's DataFrame is not mutated as a side effect.
    data = data.copy()

    # Ion product of calcium and urea
    data["ion_product"] = data["calc"] * data["urea"]

    # Calcium-to-urea ratio
    data["calcium_to_urea_ratio"] = data["calc"] / data["urea"]

    # Electrolyte balance: conductivity divided by 10 ** (-pH)
    data["electrolyte_balance"] = data["cond"] / (10 ** (-data["ph"]))

    # Osmolality-to-specific gravity ratio
    data["osmolality_to_sg_ratio"] = data["osmo"] / data["gravity"]

    # Product of osmolality and specific gravity
    data['osmo_density'] = data['osmo'] * data['gravity']

    # Bin pH into four categories and one-hot encode them
    data['pH_cat'] = pd.cut(data['ph'], bins=[0, 4.5, 6.5, 8.5, 14], labels=['sangat acidic', 'acidic', 'neutral', 'basic'])
    dummies = pd.get_dummies(data['pH_cat'])
    data = pd.concat([data, dummies], axis=1)

    # Keep only the 'acidic' indicator; drop the other dummies, the helper
    # category column and the raw pH value.
    data = data.drop(['pH_cat', 'sangat acidic' , 'basic','neutral','ph'], axis=1)

    return data

# Engineer features on BOTH splits. The scaling step concatenates them and
# labels the result with the training feature names, so the test frame must
# carry exactly the same feature columns as the training frame.
train = create_new_features(train)
test = create_new_features(test)

In [6]:
# Separate the label column from the feature matrix.
y = train['target']
X = train.drop(columns='target').copy()

# Feature Scaling

In [7]:
# Scale train and test features together, then split them apart again.
# NOTE: fitting the scaler on the combined frame means test-set statistics
# influence the scaling applied to the training data.

# Both frames must expose the same feature columns; otherwise the concat would
# introduce NaN-filled columns and the scaled frame would be mislabelled.
mismatched_columns = set(X.columns) ^ set(test.columns)
if mismatched_columns:
    raise ValueError(
        f"train/test feature columns differ: {sorted(mismatched_columns)}"
    )

n_train_rows = len(X)
X1 = pd.concat([X, test[X.columns]])

scaler = StandardScaler()
# Preserve the original `id` index so rows stay aligned with `y` and the test
# ids used in the submission do not depend on positional numbering.
X2 = pd.DataFrame(scaler.fit_transform(X1), columns=X1.columns, index=X1.index)

X = X2.iloc[:n_train_rows, :]
test = X2.iloc[n_train_rows:, :]

# Define MLP model

In [8]:
def build_model(hp):
    """Build and compile a tunable MLP binary classifier.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Search-space handle. Registers ``units_input``, ``num_hidden_layers``
        and, per hidden layer ``i``, ``units_{i}`` and ``activation_{i}``.

    Returns
    -------
    A compiled ``Sequential`` model with a single sigmoid output, trained with
    binary cross-entropy and reporting AUC. The input width is read from the
    notebook-level ``X_train``.
    """
    # Shared search range for every Dense layer's width.
    width_space = dict(min_value=32, max_value=512, step=32)

    net = Sequential()

    # First (input) layer: tunable width, fixed ReLU activation.
    first_width = hp.Int('units_input', **width_space)
    net.add(Dense(units=first_width, activation='relu', input_dim=X_train.shape[1]))

    # Zero to three extra hidden layers, each with its own width and activation.
    depth = hp.Int('num_hidden_layers', min_value=0, max_value=3)
    for i in range(depth):
        layer_width = hp.Int(f'units_{i}', **width_space)
        layer_activation = hp.Choice(f'activation_{i}', values=['relu', 'sigmoid', 'tanh'])
        net.add(Dense(units=layer_width, activation=layer_activation))

    # Single-probability output head.
    net.add(Dense(1, activation='sigmoid'))

    net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])
    return net

# Hold out 20% of the training rows; this split is used as the validation
# set that scores each tuner trial.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=0)

# Initialize the RandomSearch tuner.
# - seed: makes the sampled hyperparameter configurations repeatable.
# - overwrite: discard any tuner project left on disk by an earlier run, so
#   re-running this cell performs a fresh search instead of silently
#   reloading stale trials.
tuner = RandomSearch(
    build_model,
    objective=Objective("val_auc", direction="max"),
    max_trials=10,
    seed=0,
    overwrite=True,
)

# Search for the best model
tuner.search(X_train, y_train,
             epochs=20,
             batch_size=32,
             validation_data=(X_test, y_test))

# Get the best model architecture and hyperparameters
best_model = tuner.get_best_models(1)[0]
best_hyperparams = tuner.get_best_hyperparameters(1)[0]

Trial 10 Complete [00h 00m 02s]
val_auc: 0.7837209105491638

Best val_auc So Far: 0.7915697693824768
Total elapsed time: 00h 00m 18s
INFO:tensorflow:Oracle triggered exit


In [9]:
# Access the best hyperparameters
best_input_units = best_hyperparams.get('units_input')
best_hidden_layers = best_hyperparams.get('num_hidden_layers')

best_activation = []
best_units = []

# Extract the values of activation functions and units for each hidden layer
for i in range(best_hidden_layers):
    best_activation.append(best_hyperparams.get(f'activation_{i}'))
    best_units.append(best_hyperparams.get(f'units_{i}'))


# Print the best hyperparameters
print("Best Hyperparameters:")
print("input units =", best_input_units)
print("number of hidden layers =", best_hidden_layers)
print("activation =", best_activation)
print("units =", best_units)

Best Hyperparameters:
input units = 224
number of hidden layers = 0
activation = []
units = []


In [10]:
# Rebuild the winning architecture directly from the tuned hyperparameters
# instead of hard-coding layer sizes, so this cell stays consistent with
# whatever the search selected on the current run.
model = tuner.hypermodel.build(best_hyperparams)

# Refit on the full training set (no hold-out) before predicting on test.
model.fit(X, y, epochs=20, batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x276b10bb988>

# Training

# Final Model

# Prediction

In [11]:
# Score the scaled test features with the model fitted on the full training set.
final_predictions = model.predict(test)



In [12]:
# Keep the first output column of each prediction as a flat list.
# Reshaping to (n_rows, -1) first makes this cell safe to re-run: an already
# flattened list is treated as a single column instead of raising on `pred[0]`.
final_predictions = list(
    np.asarray(final_predictions).reshape(len(final_predictions), -1)[:, 0]
)

In [13]:
# Pair each test id with its predicted probability, one column per Series.
id_column = pd.Series(test.index, name='id')
target_column = pd.Series(final_predictions, name='target')
submission = pd.concat([id_column, target_column], axis=1)
submission

Unnamed: 0,id,target
0,414,0.208395
1,415,0.397568
2,416,0.941999
3,417,0.455105
4,418,0.365173
...,...,...
271,685,0.732250
272,686,0.172086
273,687,0.430951
274,688,0.260388


# Submission

In [14]:
# Write the id/target pairs to disk; index=False omits the positional row index.
submission.to_csv('submission_MLP_model.csv', index=False)