# Predicting Optimal Fertilizers 

# Keras - 3 Attempt

# Attempt - 1 Attempt

In [7]:
# reproducibility
import random
random.seed(42)
import numpy as np
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

# imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MultiLabelBinarizer
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import time
import json
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout

# load data
data_files = ["playground-series-s5e6/sample_submission.csv","playground-series-s5e6/test.csv","playground-series-s5e6/train.csv"]
train_dfs = []
df_test = None
for f in data_files:
    if f.endswith('train.csv'):
        train_dfs.append(pd.read_csv(f))
    elif f.endswith('test.csv'):
        df_test = pd.read_csv(f)
# infer id and target
id_col = 'id'
target_columns = ['Fertilizer Name']
# prepare train
df = pd.concat(train_dfs, ignore_index=True)

# multi-label encoding
col = target_columns[0]
df[col] = df[col].astype(str).str.split()
mlb = MultiLabelBinarizer()
y_enc = mlb.fit_transform(df[col])
classes_ = mlb.classes_

# features
X = df.drop(columns=target_columns + [id_col], errors='ignore')

# split
if df_test is None:
    X_train, X_val, y_train, y_val = train_test_split(
        X, y_enc, test_size=0.2, random_state=42
    )
    test_ids = None
else:
    X_train = X
    y_train = y_enc
    test_ids = df_test[id_col]
    X_val = df_test.drop(columns=target_columns + [id_col], errors='ignore')
    y_val = None

# drop all-missing cols
def drop_missing(df1, df2=None):
    df1 = df1.dropna(axis=1, how='all')
    if df2 is not None:
        df2 = df2[df1.columns]
        return df1, df2
    return df1

if df_test is not None:
    X_train, X_val = drop_missing(X_train, X_val)
else:
    X_train = drop_missing(X_train)

# drop high-cardinality cats
cat_cols_full = X_train.select_dtypes(include=['object','category']).columns
high_card = [c for c in cat_cols_full if X_train[c].nunique()>50]
X_train = X_train.drop(columns=high_card)
if df_test is not None:
    X_val = X_val.drop(columns=high_card)

# preprocessing pipeline
num_cols = X_train.select_dtypes(include=['int64','float64']).columns.tolist()
cat_cols = X_train.select_dtypes(include=['object','category']).columns.tolist()
num_pipe = Pipeline([
    ('imp', SimpleImputer(strategy='median')),
    ('scale', StandardScaler())
])
cat_pipe = Pipeline([
    ('imp', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])
preprocessor = ColumnTransformer([
    ('num', num_pipe, num_cols),
    ('cat', cat_pipe, cat_cols)
])
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val) if df_test is not None else None

# model
n_features = X_train_proc.shape[1]
n_classes = y_train.shape[1]
n_samples = X_train_proc.shape[0]
# layer sizes
if n_samples<10000 or n_features<100:
    layer_sizes=[min(n_features*2,128),min(n_features,64)]
    drop_rate=0.3
else:
    sizes=[min(n_features*2,1024),min(n_features,1024),
           min(int(n_features*0.5),1024),min(int(n_features*0.25),1024)]
    layer_sizes=[s for s in sizes if s>=16]
    drop_rate=0.4
# build
inputs=Input(shape=(n_features,))
x=inputs
for s in layer_sizes:
    x=Dense(s,activation='relu')(x)
    x=BatchNormalization()(x)
    x=Dropout(drop_rate)(x)
outputs=Dense(n_classes,activation='sigmoid')(x)
model=Model(inputs,outputs)
# compile
model.compile(
    optimizer='adam',loss='binary_crossentropy',
    metrics=['accuracy',tf.keras.metrics.Precision(),tf.keras.metrics.Recall()]
)
# callbacks
cbs=[
    EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True),
    ModelCheckpoint('best_model.h5',monitor='val_loss',save_best_only=True)
]
# train
start=time.time()
if X_val_proc is not None:
    history=model.fit(
        X_train_proc,y_train,
        validation_data=(X_val_proc,y_val),
        epochs=100,batch_size=128,callbacks=cbs,verbose=2
    )
else:
    history=model.fit(
        X_train_proc,y_train,validation_split=0.2,
        epochs=100,batch_size=128,callbacks=cbs,verbose=2
    )
end=time.time()
# log results
res={
    'training_accuracy':history.history['accuracy'][-1],
    'training_loss':history.history['loss'][-1],
    'validation_accuracy':history.history.get('val_accuracy', [None])[-1],
    'validation_loss':history.history.get('val_loss',[None])[-1]
}
with open('results.json','w') as f:
    json.dump(res,f)
# predict
X_test_proc = X_val_proc if X_val_proc is not None else preprocessor.transform(X)
raw=model.predict(X_test_proc)
final=(raw>0.5).astype(int)
if final.ndim==1:final=final.reshape(-1,1)
sub=pd.DataFrame(final,columns=classes_)
sub.insert(0,id_col,test_ids.reset_index(drop=True))
sub.to_csv('submission_result.csv',index=False)

Epoch 1/100


ValueError: None values not supported.

# Attempt - 2

In [9]:
import random
import numpy as np
import pandas as pd
import json
import time
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.metrics import Precision, Recall
import tensorflow.keras.backend as K

# 1. Reproducibility
random.seed(42)
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

# 2. Data Loading
train_df = pd.read_csv('playground-series-s5e6/train.csv')
test_df = pd.read_csv('playground-series-s5e6/test.csv')

# Infer id and target columns
id_col = 'id'
target_col = 'Fertilizer Name'

# 3. Target Encoding for multi-label classification
def parse_labels(x):
    return x.split() if isinstance(x, str) else []

y_raw = train_df[target_col].astype(str).apply(parse_labels)
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
y_enc = mlb.fit_transform(y_raw)
classes_ = mlb.classes_

# 4. Feature matrix
drop_cols = [target_col]
X = train_df.drop(columns=drop_cols + [id_col], errors='ignore')
X_test = test_df.drop(columns=[target_col, id_col], errors='ignore')

# 5. Drop columns with all missing values
all_missing = [col for col in X.columns if X[col].isna().all()]
X.drop(columns=all_missing, inplace=True)
X_test.drop(columns=all_missing, inplace=True)

# Identify categorical and numeric
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
# Remove high-cardinality cat
cat_keep = [col for col in categorical_features if X[col].nunique() <= 50]

# Preprocessing pipeline
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', cat_transformer, cat_keep)
    ],
    remainder='drop'
)

# Fit and transform
t0 = time.time()
X_proc = preprocessor.fit_transform(X)
X_test_proc = preprocessor.transform(X_test)
print(f'Preprocessing time: {time.time() - t0:.2f}s')

# 6. Model Architecture (multi-label, shallow for n_features<100)
n_samples, n_features = X_proc.shape
n_classes = len(classes_)

# Custom F1 metric
def f1_score_metric(y_true, y_pred):
    y_pred_round = K.round(y_pred)
    tp = K.sum(K.cast(y_true * y_pred_round, 'float'), axis=0)
    pp = K.sum(K.cast(y_pred_round, 'float'), axis=0)
    ap = K.sum(K.cast(y_true, 'float'), axis=0)
    precision = tp / (pp + K.epsilon())
    recall = tp / (ap + K.epsilon())
    f1 = 2 * precision * recall / (precision + recall + K.epsilon())
    return K.mean(f1)

# Build model
inputs = tf.keras.Input(shape=(n_features,))
# Layer 1
x = tf.keras.layers.Dense(min(n_features*2, 128), activation='relu')(inputs)
x = tf.keras.layers.Dropout(0.3)(x)
# Layer 2
x = tf.keras.layers.Dense(min(n_features, 64), activation='relu')(x)
x = tf.keras.layers.Dropout(0.3)(x)
# Output
y_pred = tf.keras.layers.Dense(n_classes, activation='sigmoid')(x)
model = tf.keras.Model(inputs=inputs, outputs=y_pred)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(), Recall(), f1_score_metric]
)
model.summary()

# 7. Callbacks & Training
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1),
    ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, verbose=1)
]
start_time = time.time()
history = model.fit(
    X_proc, y_enc,
    validation_split=0.2,
    epochs=100,
    batch_size=128,
    callbacks=callbacks,
    verbose=2
)
training_duration = time.time() - start_time

# 8. Logging
results = {
    'training_accuracy': float(history.history['accuracy'][-1]),
    'training_loss': float(history.history['loss'][-1]),
    'validation_accuracy': float(history.history['val_accuracy'][-1]),
    'validation_loss': float(history.history['val_loss'][-1]),
    'training_time_sec': training_duration
}
with open('results.json', 'w') as f:
    json.dump(results, f)

# 9. Prediction & Submission
raw_preds = model.predict(X_test_proc)
binary_preds = (raw_preds > 0.5).astype(int)
labels = mlb.inverse_transform(binary_preds)
# Join labels
submission_list = [' '.join(label_set) if len(label_set)>0 else '' for label_set in labels]

submission = pd.DataFrame({
    id_col: test_df[id_col].values,
    target_col: submission_list
})
submission.to_csv('submission_result.csv', index=False)

Preprocessing time: 0.95s


Epoch 1/100


TypeError: Input 'y' of 'Mul' Op has type float32 that does not match type int64 of argument 'x'.

# Attempt - 3

## Required to give a small hint: "It is a mutliclass classification where you need to predict top-3 fertilizer names"

In [16]:
# Reproducibility
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import json
import time
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import backend as K
from tensorflow.keras.metrics import Precision, Recall

# Set seeds
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Load data
files = [f for f in ["playground-series-s5e6/sample_submission.csv","playground-series-s5e6/test.csv","playground-series-s5e6/train.csv"] if f != 'playground-series-s5e6/sample_submission.csv']
train_dfs = []
df_test = None
for f in files:
    if f.endswith('test.csv'):
        df_test = pd.read_csv(f)
    else:
        train_dfs.append(pd.read_csv(f))

# Infer id and target
id_col = 'id'
target_columns = ['Fertilizer Name']

# Combine training
df = pd.concat(train_dfs, ignore_index=True)

# Encode targets for multi-label
from sklearn.preprocessing import MultiLabelBinarizer
# split space-delimited labels
y_list = df['Fertilizer Name'].astype(str).str.split(' ')
mlb = MultiLabelBinarizer()
y_enc = mlb.fit_transform(y_list)
classes_ = mlb.classes_

# Prepare features
X = df.drop(columns=target_columns + [id_col], errors='ignore')

# If df_test exists, set train and test
if df_test is not None:
    X_train = X.copy()
    y_train = y_enc
    train_ids = df[id_col]
    test_ids = df_test[id_col]
    X_val = df_test.drop(columns=target_columns + [id_col], errors='ignore')
    y_val = None
else:
    strat = None
    X_train, X_val, y_train, y_val = train_test_split(
        X, y_enc,
        test_size=0.2,
        random_state=42,
        stratify=strat
    )
    train_ids = X_train[id_col] if id_col in X_train else None
    test_ids = X_val[id_col] if id_col in X_val else None

# Feature engineering
# Drop cols all missing
drop_all_missing = [c for c in X_train.columns if X_train[c].isna().all()]
X_train.drop(columns=drop_all_missing, inplace=True)
X_val.drop(columns=drop_all_missing, inplace=True)
if df_test is not None:
    X_val = X_val.copy()

# Identify categorical vars
cat_cols = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
# Drop high-cardinality
low_card_cats = [c for c in cat_cols if X_train[c].nunique() <= 50]
high_card = set(cat_cols) - set(low_card_cats)
X_train.drop(columns=list(high_card), inplace=True)
X_val.drop(columns=list(high_card), inplace=True)

# Numeric columns
num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()

# Preprocessing pipeline
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])
preprocessor = ColumnTransformer([
    ('num', num_pipeline, num_cols),
    ('cat', cat_pipeline, low_card_cats)
])

# Fit and transform
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# Model architecture
n_samples, n_features = X_train_proc.shape
n_classes = len(classes_)

# Build layers sizes
def get_layer_sizes(n_feat):
    sizes = []
    for i in [2,1,0.5,0.25]:
        sz = int(min(n_feat * i, 1024))
        if sz >= 16:
            sizes.append(sz)
    return sizes

layer_sizes = get_layer_sizes(n_features)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout

model = Sequential()
# Input layer implicit
for sz in layer_sizes:
    model.add(Dense(sz, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
# Output layer
model.add(Dense(n_classes, activation='sigmoid'))

# Compile
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[Precision(name='precision'), Recall(name='recall')]
)

# Callbacks
timestamp = int(time.time())
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint(f'model_{timestamp}.h5', save_best_only=True)
]

# Training
t0 = time.time()
history = model.fit(
    X_train_proc, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=128,
    callbacks=callbacks,
    verbose=2
)
duration = time.time() - t0

# Save results
results = {
    'training_accuracy': history.history.get('recall')[-1],
    'training_loss': history.history['loss'][-1],
    'validation_accuracy': history.history.get('val_recall')[-1],
    'validation_loss': history.history['val_loss'][-1]
}
with open('results.json','w') as f:
    json.dump(results, f)

# Predictions
raw_preds = model.predict(X_val_proc)
# Use top 3 predictions
top_k = 3
preds = []
for row in raw_preds:
    idxs = np.argsort(-row)[:top_k]
    names = classes_[idxs]
    preds.append(' '.join(names))

# Submission
submission = pd.DataFrame({id_col: test_ids.reset_index(drop=True), 'Fertilizer Name': preds})
submission.to_csv('submission_result.csv', index=False)

Epoch 1/100




4688/4688 - 11s - 2ms/step - loss: 0.4298 - precision: 0.1410 - recall: 0.0172 - val_loss: 0.4097 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/100




4688/4688 - 9s - 2ms/step - loss: 0.4108 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4096 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/100




4688/4688 - 9s - 2ms/step - loss: 0.4100 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4093 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/100




4688/4688 - 9s - 2ms/step - loss: 0.4096 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4092 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 5/100




4688/4688 - 9s - 2ms/step - loss: 0.4094 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4092 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 6/100




4688/4688 - 9s - 2ms/step - loss: 0.4092 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 7/100




4688/4688 - 10s - 2ms/step - loss: 0.4091 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 8/100




4688/4688 - 9s - 2ms/step - loss: 0.4091 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 9/100




4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 10/100




4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 11/100
4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 12/100




4688/4688 - 8s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 13/100




4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 14/100
4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 15/100




4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 16/100




4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 17/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 18/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 19/100
4688/4688 - 9s - 2ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 20/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 21/100
4688/4688 - 8s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 22/100




4688/4688 - 8s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 23/100
4688/4688 - 8s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 24/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 25/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 26/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 27/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 28/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 29/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 30/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 31/100
4688/4688 - 8s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 32/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 33/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 34/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 35/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 36/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 37/100




4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 38/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 39/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 40/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 41/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 42/100
4688/4688 - 9s - 2ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall

# Keras Tuner - 1 Attempt

## Attempt 1

In [None]:
# Reproducibility
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import json
import time
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import backend as K
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import load_model


# Set seeds
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Load data
files = [f for f in ["sample_submission.csv","test.csv","train.csv"] if f != 'sample_submission.csv']
train_dfs = []
df_test = None
for f in files:
    if f.endswith('test.csv'):
        df_test = pd.read_csv(f)
    else:
        train_dfs.append(pd.read_csv(f))

# Infer id and target
id_col = 'id'
target_columns = ['Fertilizer Name']

# Combine training
df = pd.concat(train_dfs, ignore_index=True)

# Encode targets for multi-label
from sklearn.preprocessing import MultiLabelBinarizer
# split space-delimited labels
y_list = df['Fertilizer Name'].astype(str).str.split(' ')
mlb = MultiLabelBinarizer()
y_enc = mlb.fit_transform(y_list)
classes_ = mlb.classes_

# Prepare features
X = df.drop(columns=target_columns + [id_col], errors='ignore')

# If df_test exists, set train and test
if df_test is not None:
    X_train = X.copy()
    y_train = y_enc
    train_ids = df[id_col]
    test_ids = df_test[id_col]
    X_val = df_test.drop(columns=target_columns + [id_col], errors='ignore')
    y_val = None
else:
    strat = None
    X_train, X_val, y_train, y_val = train_test_split(
        X, y_enc,
        test_size=0.2,
        random_state=42,
        stratify=strat
    )
    train_ids = X_train[id_col] if id_col in X_train else None
    test_ids = X_val[id_col] if id_col in X_val else None

# Feature engineering
# Drop cols all missing
drop_all_missing = [c for c in X_train.columns if X_train[c].isna().all()]
X_train.drop(columns=drop_all_missing, inplace=True)
X_val.drop(columns=drop_all_missing, inplace=True)
if df_test is not None:
    X_val = X_val.copy()

# Identify categorical vars
cat_cols = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
# Drop high-cardinality
low_card_cats = [c for c in cat_cols if X_train[c].nunique() <= 50]
high_card = set(cat_cols) - set(low_card_cats)
X_train.drop(columns=list(high_card), inplace=True)
X_val.drop(columns=list(high_card), inplace=True)

# Numeric columns
num_cols = X_train.select_dtypes(include=[np.number]).columns.tolist()

# Preprocessing pipeline
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(sparse_output=False, handle_unknown='ignore'))
])
preprocessor = ColumnTransformer([
    ('num', num_pipeline, num_cols),
    ('cat', cat_pipeline, low_card_cats)
])

# Fit and transform
X_train_proc = preprocessor.fit_transform(X_train)
X_val_proc = preprocessor.transform(X_val)

# Model architecture
n_samples, n_features = X_train_proc.shape
n_classes = len(classes_)

import keras_tuner as kt
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define early stopping and checkpoint
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

class MyHyperModel(kt.HyperModel):
    def build(self, hp):
        layers = hp.Int('layers', 2, 8)
        units = hp.Int('units', 64, 1024, step=64)
        drop = hp.Float('dropout', 0.0, 0.5, step=0.1)
        opt = hp.Choice('optimizer', ['adam'])
        lr = hp.Float('learning_rate', 1e-5, 0.01, sampling='log')

        inputs = Input(shape=(n_features,))
        x = inputs
        for _ in range(layers):
            x = Dense(units, activation='relu')(x)
            x = BatchNormalization()(x)
            x = Dropout(drop)(x)
        x = Dense(n_classes, activation='sigmoid')(x)  # Output layer
        model = Model(inputs, x)
        model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['precision', 'recall'])
        return model

# Tuner setup
bs = 32  # batch size
ep = 20  # epochs

tuner = kt.BayesianOptimization(
    MyHyperModel(),
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    seed=42,
    overwrite=True,
    project_name='bayesian_tuner'
)

if y_val is not None:
    tuner.search(
        X_train_proc, y_train,
        validation_data=(X_val_proc, y_val),
        batch_size=bs, epochs=ep,
        callbacks=[early_stopping, checkpoint]
    )
else:
    tuner.search(
        X_train_proc, y_train,
        validation_split=0.2,
        batch_size=bs, epochs=ep,
        callbacks=[early_stopping, checkpoint]
    )

model = tuner.hypermodel.build(
    tuner.get_best_hyperparameters(1)[0]
)

# Retrain model with original callbacks and data

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['precision', 'recall'])

start_time = time.time()  # Start timing


if y_val is not None:
    history = model.fit(
        X_train_proc, y_train,
        validation_data=(X_val_proc, y_val),
        initial_epoch = 33,
        epochs=100, batch_size=bs,
        callbacks=[early_stopping, checkpoint],
        verbose=2
    )
else:
    history = model.fit(
        X_train_proc, y_train,
        validation_split=0.2,
        initial_epoch = 33,
        epochs=100, batch_size=bs,
        callbacks=[early_stopping, checkpoint],
        verbose=2
    )

duration = time.time() - start_time  # Calculate duration


# Save results
results = {
    'training_accuracy': history.history.get('recall')[-1],
    'training_loss': history.history['loss'][-1],
    'validation_accuracy': history.history.get('val_recall')[-1],
    'validation_loss': history.history['val_loss'][-1]
}
with open('results.json','w') as f:
    json.dump(results, f)

# Predictions
raw_preds = model.predict(X_val_proc)
# Use top 3 predictions
top_k = 3
preds = []
for row in raw_preds:
    idxs = np.argsort(-row)[:top_k]
    names = classes_[idxs]
    preds.append(' '.join(names))

# Submission
submission = pd.DataFrame({id_col: test_ids.reset_index(drop=True), 'Fertilizer Name': preds})
submission.to_csv('submission_result.csv', index=False)

Trial 10 Complete [00h 20m 08s]
val_loss: 0.4092257022857666

Best val_loss So Far: 0.4089970588684082
Total elapsed time: 04h 43m 15s
Epoch 1/100




18750/18750 - 83s - 4ms/step - loss: 0.4167 - precision: 0.1445 - recall: 0.0033 - val_loss: 0.4096 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/100




18750/18750 - 76s - 4ms/step - loss: 0.4098 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4093 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/100




18750/18750 - 71s - 4ms/step - loss: 0.4096 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4092 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/100




18750/18750 - 72s - 4ms/step - loss: 0.4095 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4092 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 5/100




18750/18750 - 80s - 4ms/step - loss: 0.4094 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4092 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 6/100




18750/18750 - 80s - 4ms/step - loss: 0.4093 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4092 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 7/100




18750/18750 - 77s - 4ms/step - loss: 0.4092 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 8/100




18750/18750 - 79s - 4ms/step - loss: 0.4092 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 9/100




18750/18750 - 83s - 4ms/step - loss: 0.4091 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 10/100
18750/18750 - 77s - 4ms/step - loss: 0.4091 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 11/100
18750/18750 - 77s - 4ms/step - loss: 0.4091 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 12/100




18750/18750 - 77s - 4ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 13/100
18750/18750 - 77s - 4ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4092 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 14/100




18750/18750 - 80s - 4ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4091 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 15/100




18750/18750 - 77s - 4ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 16/100
18750/18750 - 78s - 4ms/step - loss: 0.4090 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 17/100
18750/18750 - 77s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 18/100
18750/18750 - 79s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 19/100




18750/18750 - 76s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 20/100
18750/18750 - 79s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 21/100
18750/18750 - 78s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 22/100
18750/18750 - 78s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 23/100
18750/18750 - 73s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 24/100
18750/18750 - 74s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.000



18750/18750 - 76s - 4ms/step - loss: 0.4089 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 27/100




18750/18750 - 79s - 4ms/step - loss: 0.4088 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4089 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 28/100
18750/18750 - 78s - 4ms/step - loss: 0.4088 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 29/100
18750/18750 - 76s - 4ms/step - loss: 0.4088 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 30/100
18750/18750 - 80s - 4ms/step - loss: 0.4088 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 31/100
18750/18750 - 80s - 4ms/step - loss: 0.4088 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 32/100
18750/18750 - 78s - 4ms/step - loss: 0.4088 - precision: 0.0000e+00 - recall: 0.0000e+00 - val_loss: 0.4090 - val_precision: 0.000

In [None]:
print(duration)

3795.496926
