In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
import os

base_dir = "./inputs"
files = os.listdir(base_dir)
files

['bank-full.csv', 'sample_submission.csv', 'test.csv', 'train.csv']

In [2]:
train_df = pd.read_csv(os.path.join(base_dir, files[3])).set_index("id")
target = train_df["y"]
train_df.drop("y", axis=1, inplace=True)
orig_df = pd.read_csv(os.path.join(base_dir, files[0]))
orig_target = orig_df["y"]
orig_df.drop("y", axis=1, inplace=True)

In [3]:
# Encode categorical columns
categories = train_df.select_dtypes(include=["object"]).columns
le = LabelEncoder()
for column in categories:
    train_df[column] = le.fit_transform(train_df[column])
    orig_df[column] = le.transform(orig_df[column])
orig_target = le.fit_transform(orig_target)

In [4]:
# Scale the data
sdt = StandardScaler()
train_df = sdt.fit_transform(train_df)
orig_df = sdt.fit_transform(orig_df)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(train_df, target, test_size=0.2, random_state=42)

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input

# Define the model
model = Sequential([
    Input(shape=(16,)),  # Input layer with 7 features
    Dense(10, activation='relu'),  # 10 hidden layers
    Dense(1, activation='sigmoid')  # Output layer with sigmoid activation for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model and save the history
history = model.fit(X_train, y_train, epochs=10, batch_size=10, validation_split=0.2)

Epoch 1/10
[1m48000/48000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 2ms/step - accuracy: 0.9115 - loss: 0.1978 - val_accuracy: 0.9175 - val_loss: 0.1853
Epoch 2/10
[1m48000/48000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 2ms/step - accuracy: 0.9183 - loss: 0.1846 - val_accuracy: 0.9191 - val_loss: 0.1834
Epoch 3/10
[1m48000/48000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 2ms/step - accuracy: 0.9188 - loss: 0.1833 - val_accuracy: 0.9190 - val_loss: 0.1828
Epoch 4/10
[1m48000/48000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 2ms/step - accuracy: 0.9190 - loss: 0.1831 - val_accuracy: 0.9190 - val_loss: 0.1826
Epoch 5/10
[1m48000/48000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 2ms/step - accuracy: 0.9190 - loss: 0.1828 - val_accuracy: 0.9193 - val_loss: 0.1820
Epoch 6/10
[1m48000/48000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 2ms/step - accuracy: 0.9191 - loss: 0.1826 - val_accuracy: 0.9197 - val_loss:

In [7]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")
print(f"Test Loss: {loss}")

[1m4688/4688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 0.9196 - loss: 0.1823
Test Accuracy: 0.9196133613586426
Test Loss: 0.1822865754365921


In [8]:
model.save('./saved_models/model.keras')

In [9]:
from tensorflow.keras.models import load_model

model = load_model('./saved_models/model.keras')

In [10]:
X_train = orig_df
y_train = orig_target

In [11]:
model.fit(X_train, y_train, epochs=10, batch_size=10, validation_split=0.2)

Epoch 1/10
[1m3617/3617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9371 - loss: 0.1656 - val_accuracy: 0.7396 - val_loss: 0.6012
Epoch 2/10
[1m3617/3617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9386 - loss: 0.1584 - val_accuracy: 0.7349 - val_loss: 0.6267
Epoch 3/10
[1m3617/3617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9395 - loss: 0.1561 - val_accuracy: 0.7325 - val_loss: 0.6204
Epoch 4/10
[1m3617/3617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9394 - loss: 0.1547 - val_accuracy: 0.7352 - val_loss: 0.5952
Epoch 5/10
[1m3617/3617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9393 - loss: 0.1537 - val_accuracy: 0.7327 - val_loss: 0.6017
Epoch 6/10
[1m3617/3617[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.9393 - loss: 0.1532 - val_accuracy: 0.7267 - val_loss: 0.6180
Epoch 7/10
[1m3

<keras.src.callbacks.history.History at 0x24d9c7a3cb0>

In [14]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test.to_numpy())
print(f"Test Accuracy: {accuracy}")
print(f"Test Loss: {loss}")

[1m4688/4688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 690us/step - accuracy: 0.9037 - loss: 0.2231
Test Accuracy: 0.9037200212478638
Test Loss: 0.22308942675590515
