In [2]:
from google.colab import userdata
import os

os.environ['KAGGLE_USERNAME'] = userdata.get('KAGGLE_USERNAME');
os.environ['KAGGLE_KEY'] = userdata.get('KAGGLE_KEY');

!kaggle competitions download -c lend-or-lose
!unzip lend-or-lose.zip

Downloading lend-or-lose.zip to /content
 61% 5.00M/8.23M [00:00<00:00, 27.6MB/s]
100% 8.23M/8.23M [00:00<00:00, 42.6MB/s]
Archive:  lend-or-lose.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [17]:
# Read the competition files. `LoanID` is removed from the training frame
# immediately; the submission frame is kept exactly as read.
train_data = pd.read_csv('train.csv').drop(columns=['LoanID'])
submission_data = pd.read_csv('test.csv')

In [18]:
class LogTransformer:
    """Stateless transformer that applies ``log(1 + x)`` element-wise.

    Exposes the ``fit`` / ``transform`` pair so it can serve as a pipeline step.

    NOTE(review): this is a plain class with no scikit-learn base class, so it
    does not define ``get_params`` / ``set_params`` / ``fit_transform`` itself —
    confirm that is acceptable for the scikit-learn version in use.
    """

    def fit(self, X, y=None):
        """Learn nothing and return ``self``; ``X`` and ``y`` are ignored."""
        return self

    def transform(self, X):
        """Return ``np.log1p(X)`` as a new object; ``X`` is left unmodified."""
        logged = np.log1p(X)
        return logged

# Feature columns treated as categorical (hard-coded list).
catColumns = [
    "EmploymentType",
    "Education",
    "MaritalStatus",
    "LoanPurpose",
    "HasMortgage",
    "HasDependents",
    "HasCoSigner",
]

# Numeric feature columns: every int64/float64 column of the training frame
# except the `Default` target.
numColumns = list(
    train_data.drop(columns=['Default'])
    .select_dtypes(include=['int64', 'float64'])
    .columns
)

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Separate the feature columns from the `Default` target.
X = train_data.drop(columns=['Default'])
y = train_data['Default']

# Numeric branch: log1p, then standardise to zero mean / unit variance.
numTransformer = Pipeline(steps=[
    ('log', LogTransformer()),
    ('scaler', StandardScaler()),
])

# Categorical branch: one-hot encode. With handle_unknown='ignore', a category
# not seen during fit is encoded as all zeros instead of raising.
catTransformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Columns listed in neither numColumns nor catColumns are dropped (the default
# `remainder`). sparse_threshold=0 makes the combined output always dense:
# by default the result switches to a sparse matrix when the one-hot block
# makes the overall density fall below 0.3, so the output type would depend on
# the data rather than on the code.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numTransformer, numColumns),
        ('cat', catTransformer, catColumns),
    ],
    sparse_threshold=0,
)

# Hold out 30% of the rows for validation; the fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the preprocessing on the training split only, then apply it unchanged to
# the validation split so no validation statistics leak into the scaler/encoder.
# NOTE: X_train / X_val are rebound here from DataFrames to transformed arrays.
X_train = preprocessor.fit_transform(X_train)
X_val = preprocessor.transform(X_val)

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and the TensorFlow backend so weight initialisation,
# dropout masks and batch shuffling are repeatable between runs (as far as the
# backend allows). 42 matches the seed used for the train/validation split.
set_random_seed(42)

# Fully connected binary classifier: the layer widths grow to 512 and shrink
# back to 64, with dropout after the first three hidden layers, ending in a
# single sigmoid unit.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 20 epochs, reporting loss/accuracy on the validation split after each.
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)

# `results` holds [loss, accuracy], matching the loss and metric given to compile().
# The numbers are computed on the validation split (X_val / y_val), not on the
# competition test set, so they are labelled as validation metrics.
results = model.evaluate(X_val, y_val, verbose=0)
val_loss, val_accuracy = results[0], results[1]
print(f"Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")

Epoch 1/20
[1m4469/4469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 5ms/step - accuracy: 0.8843 - loss: 0.3319 - val_accuracy: 0.8829 - val_loss: 0.3248
Epoch 2/20
[1m4469/4469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 4ms/step - accuracy: 0.8847 - loss: 0.3195 - val_accuracy: 0.8829 - val_loss: 0.3184
Epoch 3/20
[1m4469/4469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 4ms/step - accuracy: 0.8855 - loss: 0.3169 - val_accuracy: 0.8839 - val_loss: 0.3251
Epoch 4/20
[1m4469/4469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 4ms/step - accuracy: 0.8840 - loss: 0.3189 - val_accuracy: 0.8832 - val_loss: 0.3181
Epoch 5/20
[1m4469/4469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 4ms/step - accuracy: 0.8846 - loss: 0.3168 - val_accuracy: 0.8829 - val_loss: 0.3170
Epoch 6/20
[1m4469/4469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 4ms/step - accuracy: 0.8847 - loss: 0.3154 - val_accuracy: 0.8841 - val_loss: 0.3179
Epoch 7/20

In [19]:
# Set the loan identifiers aside; they are removed from the frame before it is
# passed to the preprocessor.
test_df_ids = submission_data['LoanID']
test_df = submission_data.drop('LoanID', axis=1)

# Apply the preprocessor to the submission rows, flatten the model's output to
# 1-D, and turn it into 0/1 labels with a 0.5 cut-off.
X_test = preprocessor.transform(test_df)
test_predictions = (model.predict(X_test).ravel() > 0.5).astype(int)

[1m1596/1596[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
