In [None]:
from google.colab import files
import pandas as pd
import io

# Prompt for a file through the Colab upload widget. The result is indexed by
# uploaded file name and holds each file's raw bytes.
upload_files = files.upload()

# Fail with a clear message instead of a bare StopIteration when the upload
# returns no files.
if not upload_files:
    raise ValueError("No file was uploaded; re-run this cell and select the CSV file.")

# Only the first uploaded file is read.
file_name = next(iter(upload_files))

# The file is semicolon-delimited and its column names sit on the second row
# (header=1), so the first row is skipped.
df = pd.read_csv(io.StringIO(upload_files[file_name].decode('utf-8')), header=1, delimiter=";")

df

In [None]:
# Drop the ID column. errors='ignore' keeps this cell safe to re-run after the
# column has already been removed (a second run would otherwise raise KeyError).
df = df.drop(columns='ID', errors='ignore')
df

In [None]:
# List the distinct codes present in the EDUCATION column.
print(pd.unique(df["EDUCATION"]))

In [None]:
# Fold the EDUCATION codes 0, 5 and 6 into category 4 ("others").
# Resulting codes: 1 = graduate school; 2 = university; 3 = high school; 4 = others.
df["EDUCATION"] = df["EDUCATION"].replace({0: 4, 5: 4, 6: 4})
df["EDUCATION"].unique()

In [None]:
# List the distinct codes present in the MARRIAGE column.
print(pd.unique(df["MARRIAGE"]))

In [None]:
# Fold the MARRIAGE code 0 into category 3 ("others").
# Resulting codes: 1 = married; 2 = single; 3 = others.
df["MARRIAGE"] = df["MARRIAGE"].replace({0: 3})
df["MARRIAGE"].unique()

In [None]:
# One-hot encode both EDUCATION and MARRIAGE into a separate frame;
# drop_first=True omits the first level of each encoded column.
# NOTE(review): new_data is not referenced by any later cell visible here.
new_data = pd.get_dummies(
    data=df,
    columns=["EDUCATION", "MARRIAGE"],
    drop_first=True,
)
new_data

In [None]:
# List the distinct codes present in the SEX column.
print(pd.unique(df["SEX"]))

In [None]:
# Re-code SEX from {1, 2} to {0, 1} (0 = male, 1 = female).
# The shift is guarded so that re-running this cell does not subtract 1 a
# second time and turn the codes into {-1, 0}.
if df["SEX"].isin([1, 2]).all():
    df["SEX"] = df["SEX"] - 1
print(df["SEX"].unique())

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Columns holding category codes that are one-hot encoded.
categorical_features = ['SEX', 'EDUCATION', 'MARRIAGE']

# One-hot encode the categorical columns and pass every other column through
# unchanged.
# - handle_unknown='ignore': a category not seen during fit is encoded as all
#   zeros at transform time instead of raising.
# - sparse_threshold=0: always return a dense array, so the output can be fed
#   to StandardScaler, which cannot centre a sparse matrix.
pre = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
    remainder='passthrough',
    sparse_threshold=0,
)

# Features are every column except the target column.
x = df.drop('default payment next month', axis = 1)
y = df['default payment next month']

In [None]:
from sklearn.preprocessing import StandardScaler

x_encoded = pre.fit_transform(x)
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x_encoded)

In [None]:
from sklearn.model_selection import train_test_split

x = df.drop(columns=["default payment next month"])
y = df["default payment next month"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
from tensorflow import keras

# Feed-forward binary classifier: three ReLU hidden layers (128, 64, 32 units)
# followed by a single sigmoid output unit. The input width is taken from the
# number of columns in x_train.
model = keras.Sequential()
model.add(keras.layers.Dense(128, activation='relu', input_shape=(x_train.shape[1],)))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Give the labels a fresh 0..n-1 index before fitting.
y_train = y_train.reset_index(drop=True)

# Train for 20 epochs in batches of 32, holding out 20% of the training rows
# for validation; class 1 is weighted 3x relative to class 0.
hist = model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    class_weight={0: 1.0, 1: 3.0},
)

In [None]:
from sklearn.metrics import classification_report

# Loss and accuracy on the held-out test split.
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels, then print
# scikit-learn's classification report against the true test labels.
y_pred = (model.predict(x_test) > 0.5).astype(int)
print(classification_report(y_test, y_pred))