In [None]:
from sklearn.datasets import fetch_openml
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier



In [None]:
# Download OpenML dataset 31 (German credit data, per the variable name)
# and ask for pandas objects rather than NumPy arrays.
statlog_german_credit_data = fetch_openml(as_frame=True, data_id=31)

# Separate the feature frame from the target column.
X, y = statlog_german_credit_data.data, statlog_german_credit_data.target

# List the feature names that are available.
print(X.columns)


In [None]:
# Dimensions of the feature frame and of the target, one per line.
print(X.shape, y.shape, sep="\n")

In [None]:
# Preview the first rows only instead of printing the whole feature frame.
print(X.head())

# Encode the target as integers: 1 for 'bad', 0 for everything else.
# The labels are derived from the raw dataset target rather than from `y`
# itself, so re-running this cell always yields the same result (recoding `y`
# in place would turn every label into 0 on a second run).
y = (statlog_german_credit_data.target == 'bad').astype(int)
y


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
)

# Report the dimensions of every partition.
for shape_report in (
    f"the shape of the training set (input) is: {x_train.shape}",
    f"the shape of the training set (target) is: {y_train.shape}\n",
    f"the shape of the test set (input) is: {x_test.shape}",
    f"the shape of the test set (target) is: {y_test.shape}",
):
    print(shape_report)


In [None]:
# Display the training features produced by the train/test split.
x_train

In [None]:
# Columns that hold numeric measurements and are scaled rather than encoded.
numerical_features = ['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']

# Every remaining column is treated as categorical. Iterating over X.columns
# (instead of taking a set difference) keeps the original column order, so the
# layout of the encoded feature matrix is identical on every kernel restart;
# set iteration order for strings changes between Python processes.
categorical_features = [column for column in X.columns if column not in numerical_features]


In [None]:
# Preprocessing for numerical data: a degree-1 polynomial expansion without a
# bias column (leaves the columns as they are; raise `degree` to add higher-order
# terms) followed by standardisation.
numerical_transformer = Pipeline(steps=[
    ('poly', PolynomialFeatures(degree=1, include_bias=False)),
    ('scaler', StandardScaler())
])

# Preprocessing for categorical data: one-hot encoding.
# drop='first' avoids the dummy variable trap.
# handle_unknown='ignore' encodes a category that was not present during fit as
# all zeros instead of raising when the fitted encoder meets new data.
categorical_transformer = OneHotEncoder(drop='first', handle_unknown='ignore')

# Bundle preprocessing for numerical and categorical data.
# sparse_threshold=0 forces a dense array as output, so the output type no longer
# depends on how sparse the one-hot block happens to be for the fitted data.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ],
    sparse_threshold=0,
)

# Fit on the training rows only and keep the transformed matrix for inspection.
X_train_transformed = preprocessor.fit_transform(x_train)



In [None]:
# Shape of the training matrix returned by the fitted preprocessor.
X_train_transformed.shape

In [None]:
def create_model(meta=None):
    """Build and compile the binary classification network.

    The network has one hidden ``Dense`` layer of 10 ReLU units and a single
    sigmoid output unit, compiled with binary cross-entropy loss, the Adam
    optimizer and accuracy as the reported metric.

    Parameters
    ----------
    meta : dict, optional
        Metadata that scikeras passes to model-building functions which
        declare a ``meta`` parameter. Its ``"n_features_in_"`` entry is used
        as the input width, so the network always matches the data actually
        passed to ``fit``. When omitted (e.g. a direct ``create_model()``
        call), the width falls back to ``X_train_transformed.shape[1]``.

    Returns
    -------
    A compiled ``Sequential`` model.
    """
    if meta is not None:
        n_features = meta["n_features_in_"]
    else:
        # Direct call without scikeras: rely on the matrix computed in the notebook.
        n_features = X_train_transformed.shape[1]

    model = Sequential([
        Dense(10, input_shape=(n_features,), activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
# Wrap the Keras model so it can be used as a scikit-learn estimator.
# random_state seeds TensorFlow's random number generators, so restarting the
# kernel and running every cell reproduces the same fitted model and error rates.
keras_model = KerasClassifier(
    model=create_model,
    epochs=50,
    batch_size=10,
    verbose=0,
    random_state=1,
)

# Chain preprocessing and the classifier, then fit on the raw training frame;
# the pipeline (re)fits the preprocessor on x_train before training the network.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', keras_model)])
pipeline.fit(x_train, y_train)

In [None]:
# keras_model_ = pipeline.named_steps['classifier'].model_
# keras_model_.summary()

In [None]:

# Score the fitted pipeline on the rows it was trained on.
yhat_train = pipeline.predict(x_train)
train_mismatch = yhat_train != y_train  # True wherever the predicted label is wrong
train_error = np.mean(train_mismatch)
print(f"Fraction of misclassified training data: {train_error}")


In [None]:
# Score the fitted pipeline on the held-out rows.
yhat_test = pipeline.predict(x_test)
test_mismatch = yhat_test != y_test  # True wherever the predicted label is wrong
test_error = np.mean(test_mismatch)
print(f"Fraction of misclassified test data: {test_error}")

# Place predictions and true labels next to the original test features.
df_new = pd.DataFrame({'Prediction': yhat_test, 'Actual Result': y_test})
finalDataFrame = pd.concat([x_test, df_new], axis="columns")

In [None]:
# Show the first 50 test rows together with their predicted and actual labels.
finalDataFrame.head(50)