In [21]:
import os.path
import pandas as pd
pd.set_option('display.max_colwidth', None)
import numpy as np
np.set_printoptions(precision=3, suppress=True)
import seaborn as sns
sns.set(style='whitegrid')
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from keras import regularizers
from sklearn.model_selection import train_test_split
import zipfile
#https://datascience.stackexchange.com/questions/107340/need-help-understanding-how-this-neural-network-is-working

In [71]:
class Data():
    """Loader for the South German Credit data set.

    The raw file uses German column headers. On construction, if the
    English-header CSV is not present in ``data_path``, it is generated from
    the German file via :meth:`translate`.

    Parameters
    ----------
    data_path : str
        Directory that holds both data files.
    german : str
        File name of the raw, space-separated file with German headers.
    english : str
        File name of the CSV with English headers (created if missing).
    """

    # German -> English column names. 'laufzeit' is the credit duration; it
    # must not share a target name with 'moral' (credit history), otherwise
    # the translated frame ends up with two identically named columns.
    COLUMN_TRANSLATIONS = {
        'laufkont': 'status',
        'laufzeit': 'duration',
        'moral': 'credit_history',
        'verw': 'purpose',
        'hoehe': 'amount',
        'sparkont': 'savings',
        'beszeit': 'employment_duration',
        'rate': 'installment_rate',
        'famges': 'personal_status_sex',
        'buerge': 'other_debtors',
        'wohnzeit': 'present_residence',
        'verm': 'property',
        'alter': 'age',
        'weitkred': 'other_installment_plans',
        'wohn': 'housing',
        'bishkred': 'number_credits',
        'beruf': 'job',
        'pers': 'people_liable',
        'telef': 'telephone',
        'gastarb': 'foreign_worker',
        'kredit': 'credit_risk',
    }

    def __init__(self, data_path = 'Data/SouthGermanCredit/', german = 'SouthGermanCredit.asc', english = 'SouthGermanCredit_english.csv'):
        self.data_path = data_path
        self.german = german
        self.english = english
        # Only translate when the English file has not been produced yet.
        if not os.path.isfile(self._path(self.english)):
            self.translate()

    def _path(self, filename):
        """Return ``filename`` located inside ``self.data_path``."""
        # os.path.join copes with a data_path given with or without a
        # trailing separator.
        return os.path.join(self.data_path, filename)

    def translate(self):
        """Read the German file, rename its columns and write the English CSV."""
        german_df = pd.read_csv(self._path(self.german), sep = ' ')
        english_df = german_df.rename(columns = self.COLUMN_TRANSLATIONS)
        # index=False: otherwise the row index is written as an extra column
        # that is read back as a spurious 'Unnamed: 0' feature.
        english_df.to_csv(self._path(self.english), index = False)

    def view_data(self):
        """Print the column names, summary statistics and first rows."""
        df = pd.read_csv(self._path(self.english))
        print(df.columns)
        print(df.describe())
        print(df.head())

    def get_data(self):
        """Return the English CSV as a DataFrame with every column cast to float32."""
        return pd.read_csv(self._path(self.english)).astype(np.float32)
        

In [72]:
# Load the float32 frame, separate the target column and hold out 20% for testing.
df = Data().get_data()
features = df.copy()
labels = features.pop('credit_risk')
trainX, testX, trainY, testY = train_test_split(features, labels, test_size=0.2, random_state=69)
# Sanity-check the element dtypes. Use positional access (.iloc): after the
# shuffled split, row label 0 is only in the training set by chance, so a
# label lookup with .loc[0] can raise KeyError for a different seed/test size.
print(type(trainX['status'].iloc[0]), type(trainY.iloc[0]))

<class 'numpy.float32'> <class 'numpy.float32'>


In [73]:
# Training hyper-parameters.
batch_size = 32
learning_rate = 1e-3

# Normalization layer whose statistics are fitted on the training features only.
normalizer = preprocessing.Normalization()
normalizer.adapt(trainX.to_numpy())

# Stack: normalisation -> two L2-regularised ELU blocks with dropout ->
# 2-unit output turned into class probabilities by a softmax layer.
model = tf.keras.Sequential()
model.add(normalizer)
model.add(layers.Dense(128, activation='elu', kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(128, activation='elu', kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(2))
model.add(layers.Softmax())

# The network already emits probabilities, hence from_logits=False.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

# Left as the cell's final expression so the returned History object is displayed.
model.fit(x=trainX, y=trainY, batch_size=batch_size, epochs=50, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x7fc2b0213490>

In [74]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization_16 (Normalizat multiple                  41        
_________________________________________________________________
dense_48 (Dense)             multiple                  2688      
_________________________________________________________________
dropout_32 (Dropout)         multiple                  0         
_________________________________________________________________
dense_49 (Dense)             multiple                  16512     
_________________________________________________________________
dropout_33 (Dropout)         multiple                  0         
_________________________________________________________________
dense_50 (Dense)             multiple                  258       
_________________________________________________________________
softmax_16 (Softmax)         multiple                