In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [4]:
# Read & initial data processing
df = pd.read_csv("../data/german_credit_data.csv")
df.drop(columns=['Unnamed: 0'], inplace=True)

# Handle missing data
df['Saving accounts'].fillna("No Account", inplace=True)
df['Checking account'].fillna("No Account", inplace=True)

# Encode categorical variables
le = LabelEncoder()
label_columns = ['Saving accounts', 'Checking account', 'Risk']
for col in label_columns:
    df[col] = le.fit_transform(df[col])

# 1-hot encoding
df = pd.get_dummies(df, columns=['Sex', 'Housing', 'Purpose'], drop_first=True)
df = df.astype(int)

# split data
target = 'Risk'
feature_cols = ['Checking account', 'Duration', 'Job', 'Housing_rent', 'Credit amount', 'Purpose_radio/TV', 'Purpose_car', 'Saving accounts', 'Housing_own', 'Purpose_education', 'Purpose_repairs', 'Purpose_furniture/equipment']
X = df[feature_cols]  # Features
y = df[target]  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Prediction
y_pred = model.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Print results
print(f"Model Accuracy: {accuracy:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

Training set shape: (800, 12)
Testing set shape: (200, 12)
Model Accuracy: 0.72

Confusion Matrix:
[[ 13  46]
 [  9 132]]

Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.22      0.32        59
           1       0.74      0.94      0.83       141

    accuracy                           0.72       200
   macro avg       0.67      0.58      0.57       200
weighted avg       0.70      0.72      0.68       200



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Saving accounts'].fillna("No Account", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Checking account'].fillna("No Account", inplace=True)


In [5]:
# Print results
print(f"Model Accuracy: {accuracy:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

Model Accuracy: 0.72

Confusion Matrix:
[[ 13  46]
 [  9 132]]

Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.22      0.32        59
           1       0.74      0.94      0.83       141

    accuracy                           0.72       200
   macro avg       0.67      0.58      0.57       200
weighted avg       0.70      0.72      0.68       200



In [6]:
compare_df = pd.DataFrame({
    'Actual': y_test,
    'Predicted': y_pred
})

In [7]:
print(compare_df.shape)
compare_df[compare_df['Actual'] == compare_df['Predicted']]

(200, 2)


Unnamed: 0,Actual,Predicted
737,1,1
740,1,1
660,1,1
411,1,1
678,1,1
...,...,...
408,1,1
332,0,0
208,1,1
613,1,1
