**Importing the required packages **


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


**Generate the synthetic data**

In [10]:
np.random.seed(42)
n_samples = 1000
data = pd.DataFrame({
    'status': np.random.choice(['good', 'bad'], n_samples),
    'duration': np.random.randint(1, 48, n_samples),
    'credit_history': np.random.choice(['no_credit', 'good', 'poor'], n_samples),
    'purpose': np.random.choice(['car', 'education', 'furniture', 'business'], n_samples),
    'amount': np.random.randint(100, 10000, n_samples),
    'savings': np.random.choice(['low', 'medium', 'high'], n_samples),
    'employment_duration': np.random.choice(['<1', '1-4', '4-7', '7+'], n_samples),
    'installment_rate': np.random.randint(1, 5, n_samples),
    'personal_status_sex': np.random.choice(['male', 'female'], n_samples),
    'other_debtors': np.random.choice(['none', 'guarantor', 'co-applicant'], n_samples),
    'present_residence': np.random.randint(1, 5, n_samples),
    'property': np.random.choice(['real_estate', 'savings', 'car', 'other'], n_samples),
    'age': np.random.randint(18, 75, n_samples),
    'other_installment_plans': np.random.choice(['none', 'bank', 'stores'], n_samples),
    'housing': np.random.choice(['own', 'rent', 'free'], n_samples),
    'number_credits': np.random.randint(1, 5, n_samples),
    'job': np.random.choice(['skilled', 'unskilled', 'management', 'self-employed'], n_samples),
    'people_liable': np.random.randint(1, 3, n_samples),
    'telephone': np.random.choice(['yes', 'no'], n_samples),
    'foreign_worker': np.random.choice(['yes', 'no'], n_samples),
    'credit_risk': np.random.choice([0, 1], n_samples)
})


Encode categorical variables

In [11]:
ategorical_columns = ['status', 'credit_history', 'purpose', 'savings', 'employment_duration',
                       'personal_status_sex', 'other_debtors', 'property', 'other_installment_plans',
                       'housing', 'job', 'telephone', 'foreign_worker']

label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le


Split the data into features and target as well as the data into training and testing sets


In [12]:
X = data.drop('credit_risk', axis=1)
y = data['credit_risk']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

Scale the features

In [13]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

 Train a Random Forest model as an example

In [14]:
best_model = RandomForestClassifier()
best_model.fit(X_train, y_train)


Example synthetic test case

In [15]:

test_case = {
    'status': 'good',
    'duration': 24,
    'credit_history': 'good',
    'purpose': 'car',
    'amount': 5000,
    'savings': 'medium',
    'employment_duration': '1-4',
    'installment_rate': 2,
    'personal_status_sex': 'male',
    'other_debtors': 'none',
    'present_residence': 3,
    'property': 'real_estate',
    'age': 35,
    'other_installment_plans': 'none',
    'housing': 'own',
    'number_credits': 2,
    'job': 'skilled',
    'people_liable': 1,
    'telephone': 'yes',
    'foreign_worker': 'yes'
}



Convert the test case to a DataFrame

In [18]:

test_df = pd.DataFrame([test_case])
test_df

Unnamed: 0,status,duration,credit_history,purpose,amount,savings,employment_duration,installment_rate,personal_status_sex,other_debtors,present_residence,property,age,other_installment_plans,housing,number_credits,job,people_liable,telephone,foreign_worker
0,good,24,good,car,5000,medium,1-4,2,male,none,3,real_estate,35,none,own,2,skilled,1,yes,yes


***Encode categorical variables using the same label encoders used for training***

In [17]:


for col, le in label_encoders.items():
    test_df[col] = le.transform(test_df[col])



** Scale the features using the same scaler used for training and predict the creditworthiness**

In [22]:

test_df_scaled = scaler.transform(test_df)


creditworthiness_prediction = best_model.predict(test_df_scaled)
creditworthiness_prob = best_model.predict_proba(test_df_scaled)

print(f"Predicted Creditworthiness: {'Good' if creditworthiness_prediction[0] == 1 else 'Bad'}")
print(f"Prediction Probabilities: {creditworthiness_prob}")

Predicted Creditworthiness: Bad
Prediction Probabilities: [[0.52 0.48]]
