In [1]:
!pip install scikit-learn tensorflow pygad joblib pandas numpy matplotlib shap

Collecting pygad
  Downloading pygad-3.5.0-py3-none-any.whl.metadata (20 kB)
Downloading pygad-3.5.0-py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.6/89.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pygad
Successfully installed pygad-3.5.0


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import os

In [5]:
data_path = '/content/drive/MyDrive/neuro/dataset.csv'
df = pd.read_csv(data_path)

# Basic info and head to inspect
print(df.info())
print(df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        1000 non-null   int64 
 1   Age               1000 non-null   int64 
 2   Sex               1000 non-null   object
 3   Job               1000 non-null   int64 
 4   Housing           1000 non-null   object
 5   Saving accounts   817 non-null    object
 6   Checking account  606 non-null    object
 7   Credit amount     1000 non-null   int64 
 8   Duration          1000 non-null   int64 
 9   Purpose           1000 non-null   object
dtypes: int64(5), object(5)
memory usage: 78.3+ KB
None
   Unnamed: 0  Age     Sex  Job Housing Saving accounts Checking account  \
0           0   67    male    2     own             NaN           little   
1           1   22  female    2     own          little         moderate   
2           2   49    male    1     own          little   

In [7]:
# Drop unnamed index column if exists
if 'Unnamed: 0' in df.columns:
    df.drop(columns=['Unnamed: 0'], inplace=True)

# Check for missing values where 'NA' is a string and replace them with np.nan for imputation
df.replace('NA', np.nan, inplace=True)

# For simplicity, fill missing categorical values with 'missing'
for col in ['Saving accounts', 'Checking account']:
    df[col].fillna('missing', inplace=True)

# Encode categorical columns using LabelEncoder
categorical_cols = ['Sex', 'Housing', 'Saving accounts', 'Checking account', 'Purpose']
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

# Check for target column (assuming 'Risk' or 'credit_risk', if not present set dummy target for example)
# For demo purpose: Let's assume a synthetic target based on Credit amount threshold (replace with your target)
if 'Risk' not in df.columns:
    df['Risk'] = (df['Credit amount'] > df['Credit amount'].median()).astype(int)

# Features and target
X = df.drop(columns=['Risk'])
y = df['Risk']

# Scale numeric features (Age, Credit amount, Duration)
numeric_cols = ['Age', 'Credit amount', 'Duration']
scaler = StandardScaler()
X[numeric_cols] = scaler.fit_transform(X[numeric_cols])

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

print("Preprocessing complete. Training samples:", X_train.shape[0])

Preprocessing complete. Training samples: 800


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna('missing', inplace=True)


In [8]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=2
)

Epoch 1/100
20/20 - 3s - 126ms/step - accuracy: 0.5063 - loss: 0.7572 - val_accuracy: 0.6875 - val_loss: 0.6284
Epoch 2/100
20/20 - 0s - 10ms/step - accuracy: 0.6531 - loss: 0.6199 - val_accuracy: 0.8188 - val_loss: 0.5522
Epoch 3/100
20/20 - 0s - 11ms/step - accuracy: 0.7297 - loss: 0.5620 - val_accuracy: 0.8438 - val_loss: 0.4842
Epoch 4/100
20/20 - 0s - 13ms/step - accuracy: 0.7688 - loss: 0.4920 - val_accuracy: 0.8625 - val_loss: 0.4303
Epoch 5/100
20/20 - 0s - 11ms/step - accuracy: 0.7875 - loss: 0.4499 - val_accuracy: 0.8687 - val_loss: 0.3935
Epoch 6/100
20/20 - 0s - 10ms/step - accuracy: 0.8375 - loss: 0.3853 - val_accuracy: 0.8687 - val_loss: 0.3501
Epoch 7/100
20/20 - 0s - 12ms/step - accuracy: 0.8188 - loss: 0.3827 - val_accuracy: 0.8625 - val_loss: 0.3227
Epoch 8/100
20/20 - 0s - 11ms/step - accuracy: 0.8687 - loss: 0.3452 - val_accuracy: 0.9000 - val_loss: 0.2951
Epoch 9/100
20/20 - 0s - 12ms/step - accuracy: 0.8641 - loss: 0.3263 - val_accuracy: 0.8750 - val_loss: 0.2837


In [10]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9530 - loss: 0.0862  
Test Loss: 0.0776
Test Accuracy: 0.9650


In [11]:
save_dir = '/content/drive/MyDrive/credit_risk_model/'
os.makedirs(save_dir, exist_ok=True)

model.save(os.path.join(save_dir, 'nn_credit_risk_model.h5'))

import joblib
joblib.dump(scaler, os.path.join(save_dir, 'scaler.save'))

print("Model and scaler saved.")



Model and scaler saved.
