In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [2]:
# Load the dataset
data_path = r'credit_scoring_dataset_large.xlsx'
df = pd.read_excel(data_path)

In [3]:
def calculate_credit_score(row):
    credit_score = 0

    # Payment History - On-Time Payments
    if row['payment_history'] == 'excellent':
        credit_score += 100
    elif row['payment_history'] == 'good':
        credit_score += 80
    elif row['payment_history'] == 'fair':
        credit_score += 50
    elif row['payment_history'] == 'poor':
        credit_score += 20

    # Late Payments
    if row['late_payments'] == 0:
        credit_score += 30
    elif 1 <= row['late_payments'] <= 2:
        credit_score += 20
    elif 3 <= row['late_payments'] <= 4:
        credit_score += 10
    elif row['late_payments'] > 4:
        credit_score -= 20

    # Defaults and Bankruptcies
    if row['bankruptcies'] == 0:
        credit_score += 50
    elif row['bankruptcies'] == 1:
        credit_score -= 50
    elif row['bankruptcies'] > 1:
        credit_score -= 100

    # Amounts Owed - Credit Utilization Ratio
    if row['credit_utilization'] < 10:
        credit_score += 50
    elif row['credit_utilization'] < 30:
        credit_score += 30
    elif row['credit_utilization'] < 50:
        credit_score += 10
    elif row['credit_utilization'] > 50:
        credit_score -= 30

    # Amounts Owed - Outstanding Balances
    if row['outstanding_balances'] < 1000:
        credit_score += 20
    elif row['outstanding_balances'] < 5000:
        credit_score += 10
    elif row['outstanding_balances'] > 5000:
        credit_score -= 20

    # Length of Credit History - Age of Oldest Account
    if row['oldest_account_age'] > 10:
        credit_score += 40
    elif row['oldest_account_age'] > 5:
        credit_score += 20
    elif row['oldest_account_age'] < 3:
        credit_score += 10

    # Length of Credit History - Average Age of Accounts
    if row['avg_account_age'] > 7:
        credit_score += 30
    elif row['avg_account_age'] > 5:
        credit_score += 20
    elif row['avg_account_age'] < 3:
        credit_score += 10

    # Credit Mix - Types of Credit Accounts
    if row['credit_mix'] == 'diverse':
        credit_score += 20
    elif row['credit_mix'] == 'moderate':
        credit_score += 10
    elif row['credit_mix'] == 'limited':
        credit_score += 0

    # New Credit - Recent Credit Inquiries
    if row['recent_inquiries'] == 0:
        credit_score += 20
    elif row['recent_inquiries'] <= 2:
        credit_score += 10
    elif row['recent_inquiries'] > 2:
        credit_score -= 10

    # Income Levels - Employment Income
    if row['employment_income'] > 100000:
        credit_score += 50
    elif row['employment_income'] > 50000:
        credit_score += 30
    elif row['employment_income'] < 50000:
        credit_score += 10

    # Income Levels - Bonus/Commission Income
    if row['bonus_commission_income'] > 10000:
        credit_score += 20
    elif row['bonus_commission_income'] > 5000:
        credit_score += 10
    elif row['bonus_commission_income'] < 5000:
        credit_score += 0

    # Employment Status - Job Stability
    if row['job_stability'] == 'stable':
        credit_score += 30
    elif row['job_stability'] == 'moderate':
        credit_score += 15
    elif row['job_stability'] == 'unstable':
        credit_score += 0

    # Employment Status - Industry Sector
    if row['industry_stability'] == 'stable':
        credit_score += 20
    elif row['industry_stability'] == 'moderate':
        credit_score += 10
    elif row['industry_stability'] == 'unstable':
        credit_score += 0

    # Existing Debts - Outstanding Loans
    if row['outstanding_loans'] < 5000:
        credit_score += 20
    elif row['outstanding_loans'] < 20000:
        credit_score += 10
    elif row['outstanding_loans'] > 20000:
        credit_score -= 20

    # Existing Debts - Credit Card Balances
    if row['credit_card_balances'] < 1000:
        credit_score += 20
    elif row['credit_card_balances'] < 5000:
        credit_score += 10
    elif row['credit_card_balances'] > 5000:
        credit_score -= 20

    # Utility Data - Bills Payment History
    if row['utility_bills_payment_history'] == 'excellent':
        credit_score += 30
    elif row['utility_bills_payment_history'] == 'good':
        credit_score += 20
    elif row['utility_bills_payment_history'] == 'fair':
        credit_score += 10
    elif row['utility_bills_payment_history'] == 'poor':
        credit_score += 0

    return credit_score

In [4]:
df['credit_score'] = df.apply(calculate_credit_score, axis=1)

In [5]:
# Preprocess the data
categorical_features = ['payment_history', 'credit_mix', 'job_stability', 'industry_stability', 'utility_bills_payment_history']
numerical_features = ['late_payments', 'bankruptcies', 'credit_utilization', 'outstanding_balances', 'oldest_account_age', 
                      'avg_account_age', 'recent_inquiries', 'employment_income', 'bonus_commission_income', 'outstanding_loans', 
                      'credit_card_balances']

In [6]:
encoder = OneHotEncoder()
encoded_features = encoder.fit_transform(df[categorical_features]).toarray()

# Scale numerical features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[numerical_features])

# Combine encoded and scaled features
X = np.hstack((encoded_features, scaled_features))
y = df['credit_score']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# Define the neural network model
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))


In [8]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

In [9]:
# Train the model
model.fit(X_train, y_train, epochs=500, batch_size=32, validation_split=0.2)


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x1e9107a5f70>

In [10]:
# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f'Model Loss: {loss}')

Model Loss: 972.4888916015625


In [11]:
import os

# Define the directory and file path
directory = r'C:\Users\aniru\Machine Learning\Humanize'
file_path = os.path.join(directory, 'credit_score_model.h5')

# Create the directory if it doesn't exist
if not os.path.exists(directory):
    os.makedirs(directory)

# Save the model
model.save(file_path)

In [17]:
# Encode categorical features
encoder = OneHotEncoder(sparse=False)
encoded_features = encoder.fit_transform(df[categorical_features])

# Scale numerical features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df[numerical_features])




In [18]:
import pandas as pd
import numpy as np

def neural_expert_system(input_data):
    # Ensure input_data is in a DataFrame format
    input_df = pd.DataFrame([input_data])

    # Preprocess the input data similarly to the training data
    encoded_input = encoder.transform(input_df[categorical_features]).toarray()
    scaled_input = scaler.transform(input_df[numerical_features])
    combined_input = np.hstack((encoded_input, scaled_input))

    # Predict the credit score using the neural network model
    predicted_score = model.predict(combined_input)
    return predicted_score[0][0]

# Example input data
input_data = {
    'payment_history': 'good',
    'late_payments': 1,
    'bankruptcies': 0,
    'credit_utilization': 20,
    'outstanding_balances': 1500,
    'oldest_account_age': 6,
    'avg_account_age': 5,
    'credit_mix': 'moderate',
    'recent_inquiries': 1,
    'employment_income': 60000,
    'bonus_commission_income': 5000,
    'job_stability': 'moderate',
    'industry_stability': 'moderate',
    'outstanding_loans': 10000,
    'credit_card_balances': 2000,
    'utility_bills_payment_history': 'good'
}

predicted_score = neural_expert_system(input_data)
print(f'Predicted Credit Score: {predicted_score}')


AttributeError: 'numpy.ndarray' object has no attribute 'toarray'

In [13]:
def load_neural_expert_system_model():
    global model, encoder, scaler
    model = load_model('credit_score_model.h5')


In [14]:
load_neural_expert_system_model()

In [15]:
# Example input data
input_data = {
    'payment_history': 'good',
    'late_payments': 1,
    'bankruptcies': 0,
    'credit_utilization': 20,
    'outstanding_balances': 1500,
    'oldest_account_age': 6,
    'avg_account_age': 5,
    'credit_mix': 'moderate',
    'recent_inquiries': 2,
    'employment_income': 60000,
    'bonus_commission_income': 5000,
    'job_stability': 'stable',
    'industry_stability': 'stable',
    'outstanding_loans': 10000,
    'credit_card_balances': 2000,
    'utility_bills_payment_history': 'good'
}

In [19]:
predicted_score = neural_expert_system(input_data)
print(f'Predicted Credit Score: {predicted_score}')


AttributeError: 'numpy.ndarray' object has no attribute 'toarray'