# Credit Scoring Model

**This notebook builds a credit scoring model that predicts the creditworthiness of individuals from historical financial data, using a classification algorithm and assessing the model's accuracy.**

## 1- Importing Libraries

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Conv1D, Flatten, MaxPooling1D
from tensorflow.keras.layers import Dropout

2024-07-15 23:50:15.746675: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-15 23:50:15.749407: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-15 23:50:15.756434: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-15 23:50:15.767920: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-15 23:50:15.771304: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-15 23:50:15.780279: I tensorflow/core/platform/cpu_feature_gu

## 2- Preprocess the Data

In [2]:
# Read the raw credit data from the CSV file into a DataFrame
dataset = pd.read_csv(filepath_or_buffer='Data.csv')

In [3]:
# Dataset shape as a (rows, columns) tuple
dataset.shape

(3000, 30)

In [4]:
# Count the missing (NaN) entries in each column
dataset.isna().sum()

TARGET               0
ID                   0
DerogCnt             0
CollectCnt           0
BanruptcyInd         0
InqCnt06             0
InqTimeLast        188
InqFinanceCnt24      0
TLTimeFirst          0
TLTimeLast           0
TLCnt03              0
TLCnt12              0
TLCnt24              0
TLCnt                3
TLSum               40
TLMaxSum            40
TLSatCnt             4
TLDel60Cnt           0
TLBadCnt24           0
TL75UtilCnt         99
TL50UtilCnt         99
TLBalHCPct          41
TLSatPct             4
TLDel3060Cnt24       0
TLDel90Cnt24         0
TLDel60CntAll        0
TLOpenPct            3
TLBadDerogCnt        0
TLDel60Cnt24         0
TLOpen24Pct          3
dtype: int64

In [5]:
# Convert the strings to numerical data
def clean_and_convert(value):
    """Turn a formatted numeric string into a float.

    Dollar signs, commas and percent signs are removed and surrounding
    whitespace is stripped before the text is parsed.

    Parameters
    ----------
    value : object
        A single cell value. Only ``str`` instances are converted.

    Returns
    -------
    float or object
        The parsed float for string input, ``nan`` when nothing is left
        after removing the symbols and whitespace, and the input itself
        (unchanged) for any non-string value.

    Raises
    ------
    ValueError
        If the cleaned text is non-empty but is not a valid float literal.
    """
    if not isinstance(value, str):
        # Already numeric (or NaN / None): pass through untouched.
        return value

    cleaned = value.replace('$', '').replace(',', '').replace('%', '').strip()
    if not cleaned:
        # float('') would raise; treat a blank cell as a missing value so
        # it can be imputed like any other NaN.
        return float('nan')
    return float(cleaned)

# Run the cleaning function element-wise over the columns listed below
columns_to_clean = [
    "TLSum",
    "TLMaxSum",
    "TLBalHCPct",
    "TLSatPct",
    "TLOpenPct",
    "TLOpen24Pct",
]
for col in columns_to_clean:
    dataset[col] = dataset[col].map(clean_and_convert)

In [6]:
# Replace every missing value with the mean of its own column.
# NOTE(review): the means are taken over all rows, before the train/test
# split further down, so test rows contribute to the imputed values.
dataset = dataset.fillna(value=dataset.mean(axis=0))

In [7]:
# Print the dataset shape again, now that cleaning and imputation have run
print(dataset.shape)

(3000, 30)


## 3- Model Training

In [8]:
# Split the dataset into features and target.
# ID is dropped together with TARGET: it appears to be a per-row identifier
# (assumption from its name — confirm against the data dictionary), and an
# identifier carries no credit information for the model to learn from.
X = dataset.drop(columns=['TARGET', 'ID'])
y = dataset['TARGET']

In [9]:
# Split the data into training (80%) and testing (20%) sets.
# stratify=y keeps the class proportions of the target the same in both
# splits, so the test accuracy is measured on a representative label mix.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=43,
    stratify=y,
)

In [10]:
# Standardize the features: the scaler is fitted on the training split only,
# and that same fitted transformation is then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Conv1D needs a trailing channel axis: (samples, features) -> (samples, features, 1)
X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[:2] + (1,))
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[:2] + (1,))

In [12]:
# Assemble the CNN layer by layer
model = Sequential()
# One step per feature, a single channel
model.add(Input(shape=(X_train_scaled.shape[1], 1)))
# Convolution over pairs of adjacent features
model.add(Conv1D(32, kernel_size=2, activation='relu'))
model.add(Dropout(0.2))
# Collapse the feature maps into a flat vector for the dense layers
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# Single sigmoid unit for the binary output
model.add(Dense(1, activation='sigmoid'))

NameError: name 'Dropout' is not defined

In [None]:
# Configure training: binary cross-entropy loss, RMSprop optimizer,
# and accuracy reported as the metric
model.compile(
    loss='binary_crossentropy',
    optimizer='RMSprop',
    metrics=['accuracy'],
)

In [None]:
# Fit the network for 10 epochs with mini-batches of 32 samples.
# NOTE(review): the validation data passed here is the test split, so the
# per-epoch validation metrics and the final test evaluation use the same rows.
model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test_scaled, y_test),
)

## 4- Model Evaluation

In [None]:
# Score the trained model on the test split and report its accuracy
loss, accuracy = model.evaluate(x=X_test_scaled, y=y_test)
print(f'Test Accuracy: {accuracy:.2f}')