# Credit Scoring Model

**Credit scoring model to predict the creditworthiness of individuals based on historical financial data. Utilize classification algorithms and assess the model's accuracy.**

## 1- Importing Libraries

In [215]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## 2- Preprocess the Data

In [216]:
# Load Data from CSV file
dataset = pd.read_csv('Data.csv')

In [217]:
# Dataset shape
dataset.shape

(3000, 30)

In [218]:
# Discover any missing values
dataset.isnull().sum()

TARGET               0
ID                   0
DerogCnt             0
CollectCnt           0
BanruptcyInd         0
InqCnt06             0
InqTimeLast        188
InqFinanceCnt24      0
TLTimeFirst          0
TLTimeLast           0
TLCnt03              0
TLCnt12              0
TLCnt24              0
TLCnt                3
TLSum               40
TLMaxSum            40
TLSatCnt             4
TLDel60Cnt           0
TLBadCnt24           0
TL75UtilCnt         99
TL50UtilCnt         99
TLBalHCPct          41
TLSatPct             4
TLDel3060Cnt24       0
TLDel90Cnt24         0
TLDel60CntAll        0
TLOpenPct            3
TLBadDerogCnt        0
TLDel60Cnt24         0
TLOpen24Pct          3
dtype: int64

In [219]:
# Convert the strings to numeriacal data
def clean_and_convert(value):
    if isinstance(value, str):
        value = value.replace('$', '').replace(',', '').replace('%', '').strip()
        return float(value)
    return value

# Apply the cleaning function to specific columns
columns_to_clean = ["TLSum", "TLMaxSum", "TLBalHCPct", "TLSatPct", "TLOpenPct", "TLOpen24Pct"]
for col in columns_to_clean:
    dataset[col] = dataset[col].apply(clean_and_convert)

In [220]:
# Replance the missing values with the mean of the column
dataset = dataset.fillna(dataset.mean())

In [221]:
print(dataset.shape)

(3000, 30)


## 3- Model Training

In [222]:
# Devide the data into features and target
y = dataset.iloc[:, 0].values
X = dataset.iloc[:, 1:28].values

In [223]:
# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


In [224]:
# Standerized the data 
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [225]:
# Setting up the classifier
classifier = RandomForestClassifier()

In [226]:
# Training the model
classifier.fit(X_train, y_train)
prediction = classifier.predict(X_test)

## 4- Model Evaluation

In [227]:
print("Accuracy:", accuracy_score(y_test, prediction))
print("Confusion Matrix:\n", confusion_matrix(y_test, prediction))
print("Classification Report:\n", classification_report(y_test, prediction))

Accuracy: 0.835
Confusion Matrix:
 [[492   6]
 [ 93   9]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.99      0.91       498
           1       0.60      0.09      0.15       102

    accuracy                           0.83       600
   macro avg       0.72      0.54      0.53       600
weighted avg       0.80      0.83      0.78       600

