In [27]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder, OneHotEncoder, OrdinalEncoder


In [28]:
df = pd.read_csv('https://github.com/Lakshay-Gautam-767/Datasets/raw/main/credit_scoring.csv')

In [29]:
df.isnull().sum()

Unnamed: 0,0
Age,0
Gender,0
Marital Status,0
Education Level,0
Employment Status,0
Credit Utilization Ratio,0
Payment History,0
Number of Credit Accounts,0
Loan Amount,0
Interest Rate,0


In [30]:
df.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.2,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828.0,2,3289000,6.28,36,Personal Loan


In [31]:
# lb = LabelEncoder()
# df['Gender'] = lb.fit_transform(df['Gender'])
# df['Marital Status'] = lb.fit_transform(df['Marital Status'])
# df['Education Level'] = lb.fit_transform(df['Education Level'])
# df['Employment Status'] = lb.fit_transform(df['Employment Status'])
# df['Type of Loan'] = lb.fit_transform(df['Type of Loan'])

In [32]:
df.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.2,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828.0,2,3289000,6.28,36,Personal Loan


In [33]:
x = df.drop(columns = 'Loan Amount')
y = df['Loan Amount']

In [34]:
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size = 0.2,
                                                    random_state = 42)

In [35]:
x_train

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Interest Rate,Loan Term,Type of Loan
29,28,Female,Married,PhD,Unemployed,0.71,2028.0,1,2.43,24,Home Loan
535,64,Male,Married,Bachelor,Unemployed,0.05,1114.0,2,12.61,60,Personal Loan
695,20,Male,Divorced,Bachelor,Self-Employed,0.56,2542.0,8,11.22,12,Home Loan
557,47,Female,Married,High School,Self-Employed,0.35,1771.0,9,16.76,48,Home Loan
836,52,Female,Single,Bachelor,Self-Employed,0.71,2000.0,2,5.25,60,Personal Loan
...,...,...,...,...,...,...,...,...,...,...,...
106,62,Male,Single,Master,Unemployed,0.94,1542.0,1,12.41,36,Personal Loan
270,49,Female,Married,High School,Employed,0.81,1085.0,10,5.09,36,Personal Loan
860,63,Female,Divorced,Master,Unemployed,0.50,2571.0,4,18.70,36,Personal Loan
435,31,Male,Married,High School,Self-Employed,0.19,114.0,10,13.50,60,Home Loan


In [36]:
# StandardScaler and Encoding Column Transformer

In [37]:
from sklearn.compose import ColumnTransformer   # This is how to import ColumnTransformer

In [38]:
# Column Transformer define karna
# 1. StandardScaler for Numerical Columns
# 2. OrdinalEncoder for Categorical Columns
num_cols = ['Age', 'Credit Utilization Ratio', 'Payment History', 'Number of Credit Accounts', 'Interest Rate', 'Loan Term']
cat_cols = ['Gender', 'Marital Status', 'Education Level', 'Employment Status', 'Type of Loan']

transformer = ColumnTransformer(transformers=[
    ('tnf1', StandardScaler(), num_cols),
    ('tnf2', OrdinalEncoder(), cat_cols)
], remainder='passthrough')

# Applying transformation
X_train_transformed = transformer.fit_transform(x_train)
X_test_transformed = transformer.transform(x_test)

print("Transformed Shape:", X_train_transformed.shape)

Transformed Shape: (800, 11)


In [41]:
X_train_transformed

array([[-1.12134862,  0.67278248,  0.67327775, ...,  3.        ,
         2.        ,  1.        ],
       [ 1.60142689, -1.58364354, -0.44271883, ...,  0.        ,
         2.        ,  2.        ],
       [-1.72640985,  0.15995838,  1.3008732 , ...,  0.        ,
         1.        ,  1.        ],
       ...,
       [ 1.52579423, -0.04517126,  1.33628228, ...,  2.        ,
         2.        ,  2.        ],
       [-0.89445066, -1.10500772, -1.66372166, ...,  1.        ,
         1.        ,  1.        ],
       [-0.21375679,  0.36508802,  0.77828399, ...,  0.        ,
         1.        ,  1.        ]])

In [45]:
#@title **Standard Scaler Checking, It's Done**
X_train_transformed_new = pd.DataFrame(X_train_transformed, columns = x_train.columns)
np.round(X_train_transformed_new.describe(), 2)

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Interest Rate,Loan Term,Type of Loan
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,-0.0,0.0,-0.0,-0.0,0.0,0.0,0.5,1.0,1.51,1.01,1.0
std,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.81,1.11,0.81,0.82
min,-1.73,-1.75,-1.8,-1.57,-1.76,-1.47,0.0,0.0,0.0,0.0,0.0
25%,-0.84,-0.93,-0.83,-0.89,-0.84,-0.78,0.0,0.0,1.0,0.0,0.0
50%,-0.06,0.06,-0.02,0.13,-0.02,-0.09,1.0,1.0,2.0,1.0,1.0
75%,0.86,0.88,0.85,0.81,0.87,0.61,1.0,2.0,2.0,2.0,2.0
max,1.68,1.66,1.69,1.49,1.73,1.3,1.0,2.0,3.0,2.0,2.0


In [46]:
#@title **Encoding Checking, It's Done**
X_train_transformed_new.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Interest Rate,Loan Term,Type of Loan
0,-1.121349,0.672782,0.673278,-1.568533,-1.497783,-0.779005,0.0,1.0,3.0,2.0,1.0
1,1.601427,-1.583644,-0.442719,-1.228656,0.373848,1.300653,1.0,1.0,0.0,2.0,2.0
2,-1.72641,0.159958,1.300873,0.810607,0.118291,-1.472224,1.0,0.0,0.0,1.0,1.0
3,0.315672,-0.557995,0.35948,1.150484,1.136841,0.607433,0.0,1.0,1.0,1.0,1.0
4,0.693835,0.672782,0.63909,-1.228656,-0.979315,1.300653,0.0,2.0,0.0,1.0,2.0
