In [2]:
import os
print(os.getcwd())

c:\Users\naren\Desktop\New_DS\LR_BankLoan\notebook


In [3]:
import pandas as pd

df = pd.read_csv('data/processed/cleaned_loan_data.csv')
df.head()

Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,home_ownership,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,loan_purpose_grouped,state_region,employment_length_group
0,25000.0,25000.0,18.49,60.0,27.56,mortgage,8606.56,722.0,11.0,15210.0,3.0,debt_related,southeast,medium
1,19750.0,19750.0,17.27,60.0,13.39,mortgage,6737.5,712.0,14.0,19070.0,3.0,debt_related,northeast,medium
2,2100.0,2100.0,14.33,36.0,3.5,own,1000.0,692.0,13.0,893.0,1.0,large_purchase,southeast,very_short
3,28000.0,28000.0,16.29,36.0,19.62,mortgage,7083.33,712.0,12.0,38194.0,1.0,debt_related,west,long
4,24250.0,17431.82,12.23,60.0,23.79,mortgage,5833.33,732.0,6.0,31061.0,2.0,debt_related,midwest,long


In [4]:
# Step 1: Identify numeric columns only
numeric_cols = df.select_dtypes(include='number').columns
print(numeric_cols)

Index(['amount_requested', 'amount_funded_by_investors', 'interest_rate',
       'loan_length', 'debt_to_income_ratio', 'monthly_income', 'fico_range',
       'open_credit_lines', 'revolving_credit_balance',
       'inquiries_in_the_last_6_months'],
      dtype='object')


In [5]:
# Step 2: Check outliers using IQR method
def detect_outliers_iqr(series):
    Q1 = series.quantile(0.25)
    Q3 = series.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return ((series < lower_bound) | (series > upper_bound)).sum()

# Step 3: Apply to all numeric columns
outlier_counts = {col: detect_outliers_iqr(df[col]) for col in numeric_cols}

# Step 4: Display nicely
outlier_df = pd.DataFrame.from_dict(outlier_counts, orient='index', columns=['Outlier Count'])
outlier_df = outlier_df[outlier_df['Outlier Count'] > 0]  # Show only columns with outliers
outlier_df

Unnamed: 0,Outlier Count
amount_requested,50
amount_funded_by_investors,55
interest_rate,5
loan_length,476
monthly_income,109
fico_range,49
open_credit_lines,26
revolving_credit_balance,124
inquiries_in_the_last_6_months,222


# ✅ Categorical Data – Outlier Handling


| Method                | Description                             | Example                       |
| --------------------- | --------------------------------------- | ----------------------------- |
| Group Rare Categories | Merge infrequent values into "Other"    | `'none'` → `'other'`          |
| Ordinal Mapping       | Map to numerical if order exists        | `'low'`→1, `'medium'`→2, etc. |
| Label Encoding        | Assign integers to categories           | `'rent'`→0, `'own'`→1...      |
| One-Hot Encoding      | Create binary columns for each category | `'mortgage'` → `[1,0,0]`      |


# 📊 Numeric Data – Outlier Treatment (Industry Standards)

| Scenario              | Common Practice                     | Why It’s Used                             |
| --------------------- | ----------------------------------- | ----------------------------------------- |
| Predictive Modeling   | **Capping** at 1st/99th percentile  | Keeps structure, reduces noise            |
| Obvious Data Errors   | **Remove** extreme wrong values     | Avoid skewing models                      |
| Finance / Risk Models | **Winsorization** (cap, don’t drop) | Keeps data intact, limits distortion      |
| Regulatory Reporting  | **Keep + Flag** anomalies           | Needed for transparency                   |
| EDA (Exploratory)     | **Temporarily cap or keep**         | Helps visualize without permanent changes |


# Here, I will Winsorization (Capping) Outliers using Percentiles?

Capping means limiting extreme values in numeric columns so they don’t distort your analysis or ML model.

e.g: if data :

 incomes = [2000, 2200, 2500, 3000, 3500, 100000]

 If we cap at the 1st and 99th percentiles:

  np.clip() forces all values to stay within [low, high]

  New data will be like:

  income = [2000, 2200, 2500, 3000, 3500, 3500]

  100000 becomes 3500 (capped)

  Capping does not remove rows — it just limits extreme values to a safe threshold.

✅ Key Benefits:

Keeps all data points

Reduces impact of extreme outliers on mean, std, or ML models

Good for algorithms sensitive to outliers (like linear regression, KNN).

⚠️  Caveat:

You do lose the original extreme value, but not the row

It’s a trade-off: better model stability vs. keeping extreme raw values

So overall — no loss of row-level information, just controlled adjustments to extremes.

capping (via percentiles) works only for numeric data.



# What is Winsorization (Capping)?
It means:

Replacing extreme values (outliers) with a threshold value
instead of removing them.



# Capping plan:

| Column                           | Suggested Capping Method                       | Why?                                |
| -------------------------------- | ---------------------------------------------- | ----------------------------------- |
| `amount_requested`               | Cap at 1st & 99th percentiles (winsorization)  | Removes extreme loan requests       |
| `amount_funded_by_investors`     | Cap at high (e.g., 99th percentile)            | Matches funding to realistic values |
| `interest_rate`                  | Optional — cap only if visibly affecting model | Few outliers, mostly safe           |
| `loan_length`                    | ❌ Skip capping — it's categorical (36/60)      | Not a continuous variable           |
| `monthly_income`                 | Cap at 99th percentile                         | Avoid high-income skew              |
| `fico_range`                     | ❌ No capping — range is naturally 300–850        | Valid bounded range                 |
| `open_credit_lines`              | Cap at 95th percentile                         | Limit unusually high credit lines   |
| `revolving_credit_balance`       | Cap or log-transform                           | Handles heavy-tailed values         |
| `inquiries_in_the_last_6_months` | Cap at 3 or 4                                  | 0–3 is typical; higher may distort  |


# List of columns to cap

In [None]:
# Columns to cap (excluding 'inquiries_in_the_last_6_months')
cap_cols = [
    'amount_requested',
    'amount_funded_by_investors',
    'monthly_income',
    'revolving_credit_balance',
    'open_credit_lines',
    'interest_rate'
]

# Capping function
def cap_outliers(df, column, lower=0.01, upper=0.99):
    lower_bound = df[column].quantile(lower)
    upper_bound = df[column].quantile(upper)
    df[column] = df[column].clip(lower=lower_bound, upper=upper_bound)
    return df

# Apply capping
for col in cap_cols:
    df = cap_outliers(df, col)

# Now handle 'inquiries_in_the_last_6_months' separately
df['inquiries_in_the_last_6_months'] = df['inquiries_in_the_last_6_months'].clip(upper=3)

print("Capping complete, including special handling for inquiries.")



✅ Capping complete, including special handling for inquiries.


In [9]:
df.head()

Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,home_ownership,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,loan_purpose_grouped,state_region,employment_length_group
0,25000.0,25000.0,18.49,60.0,27.56,mortgage,8606.56,722.0,11.0,15210.0,3.0,debt_related,southeast,medium
1,19750.0,19750.0,17.27,60.0,13.39,mortgage,6737.5,712.0,14.0,19070.0,3.0,debt_related,northeast,medium
2,2100.0,2100.0,14.33,36.0,3.5,own,1415.8366,692.0,13.0,893.0,1.0,large_purchase,southeast,very_short
3,28000.0,28000.0,16.29,36.0,19.62,mortgage,7083.33,712.0,12.0,38194.0,1.0,debt_related,west,long
4,24250.0,17431.82,12.23,60.0,23.79,mortgage,5833.33,732.0,6.0,31061.0,2.0,debt_related,midwest,long


#  Quick Note:

Since loan_length has only two discrete values (36 and 60 months), it's best treated as a categorical variable — not continuous.



In [10]:
print("Unique values in 'loan_length':", df['loan_length'].unique())

Unique values in 'loan_length': [60. 36.]


# Convert loan_length to a string or category:

In [11]:
df['loan_length'] = df['loan_length'].astype(str)  # or .astype('category')
print("Unique values in 'loan_length' after conversion:", df['loan_length'].unique())

Unique values in 'loan_length' after conversion: ['60.0' '36.0']


# Encoding Strategy for categorical columns Based on Data Type:

Label Encoding → for variables with natural order (ordinal)

One-Hot Encoding → for variables with no order (nominal)


| **Encoding Type**    | **When to Use**             | **Example Column**        | **Example Values**                | **Encoding Example**                                      |
| -------------------- | --------------------------- | ------------------------- | --------------------------------- | --------------------------------------------------------- |
| **Label Encoding**   | Natural order (**Ordinal**) | `employment_length_group` | very\_short < short < long        | very\_short → 0, short → 1, long → 2                      |
|                      |                             | `loan_length`             | 36 < 60                           | 36 → 0, 60 → 1                                            |
| **One-Hot Encoding** | No order (**Nominal**)      | `home_ownership`          | rent, mortgage, own               | rent → \[1,0,0], mortgage → \[0,1,0], own → \[0,0,1]      |
|                      |                             | `state_region`            | west, northeast, south, etc.      | Each becomes its own column → e.g., `west` → \[0,0,1,0,0] |
|                      |                             | `loan_purpose_grouped`    | debt\_related, small\_business... | Each purpose becomes a column with 0 or 1                 |


In [12]:
df.head()

Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,home_ownership,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,loan_purpose_grouped,state_region,employment_length_group
0,25000.0,25000.0,18.49,60.0,27.56,mortgage,8606.56,722.0,11.0,15210.0,3.0,debt_related,southeast,medium
1,19750.0,19750.0,17.27,60.0,13.39,mortgage,6737.5,712.0,14.0,19070.0,3.0,debt_related,northeast,medium
2,2100.0,2100.0,14.33,36.0,3.5,own,1415.8366,692.0,13.0,893.0,1.0,large_purchase,southeast,very_short
3,28000.0,28000.0,16.29,36.0,19.62,mortgage,7083.33,712.0,12.0,38194.0,1.0,debt_related,west,long
4,24250.0,17431.82,12.23,60.0,23.79,mortgage,5833.33,732.0,6.0,31061.0,2.0,debt_related,midwest,long


# Apply Label Encoding to Ordinal Columns

In [13]:
label_map_loan_length = {36.0: 0, 60.0: 1}
df['loan_length'] = df['loan_length'].map(label_map_loan_length)

label_map_emp = {'very_short': 0, 'short': 1, 'medium': 2, 'long': 3}
df['employment_length_group'] = df['employment_length_group'].map(label_map_emp)

#  Apply One-Hot Encoding to Nominal Columns

In [14]:
df = pd.get_dummies(df, columns=[
    'home_ownership',
    'loan_purpose_grouped',
    'state_region'
], drop_first=True)  # drop_first avoids multicollinearity
print("✅ One-hot encoding complete.")
df.head()

✅ One-hot encoding complete.


Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,...,home_ownership_own,home_ownership_rent,loan_purpose_grouped_debt_related,loan_purpose_grouped_large_purchase,loan_purpose_grouped_other,loan_purpose_grouped_small_personal,state_region_northeast,state_region_southeast,state_region_southwest,state_region_west
0,25000.0,25000.0,18.49,,27.56,8606.56,722.0,11.0,15210.0,3.0,...,False,False,True,False,False,False,False,True,False,False
1,19750.0,19750.0,17.27,,13.39,6737.5,712.0,14.0,19070.0,3.0,...,False,False,True,False,False,False,True,False,False,False
2,2100.0,2100.0,14.33,,3.5,1415.8366,692.0,13.0,893.0,1.0,...,True,False,False,True,False,False,False,True,False,False
3,28000.0,28000.0,16.29,,19.62,7083.33,712.0,12.0,38194.0,1.0,...,False,False,True,False,False,False,False,False,False,True
4,24250.0,17431.82,12.23,,23.79,5833.33,732.0,6.0,31061.0,2.0,...,False,False,True,False,False,False,False,False,False,False


In [15]:
print(df['loan_length'].unique())


[nan]


In [17]:
# Re-import loan_length from the clean dataset
clean_df = pd.read_csv('data/processed/cleaned_loan_data.csv')
df['loan_length'] = clean_df['loan_length']

In [18]:
df.head()

Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,...,home_ownership_own,home_ownership_rent,loan_purpose_grouped_debt_related,loan_purpose_grouped_large_purchase,loan_purpose_grouped_other,loan_purpose_grouped_small_personal,state_region_northeast,state_region_southeast,state_region_southwest,state_region_west
0,25000.0,25000.0,18.49,60.0,27.56,8606.56,722.0,11.0,15210.0,3.0,...,False,False,True,False,False,False,False,True,False,False
1,19750.0,19750.0,17.27,60.0,13.39,6737.5,712.0,14.0,19070.0,3.0,...,False,False,True,False,False,False,True,False,False,False
2,2100.0,2100.0,14.33,36.0,3.5,1415.8366,692.0,13.0,893.0,1.0,...,True,False,False,True,False,False,False,True,False,False
3,28000.0,28000.0,16.29,36.0,19.62,7083.33,712.0,12.0,38194.0,1.0,...,False,False,True,False,False,False,False,False,False,True
4,24250.0,17431.82,12.23,60.0,23.79,5833.33,732.0,6.0,31061.0,2.0,...,False,False,True,False,False,False,False,False,False,False


In [19]:
df['loan_length'] = df['loan_length'].map({36.0: 0, 60.0: 1})


In [20]:
df.head()

Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,...,home_ownership_own,home_ownership_rent,loan_purpose_grouped_debt_related,loan_purpose_grouped_large_purchase,loan_purpose_grouped_other,loan_purpose_grouped_small_personal,state_region_northeast,state_region_southeast,state_region_southwest,state_region_west
0,25000.0,25000.0,18.49,1,27.56,8606.56,722.0,11.0,15210.0,3.0,...,False,False,True,False,False,False,False,True,False,False
1,19750.0,19750.0,17.27,1,13.39,6737.5,712.0,14.0,19070.0,3.0,...,False,False,True,False,False,False,True,False,False,False
2,2100.0,2100.0,14.33,0,3.5,1415.8366,692.0,13.0,893.0,1.0,...,True,False,False,True,False,False,False,True,False,False
3,28000.0,28000.0,16.29,0,19.62,7083.33,712.0,12.0,38194.0,1.0,...,False,False,True,False,False,False,False,False,False,True
4,24250.0,17431.82,12.23,1,23.79,5833.33,732.0,6.0,31061.0,2.0,...,False,False,True,False,False,False,False,False,False,False


# employment_length_group

In [None]:
print(df['employment_length_group'].unique())


[2 0 3 1]


| Category    | Encoded Value |
| ----------- | ------------- |
| very\_short | 0             |
| short       | 1             |
| medium      | 2             |
| long        | 3             |


In [23]:
reverse_map = {0: 'very_short', 1: 'short', 2: 'medium', 3: 'long'}
df['employment_length_group'].map(reverse_map).value_counts()


employment_length_group
long          1049
short          420
very_short     388
medium         343
Name: count, dtype: int64

In [24]:
print(df['employment_length_group'].unique())

[2 0 3 1]


#  verify in columns df

In [25]:
# Check one-hot encoded columns
print([col for col in df.columns if 'home_ownership_' in col])
print([col for col in df.columns if 'loan_purpose_grouped_' in col])
print([col for col in df.columns if 'state_region_' in col])

['home_ownership_own', 'home_ownership_rent']
['loan_purpose_grouped_debt_related', 'loan_purpose_grouped_large_purchase', 'loan_purpose_grouped_other', 'loan_purpose_grouped_small_personal']
['state_region_northeast', 'state_region_southeast', 'state_region_southwest', 'state_region_west']


In [26]:
df.head()

Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,...,home_ownership_own,home_ownership_rent,loan_purpose_grouped_debt_related,loan_purpose_grouped_large_purchase,loan_purpose_grouped_other,loan_purpose_grouped_small_personal,state_region_northeast,state_region_southeast,state_region_southwest,state_region_west
0,25000.0,25000.0,18.49,1,27.56,8606.56,722.0,11.0,15210.0,3.0,...,False,False,True,False,False,False,False,True,False,False
1,19750.0,19750.0,17.27,1,13.39,6737.5,712.0,14.0,19070.0,3.0,...,False,False,True,False,False,False,True,False,False,False
2,2100.0,2100.0,14.33,0,3.5,1415.8366,692.0,13.0,893.0,1.0,...,True,False,False,True,False,False,False,True,False,False
3,28000.0,28000.0,16.29,0,19.62,7083.33,712.0,12.0,38194.0,1.0,...,False,False,True,False,False,False,False,False,False,True
4,24250.0,17431.82,12.23,1,23.79,5833.33,732.0,6.0,31061.0,2.0,...,False,False,True,False,False,False,False,False,False,False


# Feature Scaling only numeric columns (not one-hot encoded and label-encoded columns)

# Scaling is only for oContinuous numerical variables.

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 21 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   amount_requested                     2200 non-null   float64
 1   amount_funded_by_investors           2200 non-null   float64
 2   interest_rate                        2200 non-null   float64
 3   loan_length                          2200 non-null   int64  
 4   debt_to_income_ratio                 2200 non-null   float64
 5   monthly_income                       2200 non-null   float64
 6   fico_range                           2200 non-null   float64
 7   open_credit_lines                    2200 non-null   float64
 8   revolving_credit_balance             2200 non-null   float64
 9   inquiries_in_the_last_6_months       2200 non-null   float64
 10  employment_length_group              2200 non-null   int64  
 11  home_ownership_own            

In [28]:
df.head()

Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,...,home_ownership_own,home_ownership_rent,loan_purpose_grouped_debt_related,loan_purpose_grouped_large_purchase,loan_purpose_grouped_other,loan_purpose_grouped_small_personal,state_region_northeast,state_region_southeast,state_region_southwest,state_region_west
0,25000.0,25000.0,18.49,1,27.56,8606.56,722.0,11.0,15210.0,3.0,...,False,False,True,False,False,False,False,True,False,False
1,19750.0,19750.0,17.27,1,13.39,6737.5,712.0,14.0,19070.0,3.0,...,False,False,True,False,False,False,True,False,False,False
2,2100.0,2100.0,14.33,0,3.5,1415.8366,692.0,13.0,893.0,1.0,...,True,False,False,True,False,False,False,True,False,False
3,28000.0,28000.0,16.29,0,19.62,7083.33,712.0,12.0,38194.0,1.0,...,False,False,True,False,False,False,False,False,False,True
4,24250.0,17431.82,12.23,1,23.79,5833.33,732.0,6.0,31061.0,2.0,...,False,False,True,False,False,False,False,False,False,False


# 'inquiries_in_the_last_6_months' is a numeric variable originsally. so, let's scale it>

Since it's a numeric variable with limited range (e.g., 0–8), scaling won’t distort meaning, and it improves model performance and convergence in many ML algorithms.


In [29]:
df['inquiries_in_the_last_6_months'].unique()


array([3., 1., 2., 0.])

#  Identify columns to scale:

In [30]:
# Columns to scale (continuous numeric features only)
scale_cols = [
    'amount_requested', 'amount_funded_by_investors', 'interest_rate',
    'debt_to_income_ratio', 'monthly_income', 'fico_range',
    'open_credit_lines', 'revolving_credit_balance', 'inquiries_in_the_last_6_months'
]

# Apply Standard Scaling:

In [31]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[scale_cols] = scaler.fit_transform(df[scale_cols])
print("Scaling complete.")
df.head()

Scaling complete.


Unnamed: 0,amount_requested,amount_funded_by_investors,interest_rate,loan_length,debt_to_income_ratio,monthly_income,fico_range,open_credit_lines,revolving_credit_balance,inquiries_in_the_last_6_months,...,home_ownership_own,home_ownership_rent,loan_purpose_grouped_debt_related,loan_purpose_grouped_large_purchase,loan_purpose_grouped_other,loan_purpose_grouped_small_personal,state_region_northeast,state_region_southeast,state_region_southwest,state_region_west
0,1.594654,1.665245,1.310251,1,1.620888,0.961061,0.39472,0.214405,0.024339,2.157924,...,False,False,True,False,False,False,False,True,False,False
1,0.925213,0.988607,1.016569,1,-0.262369,0.356504,0.111073,0.899669,0.282512,2.157924,...,False,False,True,False,False,False,True,False,False,False
2,-1.325383,-1.286187,0.308846,0,-1.576795,-1.364817,-0.456219,0.671247,-0.933244,0.168637,...,True,False,False,True,False,False,False,True,False,False
3,1.977191,2.051896,0.780661,0,0.565626,0.468364,0.111073,0.442826,1.561608,0.168637,...,False,False,True,False,False,False,False,False,False,True
4,1.499019,0.689832,-0.196671,1,1.119838,0.064045,0.678366,-0.927702,1.084522,1.163281,...,False,False,True,False,False,False,False,False,False,False


# Train-Test Split Code (for regression)

In [32]:
from sklearn.model_selection import train_test_split

#  Set target and features
X = df.drop('interest_rate', axis=1)  # Drop the target column
y = df['interest_rate']               # Target variable

#  Split the data (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Check the shape
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (1760, 20)
X_test shape: (440, 20)
y_train shape: (1760,)
y_test shape: (440,)


In [34]:
import os
print(os.getcwd())

c:\Users\naren\Desktop\New_DS\LR_BankLoan\notebook


# save as .pkl

In [35]:
import joblib
import os
# Create directory if it doesn't exist
os.makedirs('data/processed', exist_ok=True)    

joblib.dump((X_train, X_test, y_train, y_test), 'data/processed/train_test_split.pkl')
print("Train-test split saved to 'notebook/data/processed/train_test_split.pkl'")

Train-test split saved to 'notebook/data/processed/train_test_split.pkl'


In [36]:
import joblib

X_train, X_test, y_train, y_test = joblib.load("data/processed/train_test_split.pkl")


In [37]:
# Preview
print("X_train:\n", X_train.head())
print("\n y_train:\n", y_train.head())

print("\n X_test:\n", X_test.head())
print("\n y_test:\n", y_test.head())

X_train:
       amount_requested  amount_funded_by_investors  loan_length  \
1656         -0.828085                   -0.783541            0   
752           0.319528                    0.376410            0   
892          -0.573060                   -0.526059            0   
1041          0.504421                    0.556847            1   
1179         -0.063009                   -0.029052            0   

      debt_to_income_ratio  monthly_income  fico_range  open_credit_lines  \
1656             -0.316860        1.411777   -1.165335          -0.242438   
752              -1.073087        0.737912   -0.881689          -1.156124   
892               0.052614       -0.609819   -1.165335          -0.470860   
1041              1.359066       -0.906321   -1.023512           2.955461   
1179             -1.427942       -0.367228   -0.314396          -1.612966   

      revolving_credit_balance  inquiries_in_the_last_6_months  \
1656                 -0.442514                       -0.82