In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import gaussian_kde, skew, kurtosis, shapiro
from scipy.stats import chi2_contingency
from scipy.stats import f_oneway
from sklearn.metrics import r2_score
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [2]:
# Read the two raw case-study workbooks with pd.read_excel.
# The source directory is defined once so the notebook can be pointed at a
# different location by editing a single line instead of every path literal.
DATA_DIR = Path(r'C:\Users\Harshali\Documents')

# `a` and `b` are the untouched raw frames; later cells work on copies of them.
a = pd.read_excel(DATA_DIR / 'Copy of case_study1.xlsx')
b = pd.read_excel(DATA_DIR / 'Copy of case_study2.xlsx')


In [3]:
# Work on copies so the raw frames `a` and `b` are never modified.
internal_data = a.copy()  # internal data
cibil_data = b.copy()     # CIBIL data

# Show the column index of each frame, with a rule between the two listings.
separator = '=' * 100
print("Internal Data Columns:", internal_data.columns, separator, sep='\n')
print()
print("CIBIL Data Columns:", cibil_data.columns, sep='\n')

Internal Data Columns:
Index(['PROSPECTID', 'Total_TL', 'Tot_Closed_TL', 'Tot_Active_TL',
       'Total_TL_opened_L6M', 'Tot_TL_closed_L6M', 'pct_tl_open_L6M',
       'pct_tl_closed_L6M', 'pct_active_tl', 'pct_closed_tl',
       'Total_TL_opened_L12M', 'Tot_TL_closed_L12M', 'pct_tl_open_L12M',
       'pct_tl_closed_L12M', 'Tot_Missed_Pmnt', 'Auto_TL', 'CC_TL',
       'Consumer_TL', 'Gold_TL', 'Home_TL', 'PL_TL', 'Secured_TL',
       'Unsecured_TL', 'Other_TL', 'Age_Oldest_TL', 'Age_Newest_TL'],
      dtype='object')
CIBIL Data Columns:
Index(['PROSPECTID', 'time_since_recent_payment',
       'time_since_first_deliquency', 'time_since_recent_deliquency',
       'num_times_delinquent', 'max_delinquency_level',
       'max_recent_level_of_deliq', 'num_deliq_6mts', 'num_deliq_12mts',
       'num_deliq_6_12mts', 'max_deliq_6mts', 'max_deliq_12mts',
       'num_times_30p_dpd', 'num_times_60p_dpd', 'num_std', 'num_std_6mts',
       'num_std_12mts', 'num_sub', 'num_sub_6mts', 'num_sub_12mts', 

In [4]:
# Print DataFrame.info() for both frames, each under a banner followed by an "=" rule.
rule = "=" * 100

print("Internal Data Information")
print(rule)
internal_data.info()

# Blank line, rule, blank line between the two summaries
print("\n" + rule + "\n")

print("CIBIL Data Information")
print(rule)
cibil_data.info()

Internal Data Information
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51336 entries, 0 to 51335
Data columns (total 26 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   PROSPECTID            51336 non-null  int64  
 1   Total_TL              51336 non-null  int64  
 2   Tot_Closed_TL         51336 non-null  int64  
 3   Tot_Active_TL         51336 non-null  int64  
 4   Total_TL_opened_L6M   51336 non-null  int64  
 5   Tot_TL_closed_L6M     51336 non-null  int64  
 6   pct_tl_open_L6M       51336 non-null  float64
 7   pct_tl_closed_L6M     51336 non-null  float64
 8   pct_active_tl         51336 non-null  float64
 9   pct_closed_tl         51336 non-null  float64
 10  Total_TL_opened_L12M  51336 non-null  int64  
 11  Tot_TL_closed_L12M    51336 non-null  int64  
 12  pct_tl_open_L12M      51336 non-null  float64
 13  pct_tl_closed_L12M    51336 non-null  float64
 14  Tot_Missed_Pmnt       51336 non-null  int64 

In [5]:
# List the distinct values present in Age_Oldest_TL.
internal_data['Age_Oldest_TL'].unique()    

array([    72,      7,     47,      5,    131,    150,     17,     36,
           16,     66,     64,     96,     49,     38,      9,      6,
          110,    138,      8,     92,     40,     11,     51,     59,
           37,    159,     10,     20,      4,     26,     19,     41,
           73,     45,     32,     33,     48,     18,     60,     14,
           83,     44,     24,     42,     39,     12,     27,     70,
           76,    120,    115,     46,     93,     56,     61,    113,
           67,     74,     22,    191,      3,     65,    192,     43,
           13,     29,    193,     98,     63,     58,     30,     23,
           69,     53,    145,     31,     77,    104,     87,     15,
           62,     21,     97,     34,     28,    137,     86,    124,
          129,     50,     35,      2,    102,    154,    148,    128,
           94,    107,    135,     81,     68,     78,    130,     91,
           71,     89,    123,    213,     88,     52,    175,      1,
      

In [6]:
# Flag every internal_data column in which the value -99999 occurs at least once.
contains_sentinel = internal_data.eq(-99999).any()
columns_with_negative_value = internal_data.columns[contains_sentinel]

# Report the flagged columns
print("Columns containing -99999 values:")
print(columns_with_negative_value)

Columns containing -99999 values:
Index(['Age_Oldest_TL', 'Age_Newest_TL'], dtype='object')


In [7]:
# Columns whose share of -99999 entries is reported
columns_to_check = ['Age_Oldest_TL', 'Age_Newest_TL']

# Every column of the frame has the same length, so the denominator is computed once.
total_values = internal_data.shape[0]

for column in columns_to_check:
    # Number of rows holding -99999 in this column, expressed as a percentage of all rows
    negative_values_count = internal_data[column].eq(-99999).sum()
    negative_values_percentage = (negative_values_count / total_values) * 100
    print(f"Percentage of -99999 values in '{column}': {negative_values_percentage:.2f}%")

Percentage of -99999 values in 'Age_Oldest_TL': 0.08%
Percentage of -99999 values in 'Age_Newest_TL': 0.08%


---------------------------------------------------------------------------------------------------------------------------------

In [8]:
# Shape of the CIBIL frame before any column is removed
cibil_data_shape = cibil_data.shape

# Per-column count of -99999 entries, computed in one vectorised pass
sentinel_counts = (cibil_data == -99999).sum()

# Columns holding more than 10,000 such entries are earmarked for removal
# (kept in the frame's own column order)
columns_to_be_removed = sentinel_counts[sentinel_counts > 10000].index.tolist()

# Report the shape, the removal list and how many columns will be left
divider = '_'*100
print("Shape of cibil_data:", cibil_data_shape)
print(divider)
print("Columns to be removed:", columns_to_be_removed)
print(divider)
remaining_columns = cibil_data_shape[1] - len(columns_to_be_removed)
print('remaining_columns :',remaining_columns)


Shape of cibil_data: (51336, 62)
____________________________________________________________________________________________________
Columns to be removed: ['time_since_first_deliquency', 'time_since_recent_deliquency', 'max_delinquency_level', 'max_deliq_6mts', 'max_deliq_12mts', 'CC_utilization', 'PL_utilization', 'max_unsec_exposure_inPct']
____________________________________________________________________________________________________
remaining_columns : 54


In [10]:
# Remove the columns listed in 'columns_to_be_removed' from the 'cibil_data' DataFrame.
# errors='ignore' keeps this cell safe to re-run: on a second execution the columns
# are already gone and a plain drop would raise KeyError.
cibil_data = cibil_data.drop(columns=columns_to_be_removed, errors='ignore')

In [11]:

# Print the shape of cibil_data after the column removal to confirm the change
print("New shape of cibil_data after removing columns:", cibil_data.shape)

New shape of cibil_data after removing columns: (51336, 54)


In [12]:
# Remove every row of 'cibil_data' that holds -99999 in any of the remaining columns.
# A single row-wise mask replaces re-filtering the whole frame once per column;
# the surviving rows are the same (a row is kept only if no column equals -99999).
has_sentinel = (cibil_data == -99999).any(axis=1)
cibil_data = cibil_data.loc[~has_sentinel]

# Row count of the raw CIBIL frame `b`, read from the data instead of being
# hard-coded, so the report stays correct if the input file changes.
original_rows = b.shape[0]
print("New shape of cibil_data after removing rows:", cibil_data.shape)
print("Total No. of removed rows:", original_rows - cibil_data.shape[0])

New shape of cibil_data after removing rows: (42066, 54)
Total No. of removed rows: 9270


In [13]:
#  "internal_data" and "cibil_data" are your two DataFrames and 'PROSPECTID' is the common column

# Inner-join the two frames on their shared key; only PROSPECTIDs present in both survive.
# NOTE(review): an earlier cell reports -99999 entries in Age_Oldest_TL / Age_Newest_TL of
# internal_data, and no visible cell filters those rows before this merge - confirm
# whether they should be removed first.
df = internal_data.merge(cibil_data, on='PROSPECTID', how='inner')

# Report the merged shape as a sanity check
print("Shape of the merged DataFrame:", df.shape)

Shape of the merged DataFrame: (42066, 79)


In [14]:
# Print the column names of the merged DataFrame as a plain Python list
print("Columns in the combined DataFrame:", df.columns.tolist())

Columns in the combined DataFrame: ['PROSPECTID', 'Total_TL', 'Tot_Closed_TL', 'Tot_Active_TL', 'Total_TL_opened_L6M', 'Tot_TL_closed_L6M', 'pct_tl_open_L6M', 'pct_tl_closed_L6M', 'pct_active_tl', 'pct_closed_tl', 'Total_TL_opened_L12M', 'Tot_TL_closed_L12M', 'pct_tl_open_L12M', 'pct_tl_closed_L12M', 'Tot_Missed_Pmnt', 'Auto_TL', 'CC_TL', 'Consumer_TL', 'Gold_TL', 'Home_TL', 'PL_TL', 'Secured_TL', 'Unsecured_TL', 'Other_TL', 'Age_Oldest_TL', 'Age_Newest_TL', 'time_since_recent_payment', 'num_times_delinquent', 'max_recent_level_of_deliq', 'num_deliq_6mts', 'num_deliq_12mts', 'num_deliq_6_12mts', 'num_times_30p_dpd', 'num_times_60p_dpd', 'num_std', 'num_std_6mts', 'num_std_12mts', 'num_sub', 'num_sub_6mts', 'num_sub_12mts', 'num_dbt', 'num_dbt_6mts', 'num_dbt_12mts', 'num_lss', 'num_lss_6mts', 'num_lss_12mts', 'recent_level_of_deliq', 'tot_enq', 'CC_enq', 'CC_enq_L6m', 'CC_enq_L12m', 'PL_enq', 'PL_enq_L6m', 'PL_enq_L12m', 'time_since_recent_enq', 'enq_L12m', 'enq_L6m', 'enq_L3m', 'MARIT

-----------------------------------------------------------------------------------------------------------------------------

# Feature Selection
  - Chi-Square Test
  - ANOVA Test
  - VIF Calculation

-----------------------------------------------------------------------------------------------------------------------------

In [15]:
# Distinct class labels of the target column Approved_Flag
df['Approved_Flag'].unique()


array(['P2', 'P1', 'P3', 'P4'], dtype=object)

In [16]:
# Print the name of every column stored with the 'object' dtype (the categorical columns).
object_columns = [name for name in df.columns if df[name].dtype == 'object']
for name in object_columns:
    print(name)

MARITALSTATUS
EDUCATION
GENDER
last_prod_enq2
first_prod_enq2
Approved_Flag


In [17]:
# Chi-square test of independence between each categorical feature and Approved_Flag;
# prints the p-value per feature.
for feature in ['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2']:
    contingency_table = pd.crosstab(df[feature], df['Approved_Flag'])
    # chi2_contingency returns (statistic, p-value, dof, expected); only the p-value is shown
    pval = chi2_contingency(contingency_table)[1]
    print(feature, '---', pval)

MARITALSTATUS --- 3.6401547776371343e-233
EDUCATION --- 2.8651007585199264e-30
GENDER --- 1.8610038357046495e-05
last_prod_enq2 --- 0.0
first_prod_enq2 --- 8.375872889007583e-287


In [18]:
# Turn the chi-square p-values into an associated / not-associated verdict per feature.
banner = '_'*120
print("\nFeature Associations:")
print(banner)
for feature in ['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2']:
    contingency_table = pd.crosstab(df[feature], df['Approved_Flag'])
    pval = chi2_contingency(contingency_table)[1]

    # A p-value below 0.05 is read as evidence of association with the target
    if pval < 0.05:
        message = f"{feature} is associated with the target feature (Approved_Flag)."
    else:
        message = f"{feature} is not associated with the target feature (Approved_Flag)."
    print(message)

print(banner)


Feature Associations:
________________________________________________________________________________________________________________________
MARITALSTATUS is associated with the target feature (Approved_Flag).
EDUCATION is associated with the target feature (Approved_Flag).
GENDER is associated with the target feature (Approved_Flag).
last_prod_enq2 is associated with the target feature (Approved_Flag).
first_prod_enq2 is associated with the target feature (Approved_Flag).
________________________________________________________________________________________________________________________


In [20]:

# Numeric feature names for the VIF step: every non-object column except the
# identifier and the target.
excluded_columns = ['PROSPECTID', 'Approved_Flag']
numeric_columns = [
    name
    for name in df.columns
    if name not in excluded_columns and df[name].dtype != 'object'
]

In [21]:
# Display the list of numeric column names
numeric_columns

['Total_TL',
 'Tot_Closed_TL',
 'Tot_Active_TL',
 'Total_TL_opened_L6M',
 'Tot_TL_closed_L6M',
 'pct_tl_open_L6M',
 'pct_tl_closed_L6M',
 'pct_active_tl',
 'pct_closed_tl',
 'Total_TL_opened_L12M',
 'Tot_TL_closed_L12M',
 'pct_tl_open_L12M',
 'pct_tl_closed_L12M',
 'Tot_Missed_Pmnt',
 'Auto_TL',
 'CC_TL',
 'Consumer_TL',
 'Gold_TL',
 'Home_TL',
 'PL_TL',
 'Secured_TL',
 'Unsecured_TL',
 'Other_TL',
 'Age_Oldest_TL',
 'Age_Newest_TL',
 'time_since_recent_payment',
 'num_times_delinquent',
 'max_recent_level_of_deliq',
 'num_deliq_6mts',
 'num_deliq_12mts',
 'num_deliq_6_12mts',
 'num_times_30p_dpd',
 'num_times_60p_dpd',
 'num_std',
 'num_std_6mts',
 'num_std_12mts',
 'num_sub',
 'num_sub_6mts',
 'num_sub_12mts',
 'num_dbt',
 'num_dbt_6mts',
 'num_dbt_12mts',
 'num_lss',
 'num_lss_6mts',
 'num_lss_12mts',
 'recent_level_of_deliq',
 'tot_enq',
 'CC_enq',
 'CC_enq_L6m',
 'CC_enq_L12m',
 'PL_enq',
 'PL_enq_L6m',
 'PL_enq_L12m',
 'time_since_recent_enq',
 'enq_L12m',
 'enq_L6m',
 'enq_L3m',

In [22]:
# Number of numeric columns collected
len(numeric_columns)

72

In [23]:
# Select only numeric columns from the DataFrame
vif_data = df[numeric_columns]

# Get the total number of columns
total_columns = vif_data.shape[1]

# Initialize an empty list to store columns with acceptable VIF values
columns_to_be_kept = []

# Initialize a column index variable: the position inside vif_data of the column
# whose VIF is evaluated next (the VIF loop passes it to variance_inflation_factor)
column_index = 0

In [24]:
# Number of columns currently held in vif_data
vif_data.shape[1]

72

In [25]:
# Perform VIF (Variance Inflation Factor) sequentially to identify highly collinear features.
#
# Columns are visited in their original order. A column whose VIF, measured against
# the columns still present in vif_data, exceeds the threshold is dropped immediately,
# so later columns are evaluated against the already-reduced frame.

# Largest VIF value a column may have and still be kept
VIF_THRESHOLD = 6

# (Re)initialise all loop state here so the cell gives the same result every time it
# is run; relying on state prepared in another cell made a second execution start
# from an already-filtered frame, a non-zero index and a half-filled result list.
vif_data = df[numeric_columns]
columns_to_be_kept = []
column_index = 0

for column_name in numeric_columns:
    # vif_data holds the kept columns first, followed by the not-yet-visited ones,
    # so the column under test always sits at position column_index.
    vif_value = variance_inflation_factor(vif_data, column_index)

    # Print the column index and VIF value
    print(column_index, '---', vif_value)

    if vif_value <= VIF_THRESHOLD:
        # Acceptable: keep the column and move on to the next position
        columns_to_be_kept.append(column_name)
        column_index += 1
    else:
        # Too collinear (this also catches inf / nan): drop the column. The next
        # candidate slides into the same position, so the index is not advanced.
        vif_data = vif_data.drop(columns=[column_name])

  vif = 1. / (1. - r_squared_i)


0 --- inf


  vif = 1. / (1. - r_squared_i)


0 --- inf
0 --- 11.319677860911183
0 --- 8.291829772566755
0 --- 6.520138234382173
0 --- 5.059265701555735
1 --- 2.6064779388021044


  vif = 1. / (1. - r_squared_i)


2 --- inf
2 --- 1779.2530019989363
2 --- 8.57605846135607
2 --- 3.829565647163703
3 --- 5.4692110955705875
4 --- 5.503957502932518
5 --- 1.973712154119323


  vif = 1. / (1. - r_squared_i)


6 --- inf
6 --- 4.811919298953734
7 --- 23.146961044025943
7 --- 30.634993848319926
7 --- 4.384922735753695
8 --- 3.0648663541161247
9 --- 2.8932461784347443
10 --- 4.369654303665845
11 --- 2.208341258584075
12 --- 566.1909055769335
12 --- 1.0006593766842489
13 --- 1.9695830645742576
14 --- 7.841218783259259
14 --- 5.247703415624753


  vif = 1. / (1. - r_squared_i)


15 --- inf
15 --- 7.377956585088553
15 --- 1.4257933668144418
16 --- 8.084822288044935
16 --- 1.6226699218393812
17 --- 7.238615905249871
17 --- 15.645211505952748
17 --- 1.8184084209088587
18 --- 1.5055024528703032
19 --- 2.1720129343912187
20 --- 2.623539277547352
21 --- 2.2929276257397024
22 --- 7.3588119016168045
22 --- 2.1583860086471636
23 --- 2.8651983640201
24 --- 6.457951397444763
24 --- 2.846406166026212
25 --- 4.751462405159283
26 --- 16.664353050297613
26 --- 6.433862464128575
26 --- 8.90569527930927
26 --- 2.394843877692056
27 --- 8.625154493499016
27 --- 13.097583499300075
27 --- 3.5102775257188883
28 --- 1.8461247374915497
29 --- 18.35043652870675
29 --- 10.708249343242189
29 --- 2.3460067597985903
30 --- 21.54248963193387
30 --- 2.796240194688435
31 --- 3.3738625526342503
32 --- 9.973017265651695
32 --- 6.092683218698287
32 --- 1.0011742340538428
33 --- 3.0642351806835495
34 --- 2.807411739205449
35 --- 20.28118581063136
35 --- 15.881630139889586
35 --- 1.83280523444833

In [26]:
len(columns_to_be_kept)   # numeric columns left after the VIF filter: 72 => 39

39

In [27]:
# Perform one-way ANOVA for columns_to_be_kept: a numeric feature is retained when
# its values differ significantly across the Approved_Flag classes.
# (f_oneway is imported with the other dependencies at the top of the notebook.)

# Significance level for the ANOVA p-value
ANOVA_ALPHA = 0.05

# Row masks per target class; they do not depend on the feature, so build them once
class_masks = [df['Approved_Flag'] == flag for flag in ['P1', 'P2', 'P3', 'P4']]

columns_to_be_kept_numerical = []

for column in columns_to_be_kept:
    # Split the feature's values into one group per 'Approved_Flag' class
    groups = [df.loc[mask, column] for mask in class_masks]

    # Perform ANOVA
    f_statistic, p_value = f_oneway(*groups)

    # Decision making based on p-values (ANSI colour codes highlight name and verdict)
    if p_value <= ANOVA_ALPHA:
        print(f"\033[34m{column}\033[0m has a \033[32msignificant relationship\033[0m with the target feature (Approved_Flag).")
        columns_to_be_kept_numerical.append(column)
    else:
        print(f"\033[34m{column}\033[0m does not have a \033[31msignificant relationship\033[0m with the target feature (Approved_Flag).")
    print("_______________________________________________________________________________")

[34mpct_tl_open_L6M[0m has a [32msignificant relationship[0m with the target feature (Approved_Flag).
_______________________________________________________________________________
[34mpct_tl_closed_L6M[0m has a [32msignificant relationship[0m with the target feature (Approved_Flag).
_______________________________________________________________________________
[34mTot_TL_closed_L12M[0m has a [32msignificant relationship[0m with the target feature (Approved_Flag).
_______________________________________________________________________________
[34mpct_tl_open_L12M[0m has a [32msignificant relationship[0m with the target feature (Approved_Flag).
_______________________________________________________________________________
[34mpct_tl_closed_L12M[0m has a [32msignificant relationship[0m with the target feature (Approved_Flag).
_______________________________________________________________________________
[34mTot_Missed_Pmnt[0m has a [32msignificant relationship

In [28]:
# Number of numeric columns that passed the ANOVA test
len(columns_to_be_kept_numerical)

36

--------------------------------------------------------------------------------------------------------------------------

Numerical features retained after the ANOVA test: 39 => 36

In [35]:
# Distinct category labels of the EDUCATION column
df['EDUCATION'].unique()

array(['12TH', 'GRADUATE', 'SSC', 'POST-GRADUATE', 'UNDER GRADUATE',
       'OTHERS', 'PROFESSIONAL'], dtype=object)

In [36]:
# Collect the names of the categorical columns (object or category dtype)
categorical_columns = df.select_dtypes(include=['object', 'category']).columns

# Display all categorical columns
print(categorical_columns)

Index(['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2',
       'first_prod_enq2', 'Approved_Flag'],
      dtype='object')


In [37]:
# Number of categorical columns found
len(categorical_columns)

6

In [38]:
# Total required columns = 42 (36 numerical + 5 categorical features + the output column Approved_Flag)

-------------------------------------------------------------------------------------------------------------------------------------

In [39]:
# Final feature list: the numeric columns that passed the ANOVA test followed by the
# five categorical features.
features = [
    *columns_to_be_kept_numerical,
    'MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2',
]

# Keep only the selected features plus the target, with the target as the last column
df = df[[*features, 'Approved_Flag']]

In [40]:
# Column count of the reduced DataFrame (selected features + Approved_Flag)
len(df.columns)

42

In [None]:
# Save the feature-selected DataFrame (selected features + Approved_Flag) to an Excel file,
# without writing the row index
df.to_excel('Data_file.xlsx', index=False)


-------------------------------------------------------------------------------------------------------------------------------------------

In [41]:
# Load Data_file.xlsx back from disk into a separate frame
df1=pd.read_excel('Data_file.xlsx')

In [42]:
# Column index of the reloaded frame
df1.columns

Index(['pct_tl_open_L6M', 'pct_tl_closed_L6M', 'Tot_TL_closed_L12M',
       'pct_tl_open_L12M', 'pct_tl_closed_L12M', 'Tot_Missed_Pmnt', 'CC_TL',
       'Home_TL', 'PL_TL', 'Secured_TL', 'Unsecured_TL', 'Other_TL',
       'time_since_recent_payment', 'max_recent_level_of_deliq',
       'num_deliq_6_12mts', 'num_times_60p_dpd', 'num_std_12mts', 'num_sub',
       'num_sub_6mts', 'num_sub_12mts', 'num_dbt', 'num_dbt_12mts', 'num_lss',
       'recent_level_of_deliq', 'CC_enq_L12m', 'PL_enq_L12m',
       'time_since_recent_enq', 'enq_L3m', 'NETMONTHLYINCOME',
       'Time_With_Curr_Empr', 'CC_Flag', 'PL_Flag', 'pct_PL_enq_L6m_of_ever',
       'pct_CC_enq_L6m_of_ever', 'HL_Flag', 'GL_Flag', 'MARITALSTATUS',
       'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2',
       'Approved_Flag'],
      dtype='object')

----------------------------------------------------------------------------------------------------------------------------------------------------

In [43]:
# Preview Updated_Unseen_Dataset1.xlsx (the displayed frame ends with a 'Predictions' column)
pd.read_excel('Updated_Unseen_Dataset1.xlsx')

Unnamed: 0,pct_tl_open_L6M,pct_tl_closed_L6M,Tot_TL_closed_L12M,pct_tl_open_L12M,pct_tl_closed_L12M,Tot_Missed_Pmnt,CC_TL,Home_TL,PL_TL,Secured_TL,...,pct_PL_enq_L6m_of_ever,pct_CC_enq_L6m_of_ever,HL_Flag,GL_Flag,MARITALSTATUS,EDUCATION,GENDER,last_prod_enq2,first_prod_enq2,Predictions
0,0.000,0.0,0,0.000,0.000,0,0,0,4,1,...,0.000,0.0,1,0,Married,12TH,M,PL,PL,1
1,0.000,0.0,0,1.000,0.000,0,0,0,0,0,...,0.000,0.0,0,0,Single,GRADUATE,F,ConsumerLoan,ConsumerLoan,1
2,0.125,0.0,0,0.250,0.000,1,0,0,0,2,...,0.000,0.0,1,0,Married,SSC,M,ConsumerLoan,others,1
3,0.000,0.0,0,0.167,0.000,0,0,0,0,3,...,0.000,0.0,0,0,Married,POST-GRADUATE,M,AL,AL,0
4,0.000,0.0,1,0.429,0.167,0,0,0,0,6,...,0.429,0.0,1,0,Married,12TH,M,ConsumerLoan,PL,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.000,0.0,0,0.152,0.000,0,0,0,1,1,...,0.000,0.0,0,0,Married,GRADUATE,M,ConsumerLoan,others,1
96,0.222,0.0,2,0.538,0.222,0,0,0,0,1,...,0.000,0.0,0,0,Married,12TH,M,ConsumerLoan,others,1
97,0.000,0.0,0,0.211,0.000,0,0,0,0,1,...,0.000,0.0,0,0,Married,SSC,M,others,others,1
98,0.000,1.0,1,0.396,1.000,0,0,0,0,1,...,0.000,0.0,0,0,Married,12TH,M,others,others,1


---------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
pd.read_excel('Updated_Unseen_Dataset.xlsx')   # preview the unseen dataset together with its 'Predictions' column

Unnamed: 0,pct_tl_open_L6M,pct_tl_closed_L6M,Tot_TL_closed_L12M,pct_tl_open_L12M,pct_tl_closed_L12M,Tot_Missed_Pmnt,CC_TL,Home_TL,PL_TL,Secured_TL,...,pct_PL_enq_L6m_of_ever,pct_CC_enq_L6m_of_ever,HL_Flag,GL_Flag,MARITALSTATUS,EDUCATION,GENDER,last_prod_enq2,first_prod_enq2,Predictions
0,0.000,0.0,0,0.000,0.000,0,0,0,4,1,...,0.000,0.0,1,0,Married,12TH,M,PL,PL,P2
1,0.000,0.0,0,1.000,0.000,0,0,0,0,0,...,0.000,0.0,0,0,Single,GRADUATE,F,ConsumerLoan,ConsumerLoan,P2
2,0.125,0.0,0,0.250,0.000,1,0,0,0,2,...,0.000,0.0,1,0,Married,SSC,M,ConsumerLoan,others,P2
3,0.000,0.0,0,0.167,0.000,0,0,0,0,3,...,0.000,0.0,0,0,Married,POST-GRADUATE,M,AL,AL,P1
4,0.000,0.0,1,0.429,0.167,0,0,0,0,6,...,0.429,0.0,1,0,Married,12TH,M,ConsumerLoan,PL,P4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.000,0.0,0,0.152,0.000,0,0,0,1,1,...,0.000,0.0,0,0,Married,GRADUATE,M,ConsumerLoan,others,P2
96,0.222,0.0,2,0.538,0.222,0,0,0,0,1,...,0.000,0.0,0,0,Married,12TH,M,ConsumerLoan,others,P2
97,0.000,0.0,0,0.211,0.000,0,0,0,0,1,...,0.000,0.0,0,0,Married,SSC,M,others,others,P2
98,0.000,1.0,1,0.396,1.000,0,0,0,0,1,...,0.000,0.0,0,0,Married,12TH,M,others,others,P2


_________________________________________________________________________________________________________________________________________