In [2]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 50000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from varclushi import VarClusHi

In [3]:
df = pd.read_excel('Data- acquisition scorecard.xlsx')

In [4]:
df.rename(columns={'ENQ_1':'days_last_enq','ENQ_2':'days_hl_last_enq','ENQ_3':'#_enq_3mnths','ENQ_4':'#_hle_enq_3mnths','ENQ_5':'#_enq_12mnths','ENQ_6':'#_hle_enq_12mnths','ENQ_7':'Tot_enq','ENQ_8':'Tot_hle_enq','ACCOUNT_9':'#_defaults_3mnths','ACCOUNT_10':'#_defaults_12mnths','ACCOUNT_11':'days_last_acc_open','ACCOUNT_12':'Tot_accounts','ACCOUNT_13':'Tot_hle_accounts','ACCOUNT_14':'Tot_unsecured_loans','ACCOUNT_15':'Tot_live_loans','ACCOUNT_16':'Tot_live_homeloans','ACCOUNT_17':'Tot_live_unsecuredloans','ACCOUNT_18':'Tot_amt_outstanding','ACCOUNT_19':'Tot_secured_outstanding_amt','ACCOUNT_20':'Tot_unsecured_outstanding_amt','ACCOUNT_21':'Avg_days_in_debt'},inplace=True)

In [5]:
df_copy = df.copy()

## Data Preparation

In [6]:
len(df)

3980

In [7]:
df.ID.nunique()

3894

In [8]:
df.drop(df[df.duplicated()].index,inplace=True)

### Missing

In [9]:
df.isnull().sum().sort_values(ascending=False).head(7)

Bank_balance        626
Type_of_industry      5
SEX                   4
Marital_Status        4
Months_in_city        4
Age                   1
ID                    0
dtype: int64

In [10]:
sal_bb = df.loc[df['Customer_type'] == 'SALARIED','Bank_balance'].mean()
se_bb = df.loc[df['Customer_type'] == 'SELF EMPLOYED','Bank_balance'].mean()
ne_bb = df.loc[df['Customer_type'] == 'NON EARNING','Bank_balance'].mean()

In [11]:
df['Bank_balance'] = np.where((df['Customer_type'] == 'SALARIED') & (df['Bank_balance'].isna()),sal_bb,np.where(((df['Customer_type'] == 'SELF EMPLOYED')&(df['Bank_balance'].isna())),se_bb,np.where(((df['Customer_type'] == 'NON EARNING') & (df['Bank_balance'].isna())),ne_bb,df['Bank_balance'])))

In [12]:
df.isnull().sum().sort_values(ascending=False).head(7)

Type_of_industry       5
SEX                    4
Marital_Status         4
Months_in_city         4
Age                    1
ID                     0
Tot_unsecured_loans    0
dtype: int64

In [13]:
df.dropna(inplace=True)

## Variable creation

In [14]:
df.head(1)

Unnamed: 0,ID,Age,Customer_type,Gross_income,Net_income,SEX,Type_of_industry,Marital_Status,Months_in_city,Months_in_current_job,Org_Type,Bank_balance,Debt_ratio,Target,days_last_enq,days_hl_last_enq,#_enq_3mnths,#_hle_enq_3mnths,#_enq_12mnths,#_hle_enq_12mnths,Tot_enq,Tot_hle_enq,#_defaults_3mnths,#_defaults_12mnths,days_last_acc_open,Tot_accounts,Tot_hle_accounts,Tot_unsecured_loans,Tot_live_loans,Tot_live_homeloans,Tot_live_unsecuredloans,Tot_amt_outstanding,Tot_secured_outstanding_amt,Tot_unsecured_outstanding_amt,Avg_days_in_debt
0,1794,40.0,SALARIED,70891.0,62491.0,MALE,OTHERS,MARRIED,229.0,108,INDIVIDUAL,37806.0,0.19,0,324,0,0,0,1,0,14,0,0,0,1486,9,0,8,2,0,2,275606,0,275606,5311


In [15]:
dt = df.dtypes.to_frame()

In [16]:
non_obj = dt.loc[dt[0] != 'object',:].index.tolist()

In [17]:
non_obj.remove('ID')
non_obj.remove('Target')

In [19]:
for i in non_obj:
    for ii in non_obj:
        if ii != i:
            df.loc[:,'{}/{}'.format(i,ii)] = df[i]/df[ii]
        else:
            continue

In [21]:
dt = df.dtypes.to_frame()

In [22]:
df.head(2)

Unnamed: 0,ID,Age,Customer_type,Gross_income,Net_income,SEX,Type_of_industry,Marital_Status,Months_in_city,Months_in_current_job,Org_Type,Bank_balance,Debt_ratio,Target,days_last_enq,days_hl_last_enq,#_enq_3mnths,#_hle_enq_3mnths,#_enq_12mnths,#_hle_enq_12mnths,Tot_enq,Tot_hle_enq,#_defaults_3mnths,#_defaults_12mnths,days_last_acc_open,Tot_accounts,Tot_hle_accounts,Tot_unsecured_loans,Tot_live_loans,Tot_live_homeloans,Tot_live_unsecuredloans,Tot_amt_outstanding,Tot_secured_outstanding_amt,Tot_unsecured_outstanding_amt,Avg_days_in_debt,Age/Gross_income,Age/Net_income,Age/Months_in_city,Age/Months_in_current_job,Age/Bank_balance,Age/Debt_ratio,Age/days_last_enq,Age/days_hl_last_enq,Age/#_enq_3mnths,Age/#_hle_enq_3mnths,Age/#_enq_12mnths,Age/#_hle_enq_12mnths,Age/Tot_enq,Age/Tot_hle_enq,Age/#_defaults_3mnths,Age/#_defaults_12mnths,Age/days_last_acc_open,Age/Tot_accounts,Age/Tot_hle_accounts,Age/Tot_unsecured_loans,Age/Tot_live_loans,Age/Tot_live_homeloans,Age/Tot_live_unsecuredloans,Age/Tot_amt_outstanding,Age/Tot_secured_outstanding_amt,Age/Tot_unsecured_outstanding_amt,Age/Avg_days_in_debt,Gross_income/Age,Gross_income/Net_income,Gross_income/Months_in_city,Gross_income/Months_in_current_job,Gross_income/Bank_balance,Gross_income/Debt_ratio,Gross_income/days_last_enq,Gross_income/days_hl_last_enq,Gross_income/#_enq_3mnths,Gross_income/#_hle_enq_3mnths,Gross_income/#_enq_12mnths,Gross_income/#_hle_enq_12mnths,Gross_income/Tot_enq,Gross_income/Tot_hle_enq,Gross_income/#_defaults_3mnths,Gross_income/#_defaults_12mnths,Gross_income/days_last_acc_open,Gross_income/Tot_accounts,Gross_income/Tot_hle_accounts,Gross_income/Tot_unsecured_loans,Gross_income/Tot_live_loans,Gross_income/Tot_live_homeloans,Gross_income/Tot_live_unsecuredloans,Gross_income/Tot_amt_outstanding,Gross_income/Tot_secured_outstanding_amt,Gross_income/Tot_unsecured_outstanding_amt,Gross_income/Avg_days_in_debt,Net_income/Age,Net_income/Gross_income,Net_income/Months_in_city,Net_income/Months_in_current_job,Net_income/Bank_balance,Net_income/Debt_ratio,Net_income/days_last_enq,Net_income/days_hl_last_enq,Net_income/#_enq_3mnths,Net_income/#_hle_enq_3mnths,Net_income/#_enq_12mnths,Net_income/#_hle_enq_12mnths,Net_income/Tot_enq,Net_income/Tot_hle_enq,Net_income/#_defaults_3mnths,Net_income/#_defaults_12mnths,Net_income/days_last_acc_open,Net_income/Tot_accounts,Net_income/Tot_hle_accounts,Net_income/Tot_unsecured_loans,Net_income/Tot_live_loans,Net_income/Tot_live_homeloans,Net_income/Tot_live_unsecuredloans,Net_income/Tot_amt_outstanding,Net_income/Tot_secured_outstanding_amt,Net_income/Tot_unsecured_outstanding_amt,Net_income/Avg_days_in_debt,Months_in_city/Age,Months_in_city/Gross_income,Months_in_city/Net_income,Months_in_city/Months_in_current_job,Months_in_city/Bank_balance,Months_in_city/Debt_ratio,Months_in_city/days_last_enq,Months_in_city/days_hl_last_enq,Months_in_city/#_enq_3mnths,Months_in_city/#_hle_enq_3mnths,Months_in_city/#_enq_12mnths,Months_in_city/#_hle_enq_12mnths,Months_in_city/Tot_enq,Months_in_city/Tot_hle_enq,Months_in_city/#_defaults_3mnths,Months_in_city/#_defaults_12mnths,Months_in_city/days_last_acc_open,Months_in_city/Tot_accounts,Months_in_city/Tot_hle_accounts,Months_in_city/Tot_unsecured_loans,Months_in_city/Tot_live_loans,Months_in_city/Tot_live_homeloans,Months_in_city/Tot_live_unsecuredloans,Months_in_city/Tot_amt_outstanding,Months_in_city/Tot_secured_outstanding_amt,Months_in_city/Tot_unsecured_outstanding_amt,Months_in_city/Avg_days_in_debt,Months_in_current_job/Age,Months_in_current_job/Gross_income,Months_in_current_job/Net_income,Months_in_current_job/Months_in_city,Months_in_current_job/Bank_balance,Months_in_current_job/Debt_ratio,Months_in_current_job/days_last_enq,Months_in_current_job/days_hl_last_enq,Months_in_current_job/#_enq_3mnths,Months_in_current_job/#_hle_enq_3mnths,Months_in_current_job/#_enq_12mnths,Months_in_current_job/#_hle_enq_12mnths,Months_in_current_job/Tot_enq,Months_in_current_job/Tot_hle_enq,Months_in_current_job/#_defaults_3mnths,Months_in_current_job/#_defaults_12mnths,Months_in_current_job/days_last_acc_open,Months_in_current_job/Tot_accounts,Months_in_current_job/Tot_hle_accounts,Months_in_current_job/Tot_unsecured_loans,Months_in_current_job/Tot_live_loans,Months_in_current_job/Tot_live_homeloans,Months_in_current_job/Tot_live_unsecuredloans,Months_in_current_job/Tot_amt_outstanding,Months_in_current_job/Tot_secured_outstanding_amt,Months_in_current_job/Tot_unsecured_outstanding_amt,Months_in_current_job/Avg_days_in_debt,Bank_balance/Age,Bank_balance/Gross_income,Bank_balance/Net_income,Bank_balance/Months_in_city,Bank_balance/Months_in_current_job,Bank_balance/Debt_ratio,Bank_balance/days_last_enq,Bank_balance/days_hl_last_enq,Bank_balance/#_enq_3mnths,Bank_balance/#_hle_enq_3mnths,Bank_balance/#_enq_12mnths,Bank_balance/#_hle_enq_12mnths,Bank_balance/Tot_enq,Bank_balance/Tot_hle_enq,Bank_balance/#_defaults_3mnths,Bank_balance/#_defaults_12mnths,Bank_balance/days_last_acc_open,Bank_balance/Tot_accounts,Bank_balance/Tot_hle_accounts,Bank_balance/Tot_unsecured_loans,Bank_balance/Tot_live_loans,Bank_balance/Tot_live_homeloans,Bank_balance/Tot_live_unsecuredloans,Bank_balance/Tot_amt_outstanding,Bank_balance/Tot_secured_outstanding_amt,Bank_balance/Tot_unsecured_outstanding_amt,Bank_balance/Avg_days_in_debt,Debt_ratio/Age,Debt_ratio/Gross_income,Debt_ratio/Net_income,Debt_ratio/Months_in_city,Debt_ratio/Months_in_current_job,Debt_ratio/Bank_balance,Debt_ratio/days_last_enq,Debt_ratio/days_hl_last_enq,Debt_ratio/#_enq_3mnths,Debt_ratio/#_hle_enq_3mnths,Debt_ratio/#_enq_12mnths,Debt_ratio/#_hle_enq_12mnths,Debt_ratio/Tot_enq,Debt_ratio/Tot_hle_enq,Debt_ratio/#_defaults_3mnths,Debt_ratio/#_defaults_12mnths,Debt_ratio/days_last_acc_open,Debt_ratio/Tot_accounts,Debt_ratio/Tot_hle_accounts,Debt_ratio/Tot_unsecured_loans,Debt_ratio/Tot_live_loans,Debt_ratio/Tot_live_homeloans,Debt_ratio/Tot_live_unsecuredloans,Debt_ratio/Tot_amt_outstanding,Debt_ratio/Tot_secured_outstanding_amt,Debt_ratio/Tot_unsecured_outstanding_amt,Debt_ratio/Avg_days_in_debt,days_last_enq/Age,days_last_enq/Gross_income,days_last_enq/Net_income,days_last_enq/Months_in_city,days_last_enq/Months_in_current_job,days_last_enq/Bank_balance,days_last_enq/Debt_ratio,days_last_enq/days_hl_last_enq,days_last_enq/#_enq_3mnths,days_last_enq/#_hle_enq_3mnths,days_last_enq/#_enq_12mnths,days_last_enq/#_hle_enq_12mnths,days_last_enq/Tot_enq,days_last_enq/Tot_hle_enq,days_last_enq/#_defaults_3mnths,days_last_enq/#_defaults_12mnths,days_last_enq/days_last_acc_open,days_last_enq/Tot_accounts,days_last_enq/Tot_hle_accounts,days_last_enq/Tot_unsecured_loans,days_last_enq/Tot_live_loans,days_last_enq/Tot_live_homeloans,days_last_enq/Tot_live_unsecuredloans,days_last_enq/Tot_amt_outstanding,days_last_enq/Tot_secured_outstanding_amt,days_last_enq/Tot_unsecured_outstanding_amt,...,Tot_accounts/Tot_live_loans,Tot_accounts/Tot_live_homeloans,Tot_accounts/Tot_live_unsecuredloans,Tot_accounts/Tot_amt_outstanding,Tot_accounts/Tot_secured_outstanding_amt,Tot_accounts/Tot_unsecured_outstanding_amt,Tot_accounts/Avg_days_in_debt,Tot_hle_accounts/Age,Tot_hle_accounts/Gross_income,Tot_hle_accounts/Net_income,Tot_hle_accounts/Months_in_city,Tot_hle_accounts/Months_in_current_job,Tot_hle_accounts/Bank_balance,Tot_hle_accounts/Debt_ratio,Tot_hle_accounts/days_last_enq,Tot_hle_accounts/days_hl_last_enq,Tot_hle_accounts/#_enq_3mnths,Tot_hle_accounts/#_hle_enq_3mnths,Tot_hle_accounts/#_enq_12mnths,Tot_hle_accounts/#_hle_enq_12mnths,Tot_hle_accounts/Tot_enq,Tot_hle_accounts/Tot_hle_enq,Tot_hle_accounts/#_defaults_3mnths,Tot_hle_accounts/#_defaults_12mnths,Tot_hle_accounts/days_last_acc_open,Tot_hle_accounts/Tot_accounts,Tot_hle_accounts/Tot_unsecured_loans,Tot_hle_accounts/Tot_live_loans,Tot_hle_accounts/Tot_live_homeloans,Tot_hle_accounts/Tot_live_unsecuredloans,Tot_hle_accounts/Tot_amt_outstanding,Tot_hle_accounts/Tot_secured_outstanding_amt,Tot_hle_accounts/Tot_unsecured_outstanding_amt,Tot_hle_accounts/Avg_days_in_debt,Tot_unsecured_loans/Age,Tot_unsecured_loans/Gross_income,Tot_unsecured_loans/Net_income,Tot_unsecured_loans/Months_in_city,Tot_unsecured_loans/Months_in_current_job,Tot_unsecured_loans/Bank_balance,Tot_unsecured_loans/Debt_ratio,Tot_unsecured_loans/days_last_enq,Tot_unsecured_loans/days_hl_last_enq,Tot_unsecured_loans/#_enq_3mnths,Tot_unsecured_loans/#_hle_enq_3mnths,Tot_unsecured_loans/#_enq_12mnths,Tot_unsecured_loans/#_hle_enq_12mnths,Tot_unsecured_loans/Tot_enq,Tot_unsecured_loans/Tot_hle_enq,Tot_unsecured_loans/#_defaults_3mnths,Tot_unsecured_loans/#_defaults_12mnths,Tot_unsecured_loans/days_last_acc_open,Tot_unsecured_loans/Tot_accounts,Tot_unsecured_loans/Tot_hle_accounts,Tot_unsecured_loans/Tot_live_loans,Tot_unsecured_loans/Tot_live_homeloans,Tot_unsecured_loans/Tot_live_unsecuredloans,Tot_unsecured_loans/Tot_amt_outstanding,Tot_unsecured_loans/Tot_secured_outstanding_amt,Tot_unsecured_loans/Tot_unsecured_outstanding_amt,Tot_unsecured_loans/Avg_days_in_debt,Tot_live_loans/Age,Tot_live_loans/Gross_income,Tot_live_loans/Net_income,Tot_live_loans/Months_in_city,Tot_live_loans/Months_in_current_job,Tot_live_loans/Bank_balance,Tot_live_loans/Debt_ratio,Tot_live_loans/days_last_enq,Tot_live_loans/days_hl_last_enq,Tot_live_loans/#_enq_3mnths,Tot_live_loans/#_hle_enq_3mnths,Tot_live_loans/#_enq_12mnths,Tot_live_loans/#_hle_enq_12mnths,Tot_live_loans/Tot_enq,Tot_live_loans/Tot_hle_enq,Tot_live_loans/#_defaults_3mnths,Tot_live_loans/#_defaults_12mnths,Tot_live_loans/days_last_acc_open,Tot_live_loans/Tot_accounts,Tot_live_loans/Tot_hle_accounts,Tot_live_loans/Tot_unsecured_loans,Tot_live_loans/Tot_live_homeloans,Tot_live_loans/Tot_live_unsecuredloans,Tot_live_loans/Tot_amt_outstanding,Tot_live_loans/Tot_secured_outstanding_amt,Tot_live_loans/Tot_unsecured_outstanding_amt,Tot_live_loans/Avg_days_in_debt,Tot_live_homeloans/Age,Tot_live_homeloans/Gross_income,Tot_live_homeloans/Net_income,Tot_live_homeloans/Months_in_city,Tot_live_homeloans/Months_in_current_job,Tot_live_homeloans/Bank_balance,Tot_live_homeloans/Debt_ratio,Tot_live_homeloans/days_last_enq,Tot_live_homeloans/days_hl_last_enq,Tot_live_homeloans/#_enq_3mnths,Tot_live_homeloans/#_hle_enq_3mnths,Tot_live_homeloans/#_enq_12mnths,Tot_live_homeloans/#_hle_enq_12mnths,Tot_live_homeloans/Tot_enq,Tot_live_homeloans/Tot_hle_enq,Tot_live_homeloans/#_defaults_3mnths,Tot_live_homeloans/#_defaults_12mnths,Tot_live_homeloans/days_last_acc_open,Tot_live_homeloans/Tot_accounts,Tot_live_homeloans/Tot_hle_accounts,Tot_live_homeloans/Tot_unsecured_loans,Tot_live_homeloans/Tot_live_loans,Tot_live_homeloans/Tot_live_unsecuredloans,Tot_live_homeloans/Tot_amt_outstanding,Tot_live_homeloans/Tot_secured_outstanding_amt,Tot_live_homeloans/Tot_unsecured_outstanding_amt,Tot_live_homeloans/Avg_days_in_debt,Tot_live_unsecuredloans/Age,Tot_live_unsecuredloans/Gross_income,Tot_live_unsecuredloans/Net_income,Tot_live_unsecuredloans/Months_in_city,Tot_live_unsecuredloans/Months_in_current_job,Tot_live_unsecuredloans/Bank_balance,Tot_live_unsecuredloans/Debt_ratio,Tot_live_unsecuredloans/days_last_enq,Tot_live_unsecuredloans/days_hl_last_enq,Tot_live_unsecuredloans/#_enq_3mnths,Tot_live_unsecuredloans/#_hle_enq_3mnths,Tot_live_unsecuredloans/#_enq_12mnths,Tot_live_unsecuredloans/#_hle_enq_12mnths,Tot_live_unsecuredloans/Tot_enq,Tot_live_unsecuredloans/Tot_hle_enq,Tot_live_unsecuredloans/#_defaults_3mnths,Tot_live_unsecuredloans/#_defaults_12mnths,Tot_live_unsecuredloans/days_last_acc_open,Tot_live_unsecuredloans/Tot_accounts,Tot_live_unsecuredloans/Tot_hle_accounts,Tot_live_unsecuredloans/Tot_unsecured_loans,Tot_live_unsecuredloans/Tot_live_loans,Tot_live_unsecuredloans/Tot_live_homeloans,Tot_live_unsecuredloans/Tot_amt_outstanding,Tot_live_unsecuredloans/Tot_secured_outstanding_amt,Tot_live_unsecuredloans/Tot_unsecured_outstanding_amt,Tot_live_unsecuredloans/Avg_days_in_debt,Tot_amt_outstanding/Age,Tot_amt_outstanding/Gross_income,Tot_amt_outstanding/Net_income,Tot_amt_outstanding/Months_in_city,Tot_amt_outstanding/Months_in_current_job,Tot_amt_outstanding/Bank_balance,Tot_amt_outstanding/Debt_ratio,Tot_amt_outstanding/days_last_enq,Tot_amt_outstanding/days_hl_last_enq,Tot_amt_outstanding/#_enq_3mnths,Tot_amt_outstanding/#_hle_enq_3mnths,Tot_amt_outstanding/#_enq_12mnths,Tot_amt_outstanding/#_hle_enq_12mnths,Tot_amt_outstanding/Tot_enq,Tot_amt_outstanding/Tot_hle_enq,Tot_amt_outstanding/#_defaults_3mnths,Tot_amt_outstanding/#_defaults_12mnths,Tot_amt_outstanding/days_last_acc_open,Tot_amt_outstanding/Tot_accounts,Tot_amt_outstanding/Tot_hle_accounts,Tot_amt_outstanding/Tot_unsecured_loans,Tot_amt_outstanding/Tot_live_loans,Tot_amt_outstanding/Tot_live_homeloans,Tot_amt_outstanding/Tot_live_unsecuredloans,Tot_amt_outstanding/Tot_secured_outstanding_amt,Tot_amt_outstanding/Tot_unsecured_outstanding_amt,Tot_amt_outstanding/Avg_days_in_debt,Tot_secured_outstanding_amt/Age,Tot_secured_outstanding_amt/Gross_income,Tot_secured_outstanding_amt/Net_income,Tot_secured_outstanding_amt/Months_in_city,Tot_secured_outstanding_amt/Months_in_current_job,Tot_secured_outstanding_amt/Bank_balance,Tot_secured_outstanding_amt/Debt_ratio,Tot_secured_outstanding_amt/days_last_enq,Tot_secured_outstanding_amt/days_hl_last_enq,Tot_secured_outstanding_amt/#_enq_3mnths,Tot_secured_outstanding_amt/#_hle_enq_3mnths,Tot_secured_outstanding_amt/#_enq_12mnths,Tot_secured_outstanding_amt/#_hle_enq_12mnths,Tot_secured_outstanding_amt/Tot_enq,Tot_secured_outstanding_amt/Tot_hle_enq,Tot_secured_outstanding_amt/#_defaults_3mnths,Tot_secured_outstanding_amt/#_defaults_12mnths,Tot_secured_outstanding_amt/days_last_acc_open,Tot_secured_outstanding_amt/Tot_accounts,Tot_secured_outstanding_amt/Tot_hle_accounts,Tot_secured_outstanding_amt/Tot_unsecured_loans,Tot_secured_outstanding_amt/Tot_live_loans,Tot_secured_outstanding_amt/Tot_live_homeloans,Tot_secured_outstanding_amt/Tot_live_unsecuredloans,Tot_secured_outstanding_amt/Tot_amt_outstanding,Tot_secured_outstanding_amt/Tot_unsecured_outstanding_amt,Tot_secured_outstanding_amt/Avg_days_in_debt,Tot_unsecured_outstanding_amt/Age,Tot_unsecured_outstanding_amt/Gross_income,Tot_unsecured_outstanding_amt/Net_income,Tot_unsecured_outstanding_amt/Months_in_city,Tot_unsecured_outstanding_amt/Months_in_current_job,Tot_unsecured_outstanding_amt/Bank_balance,Tot_unsecured_outstanding_amt/Debt_ratio,Tot_unsecured_outstanding_amt/days_last_enq,Tot_unsecured_outstanding_amt/days_hl_last_enq,Tot_unsecured_outstanding_amt/#_enq_3mnths,Tot_unsecured_outstanding_amt/#_hle_enq_3mnths,Tot_unsecured_outstanding_amt/#_enq_12mnths,Tot_unsecured_outstanding_amt/#_hle_enq_12mnths,Tot_unsecured_outstanding_amt/Tot_enq,Tot_unsecured_outstanding_amt/Tot_hle_enq,Tot_unsecured_outstanding_amt/#_defaults_3mnths,Tot_unsecured_outstanding_amt/#_defaults_12mnths,Tot_unsecured_outstanding_amt/days_last_acc_open,Tot_unsecured_outstanding_amt/Tot_accounts,Tot_unsecured_outstanding_amt/Tot_hle_accounts,Tot_unsecured_outstanding_amt/Tot_unsecured_loans,Tot_unsecured_outstanding_amt/Tot_live_loans,Tot_unsecured_outstanding_amt/Tot_live_homeloans,Tot_unsecured_outstanding_amt/Tot_live_unsecuredloans,Tot_unsecured_outstanding_amt/Tot_amt_outstanding,Tot_unsecured_outstanding_amt/Tot_secured_outstanding_amt,Tot_unsecured_outstanding_amt/Avg_days_in_debt,Avg_days_in_debt/Age,Avg_days_in_debt/Gross_income,Avg_days_in_debt/Net_income,Avg_days_in_debt/Months_in_city,Avg_days_in_debt/Months_in_current_job,Avg_days_in_debt/Bank_balance,Avg_days_in_debt/Debt_ratio,Avg_days_in_debt/days_last_enq,Avg_days_in_debt/days_hl_last_enq,Avg_days_in_debt/#_enq_3mnths,Avg_days_in_debt/#_hle_enq_3mnths,Avg_days_in_debt/#_enq_12mnths,Avg_days_in_debt/#_hle_enq_12mnths,Avg_days_in_debt/Tot_enq,Avg_days_in_debt/Tot_hle_enq,Avg_days_in_debt/#_defaults_3mnths,Avg_days_in_debt/#_defaults_12mnths,Avg_days_in_debt/days_last_acc_open,Avg_days_in_debt/Tot_accounts,Avg_days_in_debt/Tot_hle_accounts,Avg_days_in_debt/Tot_unsecured_loans,Avg_days_in_debt/Tot_live_loans,Avg_days_in_debt/Tot_live_homeloans,Avg_days_in_debt/Tot_live_unsecuredloans,Avg_days_in_debt/Tot_amt_outstanding,Avg_days_in_debt/Tot_secured_outstanding_amt,Avg_days_in_debt/Tot_unsecured_outstanding_amt
0,1794,40.0,SALARIED,70891.0,62491.0,MALE,OTHERS,MARRIED,229.0,108,INDIVIDUAL,37806.0,0.19,0,324,0,0,0,1,0,14,0,0,0,1486,9,0,8,2,0,2,275606,0,275606,5311,0.001,0.001,0.175,0.37,0.001,210.526,0.123,inf,inf,inf,40.0,inf,2.857,inf,inf,inf,0.027,4.444,inf,5.0,20.0,inf,20.0,0.0,inf,0.0,0.008,1772.275,1.134,309.568,656.398,1.875,373110.526,218.799,inf,inf,inf,70891.0,inf,5063.643,inf,inf,inf,47.706,7876.778,inf,8861.375,35445.5,inf,35445.5,0.257,inf,0.257,13.348,1562.275,0.882,272.886,578.62,1.653,328900.0,192.873,inf,inf,inf,62491.0,inf,4463.643,inf,inf,inf,42.053,6943.444,inf,7811.375,31245.5,inf,31245.5,0.227,inf,0.227,11.766,5.725,0.003,0.004,2.12,0.006,1205.263,0.707,inf,inf,inf,229.0,inf,16.357,inf,inf,inf,0.154,25.444,inf,28.625,114.5,inf,114.5,0.001,inf,0.001,0.043,2.7,0.002,0.002,0.472,0.003,568.421,0.333,inf,inf,inf,108.0,inf,7.714,inf,inf,inf,0.073,12.0,inf,13.5,54.0,inf,54.0,0.0,inf,0.0,0.02,945.15,0.533,0.605,165.092,350.056,198978.947,116.685,inf,inf,inf,37806.0,inf,2700.429,inf,inf,inf,25.441,4200.667,inf,4725.75,18903.0,inf,18903.0,0.137,inf,0.137,7.118,0.005,0.0,0.0,0.001,0.002,0.0,0.001,inf,inf,inf,0.19,inf,0.014,inf,inf,inf,0.0,0.021,inf,0.024,0.095,inf,0.095,0.0,inf,0.0,0.0,8.1,0.005,0.005,1.415,3.0,0.009,1705.263,inf,inf,inf,324.0,inf,23.143,inf,inf,inf,0.218,36.0,inf,40.5,162.0,inf,162.0,0.001,inf,0.001,...,4.5,inf,4.5,0.0,inf,0.0,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0,,,,0.0,0.0,0.0,0.0,,0.0,0.0,,0.0,0.0,0.2,0.0,0.0,0.035,0.074,0.0,42.105,0.025,inf,inf,inf,8.0,inf,0.571,inf,inf,inf,0.005,0.889,inf,4.0,inf,4.0,0.0,inf,0.0,0.002,0.05,0.0,0.0,0.009,0.019,0.0,10.526,0.006,inf,inf,inf,2.0,inf,0.143,inf,inf,inf,0.001,0.222,inf,0.25,inf,1.0,0.0,inf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0,,,,0.0,0.0,,0.0,0.0,0.0,0.0,,0.0,0.0,0.05,0.0,0.0,0.009,0.019,0.0,10.526,0.006,inf,inf,inf,2.0,inf,0.143,inf,inf,inf,0.001,0.222,inf,0.25,1.0,inf,0.0,inf,0.0,0.0,6890.15,3.888,4.41,1203.52,2551.907,7.29,1450557.895,850.636,inf,inf,inf,275606.0,inf,19686.143,inf,inf,inf,185.468,30622.889,inf,34450.75,137803.0,inf,137803.0,inf,1.0,51.893,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,0.0,,,,0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,6890.15,3.888,4.41,1203.52,2551.907,7.29,1450557.895,850.636,inf,inf,inf,275606.0,inf,19686.143,inf,inf,inf,185.468,30622.889,inf,34450.75,137803.0,inf,137803.0,1.0,inf,51.893,132.775,0.075,0.085,23.192,49.176,0.14,27952.632,16.392,inf,inf,inf,5311.0,inf,379.357,inf,inf,inf,3.574,590.111,inf,663.875,2655.5,inf,2655.5,0.019,inf,0.019
1,3183,35.0,SALARIED,53617.0,53617.0,MALE,OTHERS,MARRIED,408.0,60,INDIVIDUAL,15499.0,0.42,0,616,1978,0,0,0,0,11,1,5,5,1912,8,0,6,1,0,0,4844,4824,20,5289,0.001,0.001,0.086,0.583,0.002,83.333,0.057,0.018,inf,inf,inf,inf,3.182,35.0,7.0,7.0,0.018,4.375,inf,5.833,35.0,inf,inf,0.007,0.007,1.75,0.007,1531.914,1.0,131.414,893.617,3.459,127659.524,87.041,27.107,inf,inf,inf,inf,4874.273,53617.0,10723.4,10723.4,28.042,6702.125,inf,8936.167,53617.0,inf,inf,11.069,11.115,2680.85,10.137,1531.914,1.0,131.414,893.617,3.459,127659.524,87.041,27.107,inf,inf,inf,inf,4874.273,53617.0,10723.4,10723.4,28.042,6702.125,inf,8936.167,53617.0,inf,inf,11.069,11.115,2680.85,10.137,11.657,0.008,0.008,6.8,0.026,971.429,0.662,0.206,inf,inf,inf,inf,37.091,408.0,81.6,81.6,0.213,51.0,inf,68.0,408.0,inf,inf,0.084,0.085,20.4,0.077,1.714,0.001,0.001,0.147,0.004,142.857,0.097,0.03,inf,inf,inf,inf,5.455,60.0,12.0,12.0,0.031,7.5,inf,10.0,60.0,inf,inf,0.012,0.012,3.0,0.011,442.829,0.289,0.289,37.988,258.317,36902.381,25.161,7.836,inf,inf,inf,inf,1409.0,15499.0,3099.8,3099.8,8.106,1937.375,inf,2583.167,15499.0,inf,inf,3.2,3.213,774.95,2.93,0.012,0.0,0.0,0.001,0.007,0.0,0.001,0.0,inf,inf,inf,inf,0.038,0.42,0.084,0.084,0.0,0.052,inf,0.07,0.42,inf,inf,0.0,0.0,0.021,0.0,17.6,0.011,0.011,1.51,10.267,0.04,1466.667,0.311,inf,inf,inf,inf,56.0,616.0,123.2,123.2,0.322,77.0,inf,102.667,616.0,inf,inf,0.127,0.128,30.8,...,8.0,inf,inf,0.002,0.002,0.4,0.002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.171,0.0,0.0,0.015,0.1,0.0,14.286,0.01,0.003,inf,inf,inf,inf,0.545,6.0,1.2,1.2,0.003,0.75,inf,6.0,inf,inf,0.001,0.001,0.3,0.001,0.029,0.0,0.0,0.002,0.017,0.0,2.381,0.002,0.001,inf,inf,inf,inf,0.091,1.0,0.2,0.2,0.001,0.125,inf,0.167,inf,inf,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,138.4,0.09,0.09,11.873,80.733,0.313,11533.333,7.864,2.449,inf,inf,inf,inf,440.364,4844.0,968.8,968.8,2.533,605.5,inf,807.333,4844.0,inf,inf,1.004,242.2,0.916,137.829,0.09,0.09,11.824,80.4,0.311,11485.714,7.831,2.439,inf,inf,inf,inf,438.545,4824.0,964.8,964.8,2.523,603.0,inf,804.0,4824.0,inf,inf,0.996,241.2,0.912,0.571,0.0,0.0,0.049,0.333,0.001,47.619,0.032,0.01,inf,inf,inf,inf,1.818,20.0,4.0,4.0,0.01,2.5,inf,3.333,20.0,inf,inf,0.004,0.004,0.004,151.114,0.099,0.099,12.963,88.15,0.341,12592.857,8.586,2.674,inf,inf,inf,inf,480.818,5289.0,1057.8,1057.8,2.766,661.125,inf,881.5,5289.0,inf,inf,1.092,1.096,264.45


In [23]:
df.replace([np.inf, -np.inf], np.nan, inplace=True)

In [24]:
non_obj = dt.loc[dt[0] != 'object',:].index.tolist()

In [25]:
non_obj.remove('ID')
non_obj.remove('Target')

In [26]:
non_obj_iv = pd.DataFrame()
for i in non_obj:
    temp = df.loc[:,[i,'Target']]
    temp['qcut'] = pd.qcut(temp[i],10,duplicates='drop')
    res = pd.crosstab(temp['qcut'],temp['Target'],normalize='columns').assign(woe=lambda x:np.log(x[0]/x[1])).assign(iv=lambda x:np.sum(x['woe']*(x[0]-x[1])))
    res['name'] = i
    non_obj_iv = pd.concat([non_obj_iv,res])

In [27]:
obj = dt.loc[dt[0] == 'object'][0].index.tolist()

In [28]:
obj_iv = pd.DataFrame()
for i in obj:
    res = pd.crosstab(df[i],df['Target'],normalize='columns').assign(woe=lambda x:np.log(x[0]/x[1])).assign(iv=lambda x:np.sum(x['woe']*(x[0]-x[1])))
    res['name'] = i
    obj_iv = pd.concat([obj_iv,res])

In [29]:
iv = pd.concat([non_obj_iv,obj_iv])

In [30]:
iv.rename(columns={0:'%good',1:'%bad'},inplace=True)

In [30]:
iv.drop(iv[iv['iv'].isin([np.inf,-np.inf])].index,inplace=True)

In [31]:
iv.drop_duplicates(subset='name',inplace=True)

In [32]:
iv.sort_values(by='iv',ascending=False,inplace=True)

In [31]:
iv.head(7)

Target,%good,%bad,woe,iv,name
"(20.999, 28.0]",0.118,0.115,0.022,0.057,Age
"(28.0, 31.0]",0.114,0.092,0.216,0.057,Age
"(31.0, 33.0]",0.093,0.092,0.013,0.057,Age
"(33.0, 35.0]",0.08,0.129,-0.474,0.057,Age
"(35.0, 38.0]",0.131,0.095,0.326,0.057,Age
"(38.0, 40.0]",0.075,0.064,0.151,0.057,Age
"(40.0, 43.0]",0.12,0.129,-0.073,0.057,Age


In [32]:
iv.to_csv('IV_complete.csv')

In [34]:
list(set(iv.loc[iv['iv'] == 0,'name'].values.tolist()))

[]

In [35]:
fin_divs = list(set(iv.loc[iv['iv']>0.3,'name'].values))

In [36]:
og_cols = df_copy.columns

In [37]:
df = df[[*og_cols,*fin_divs]]

In [38]:
df.head(1)

Unnamed: 0,ID,Age,Customer_type,Gross_income,Net_income,SEX,Type_of_industry,Marital_Status,Months_in_city,Months_in_current_job,Org_Type,Bank_balance,Debt_ratio,Target,days_last_enq,days_hl_last_enq,#_enq_3mnths,#_hle_enq_3mnths,#_enq_12mnths,#_hle_enq_12mnths,Tot_enq,Tot_hle_enq,#_defaults_3mnths,#_defaults_12mnths,days_last_acc_open,Tot_accounts,Tot_hle_accounts,Tot_unsecured_loans,Tot_live_loans,Tot_live_homeloans,Tot_live_unsecuredloans,Tot_amt_outstanding,Tot_secured_outstanding_amt,Tot_unsecured_outstanding_amt,Avg_days_in_debt,days_last_acc_open/#_hle_enq_3mnths,#_enq_12mnths/Tot_live_homeloans,#_enq_3mnths/Tot_hle_accounts,Age/Tot_live_homeloans,#_enq_3mnths/#_defaults_3mnths,Tot_enq/#_defaults_3mnths,#_enq_12mnths/Age,Tot_enq/#_hle_enq_3mnths,days_last_enq/Tot_hle_accounts,#_enq_3mnths/Tot_live_homeloans,Tot_unsecured_outstanding_amt/#_defaults_3mnths,#_enq_3mnths/#_hle_enq_3mnths,Tot_enq/Tot_hle_accounts,days_last_acc_open/#_hle_enq_12mnths,#_enq_12mnths/#_defaults_3mnths,#_enq_12mnths.1,Tot_secured_outstanding_amt/Tot_live_homeloans,Tot_live_unsecuredloans/#_defaults_3mnths,Bank_balance/#_defaults_3mnths
0,1794,40.0,SALARIED,70891.0,62491.0,MALE,OTHERS,MARRIED,229.0,108,INDIVIDUAL,37806.0,0.19,0,324,0,0,0,1,0,14,0,0,0,1486,9,0,8,2,0,2,275606,0,275606,5311,,,,,,,0.025,,,,,,,,,1,,,


In [39]:
df.shape

(3885, 54)

In [40]:
fin_nulls = pd.DataFrame(df.isnull().sum().sort_values(ascending=False))

In [41]:
list(fin_nulls.loc[fin_nulls[0]==3335].index)

['Bank_balance/#_defaults_3mnths',
 'Tot_unsecured_outstanding_amt/#_defaults_3mnths',
 'Tot_live_unsecuredloans/#_defaults_3mnths',
 '#_enq_12mnths/#_defaults_3mnths',
 '#_enq_3mnths/#_defaults_3mnths',
 'Tot_enq/#_defaults_3mnths']

In [42]:
df.drop(columns=list(fin_nulls.loc[fin_nulls[0]==3335].index),inplace=True)

In [43]:
df.shape

(3885, 48)

In [44]:
df.fillna(-1,inplace=True)

In [45]:
df.replace([np.inf, -np.inf], np.nan, inplace=True)

In [46]:
df = pd.get_dummies(df)

In [47]:
cols = df.columns.to_list()
cols.remove('ID')
cols.remove('Target')

In [48]:
model_performance = pd.DataFrame()
for i in cols:
    for ii in cols:
        if i != ii:
            x = df.loc[:,[i,ii]]
            y = df.loc[:,'Target']
            x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25)
            model = DecisionTreeClassifier(criterion='entropy')
            model.fit(x_train,y_train)
            pred = model.predict(x_test)
            c = {}
            c['var1'] = [i]
            c['var2'] = [ii]
            c['accuracy'] = [accuracy_score(y_test,pred)]
            model_performance = pd.concat([model_performance,pd.DataFrame.from_dict(c)])

In [49]:
model_performance.to_csv('DecisionTree_check.csv')

In [50]:
iv.to_csv('IV.csv')

## Random Forest

In [51]:
rf_model = RandomForestClassifier(criterion='entropy')

In [52]:
x = df.drop(columns=['ID','Target'])
y = df.Target

In [53]:
rf_model.fit(x,y)

In [54]:
rf_df = pd.DataFrame(rf_model.feature_importances_,index=rf_model.feature_names_in_)

In [55]:
rf_df.to_csv('RandomForest_FeatureImportance.csv')

## Variable Clustering

In [57]:
var_clus_df = df.drop(columns='Target')

In [62]:
var_clus_df.head()

Unnamed: 0,ID,Age,Gross_income,Net_income,Months_in_city,Months_in_current_job,Bank_balance,Debt_ratio,days_last_enq,days_hl_last_enq,#_enq_3mnths,#_hle_enq_3mnths,#_enq_12mnths,#_hle_enq_12mnths,Tot_enq,Tot_hle_enq,#_defaults_3mnths,#_defaults_12mnths,days_last_acc_open,Tot_accounts,Tot_hle_accounts,Tot_unsecured_loans,Tot_live_loans,Tot_live_homeloans,Tot_live_unsecuredloans,Tot_amt_outstanding,Tot_secured_outstanding_amt,Tot_unsecured_outstanding_amt,Avg_days_in_debt,days_last_acc_open/#_hle_enq_3mnths,#_enq_12mnths/Tot_live_homeloans,#_enq_3mnths/Tot_hle_accounts,Age/Tot_live_homeloans,#_enq_12mnths/Age,Tot_enq/#_hle_enq_3mnths,days_last_enq/Tot_hle_accounts,#_enq_3mnths/Tot_live_homeloans,#_enq_3mnths/#_hle_enq_3mnths,Tot_enq/Tot_hle_accounts,days_last_acc_open/#_hle_enq_12mnths,#_enq_12mnths.1,Tot_secured_outstanding_amt/Tot_live_homeloans,Customer_type_NON EARNING,Customer_type_SALARIED,Customer_type_SELF EMPLOYED,SEX_FEMALE,SEX_MALE,Type_of_industry_AGRICULTURE,Type_of_industry_AUTO COMPONENTS,Type_of_industry_AUTOMOTIVE,Type_of_industry_BANKING AND FINANCE,Type_of_industry_CEMENT AND CEMENT PRODUCTS,Type_of_industry_CERAMIC TILES AND SANITARY WARE,Type_of_industry_CHEMICALS,Type_of_industry_COAL,Type_of_industry_COMMUNICATION,Type_of_industry_CONSTRUCTION,Type_of_industry_CONSTRUCTION IRRIGATION PROJECTS,Type_of_industry_CONSTRUCTION REAL ESTATE AND INDUSTRIAL,Type_of_industry_CONSUMER ELECTRONICS,Type_of_industry_CONSUMER PRODUCTS,Type_of_industry_CONTAINERS AND PACKING,Type_of_industry_EDUCATION,Type_of_industry_ELECTRICAL EQUIPMENT AND COMPONENTS,Type_of_industry_ELECTRICAL MACHINERY,Type_of_industry_ELECTRONICS,Type_of_industry_ENGINEERING,Type_of_industry_ENTERTAINMENT AND LEISURE,Type_of_industry_ERW STEEL PIPES,Type_of_industry_FABRICATED METAL PRODUCT,Type_of_industry_FMCG,Type_of_industry_FOOD PRODUCTS,Type_of_industry_GEMS AND JEWELLRY,Type_of_industry_GIFT ITEMS AND HANDICRAFTS,Type_of_industry_GLASS AND GLASS PRODUCTS,Type_of_industry_GOVERNMENT,Type_of_industry_HEALTHCARE AND HEALTHCARE PROVIDERS,Type_of_industry_HOSPITALS AND NURSING HOMES,Type_of_industry_HOTELS AND RESORTS,Type_of_industry_INDUSTRIAL EQUIPMENT,Type_of_industry_INSTITUTIONS AND TRUST,Type_of_industry_INSURANCE,Type_of_industry_IT AND COMMUNICATION,Type_of_industry_IT HARDWARE,Type_of_industry_IT SERVICES,Type_of_industry_LEATHER,Type_of_industry_LIGHT ENGINEERING,Type_of_industry_MACHINE TOOLS,Type_of_industry_MACHINERY AND EQUIPMENT,Type_of_industry_MEDIA,Type_of_industry_MEDIA ADVERTISING,Type_of_industry_MEDIA MOVIES AND ENTERTAINMENT,Type_of_industry_MEDICAL AND OPTICAL INSTRUMENT,Type_of_industry_MEDICAL EQUIPMENT,Type_of_industry_METALS,Type_of_industry_MILK AND DAIRY PRODUCTS,Type_of_industry_NON METAL MINERAL PRODUCT,Type_of_industry_OIL AND GAS ALLIED,Type_of_industry_OTHER MANUFACTURING,Type_of_industry_OTHER SERVICES,Type_of_industry_OTHERS,Type_of_industry_PAINTS,Type_of_industry_PAPER & PULP,Type_of_industry_PETROLEUM PRODUCTS,Type_of_industry_PHARMACEUTICALS BULK DRUGS,Type_of_industry_PHARMACEUTICALS FORMULATIONS,Type_of_industry_PHOTOGRAPHIC EQUIPMENT AND ALLIED PRODUCTS,Type_of_industry_PLASTIC AND PLASTIC PRODUCTS,Type_of_industry_PLASTIC PIPE AND PIPE FITTINGS,Type_of_industry_PLASTIC PRODUCTS,Type_of_industry_PLYWOOD AND LAMINATES,Type_of_industry_POULTRY,Type_of_industry_PRINT MANUFACTURE RECORD MEDIA,Type_of_industry_PRINTING AND PACKAGING,Type_of_industry_PROFESSIONAL SERVICES,Type_of_industry_SECURITY SERVICES,Type_of_industry_STATIONARY AND OTHER SUPPLIES,Type_of_industry_STEEL PIG IRON SPONGE IRON,Type_of_industry_TEA,Type_of_industry_TEXTILE READY MADE GARMENTS,Type_of_industry_TEXTILE WEAVING KNITTING AND PROCESSING,Type_of_industry_TEXTILES,Type_of_industry_TRANSPORT LOGISTICS,Type_of_industry_TRANSPORT OPERATOR,Type_of_industry_TRAVEL AGENTS AND TOUR OPERATORS,Type_of_industry_UTILITY SERVICES,Type_of_industry_WOOD AND WOOD PRODUCTS,Marital_Status_DIVORCED,Marital_Status_MARRIED,Marital_Status_SEPARATED,Marital_Status_SINGLE,Marital_Status_WIDOWED,Org_Type_ASSOCIATION OF PEOPLE,Org_Type_CLOSELY HELD - PUBLIC LIMITED,Org_Type_GOVT (ONLY FOR SALARIED),Org_Type_INDIVIDUAL,Org_Type_LISTED - PUBLIC LIMITED,Org_Type_MNC,Org_Type_PARTNERSHIP,Org_Type_PRIVATE LIMITED,Org_Type_PROPRIETORSHIP,Org_Type_SOCIETY,Org_Type_TRUST
0,1794,40.0,70891.0,62491.0,229.0,108,37806.0,0.19,324,0,0,0,1,0,14,0,0,0,1486,9,0,8,2,0,2,275606,0,275606,5311,-1.0,-1.0,-1.0,-1.0,0.025,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1,-1.0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,3183,35.0,53617.0,53617.0,408.0,60,15499.0,0.42,616,1978,0,0,0,0,11,1,5,5,1912,8,0,6,1,0,0,4844,4824,20,5289,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0,-1.0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2,1841,46.0,20000.0,45912.0,552.0,204,15034.0,0.34,688,0,0,0,0,0,3,0,0,0,681,4,2,1,1,1,0,15695,15695,0,5153,-1.0,0.0,0.0,46.0,0.0,-1.0,344.0,0.0,-1.0,1.5,-1.0,0,15695.0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,1115,49.0,40000.0,77440.0,0.0,0,27349.0,0.08,52,52,2,2,3,2,3,2,0,0,5143,1,1,0,1,1,0,480051,480051,0,5143,2571.5,3.0,2.0,49.0,0.061,1.5,52.0,2.0,1.0,3.0,2571.5,3,480051.0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0
4,3125,45.0,36526.0,36526.0,540.0,22,28520.0,0.48,33,0,1,0,1,0,2,0,5,5,3310,2,0,0,1,0,0,2781,2781,0,5093,-1.0,-1.0,-1.0,-1.0,0.022,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1,-1.0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [59]:
var_clus = VarClusHi(var_clus_df)

In [60]:
var_clus.varclus()

ValueError: shapes (145,) and (4,145) not aligned: 145 (dim 0) != 4 (dim 0)

In [63]:
df.shape

(3885, 144)