In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
cards = pd.read_csv("clean_data_with_id2.csv")

In [3]:
card_features  = cards.drop(["card_name","id","url"],axis=1)

In [4]:
# pd.Series(cards['bank_name'].unique()).to_csv("bank_name.csv",index=False)

In [5]:
card_features.isnull().sum()

bank_name                               0
card_type                               0
interest_rate                          93
max_credit_limit                       63
international_transaction_available     0
balance_transfer_available              0
dual_currency                           0
reward_supplementary_card               0
reward_airport_lounge                   0
reward_cashback_available               0
reward_luxary_resort_hotel              0
reward_insurance_plan                   0
reward_travel_benefits                  0
reward_fine_dining                      0
reward_buffet_discount                  0
reward_medical_discount                 0
reward_shopping                         0
reward_airlines_ticket                  0
reward_point_program                    0
reward_emi_available                    0
dtype: int64

In [6]:
# Boolean Features 

card_boolean_features = card_features.columns[4:]

In [7]:
card_boolean_features

Index(['international_transaction_available', 'balance_transfer_available',
       'dual_currency', 'reward_supplementary_card', 'reward_airport_lounge',
       'reward_cashback_available', 'reward_luxary_resort_hotel',
       'reward_insurance_plan', 'reward_travel_benefits', 'reward_fine_dining',
       'reward_buffet_discount', 'reward_medical_discount', 'reward_shopping',
       'reward_airlines_ticket', 'reward_point_program',
       'reward_emi_available'],
      dtype='object')

# Handle Card Type 

In [8]:
card_features['card_type'] = card_features['card_type'].str.strip()

In [9]:
card_features['card_type'].value_counts()

Credit                   83
Debit                    34
Prepaid                   6
Prepaid International     1
Virtual Prepaid           1
Hajj                      1
Prepaid(Hajj)             1
Dual Currency             1
Credit(Women only)        1
Proprietary               1
Name: card_type, dtype: int64

In [10]:
card_type_dict = {}
for card_type in card_features['card_type'].value_counts().index:
    if card_type in ["Credit","Debit","Prepaid"]:
        card_type_dict[card_type]=card_type
    else:
        card_type_dict[card_type]="Prepaid"

In [11]:
card_features['card_type'] = card_features['card_type'].map(card_type_dict)

In [12]:
card_features['card_type'].value_counts()

Credit     83
Debit      34
Prepaid    13
Name: card_type, dtype: int64

# Handle Interest Rate 

In [13]:
card_features['interest_rate'] = card_features['interest_rate'].fillna(-10).astype('float')

interest_rate_conditions = [
    (card_features['interest_rate'] == 0),
    (card_features['interest_rate'] == -10),
    (card_features['interest_rate'] >=1) & (card_features['interest_rate'] <=10),
    (card_features['interest_rate'] >=11) & (card_features['interest_rate'] <=20),
    (card_features['interest_rate'] >=21) & (card_features['interest_rate'] <=30),]
    

interest_rate_choices = ['zero','1-10','1-10','11-20','21-30']

In [14]:
card_features['interest_rate'] = np.select(interest_rate_conditions,interest_rate_choices)

In [15]:
card_features['interest_rate'].replace('0','zero',inplace=True)

# Handle Max Credit Limit 

In [16]:
card_features['max_credit_limit'] = card_features['max_credit_limit'].fillna(-10).astype('float')

In [17]:
credit_limit_conditions = [
    (card_features['max_credit_limit'] == -10),
    (card_features['max_credit_limit']<=100000),
    (card_features['max_credit_limit']>100000) & (card_features['max_credit_limit']<500000),
    (card_features['max_credit_limit']>500000) & (card_features['max_credit_limit']<1000000),
    (card_features['max_credit_limit']>1000000),
    ]

credit_limit_choices = ['unknown','0-100000','100000-500000','500000-1000000','1000000+']
card_features['max_credit_limit'] = np.select(credit_limit_conditions,credit_limit_choices)

In [18]:
card_features['hajj_card'] = cards['card_name'].str.contains('Hajj')
card_features['is_visa'] = (cards['card_name'].str.contains('Visa')) | (cards['card_name'].str.contains('visa'))
card_features['is_mastercard'] = (cards['card_name'].str.contains('MasterCard')) | (cards['card_name'].str.contains('mastercard'))

In [19]:
card_features['max_credit_limit'].replace('0','N/A',inplace=True)

In [20]:
card_features = pd.get_dummies(card_features)

In [21]:
card_features.columns

Index(['international_transaction_available', 'balance_transfer_available',
       'dual_currency', 'reward_supplementary_card', 'reward_airport_lounge',
       'reward_cashback_available', 'reward_luxary_resort_hotel',
       'reward_insurance_plan', 'reward_travel_benefits', 'reward_fine_dining',
       'reward_buffet_discount', 'reward_medical_discount', 'reward_shopping',
       'reward_airlines_ticket', 'reward_point_program',
       'reward_emi_available', 'hajj_card', 'is_visa', 'is_mastercard',
       'bank_name_AB Bank', 'bank_name_Agrani Bank Limited',
       'bank_name_BRAC Bank Limited', 'bank_name_Bangladesh Krishi Bank',
       'bank_name_Bank Asia Limited', 'bank_name_City Bank Limited',
       'bank_name_Dhaka Bank', 'bank_name_Dutch Bangla Bank Limited',
       'bank_name_IFIC Bank Limited', 'bank_name_Jamuna Bank Limited',
       'bank_name_Janata Bank Limited', 'bank_name_Meghna Bank',
       'bank_name_Midland Bank', 'bank_name_Modhumoti Bank',
       'bank_name_Mut

# Unsupervised Model Fitting 

In [22]:
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=5) 

In [23]:
neigh.fit(card_features)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=5, p=2, radius=1.0)

In [24]:
query_reshape = len(card_features.columns)

In [25]:
from sklearn.externals import joblib


In [26]:
joblib.dump(neigh,'knn.pickle')

['knn.pickle']

In [None]:
example_query = {}