# **Predicting Credit Card Approval**

# *Importing Libraries*

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV

import pandas_profiling

import warnings
warnings.filterwarnings('ignore')

# *Loading Dataset*

In [2]:
df = pd.read_csv('../input/crx-uci-ml-repository/crx.data')

# *Basic Data Analysis*

In [3]:
df.profile_report()

Summarize dataset:   0%|          | 0/29 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [4]:
df.head()

Unnamed: 0,b,30.83,0,u,g,w,v,1.25,t,t.1,01,f,g.1,00202,0.1,+
0,a,58.67,4.46,u,g,q,h,3.04,t,t,6,f,g,43,560,+
1,a,24.5,0.5,u,g,q,h,1.5,t,f,0,f,g,280,824,+
2,b,27.83,1.54,u,g,w,v,3.75,t,t,5,t,g,100,3,+
3,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120,0,+
4,b,32.08,4.0,u,g,m,v,2.5,t,f,0,t,g,360,0,+


In [5]:
df.describe()

Unnamed: 0,0,1.25,01,0.1
count,689.0,689.0,689.0,689.0
mean,4.765631,2.224819,2.402032,1018.862119
std,4.97847,3.348739,4.86618,5213.743149
min,0.0,0.0,0.0,0.0
25%,1.0,0.165,0.0,0.0
50%,2.75,1.0,0.0,5.0
75%,7.25,2.625,3.0,396.0
max,28.0,28.5,67.0,100000.0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 689 entries, 0 to 688
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   b       689 non-null    object 
 1   30.83   689 non-null    object 
 2   0       689 non-null    float64
 3   u       689 non-null    object 
 4   g       689 non-null    object 
 5   w       689 non-null    object 
 6   v       689 non-null    object 
 7   1.25    689 non-null    float64
 8   t       689 non-null    object 
 9   t.1     689 non-null    object 
 10  01      689 non-null    int64  
 11  f       689 non-null    object 
 12  g.1     689 non-null    object 
 13  00202   689 non-null    object 
 14  0.1     689 non-null    int64  
 15  +       689 non-null    object 
dtypes: float64(2), int64(2), object(12)
memory usage: 86.2+ KB


**The dataset contains both numerical and categorical values and there appears to be no missing values. As credit card information is confidential, all feature names have been anonymised. The probable features in a typical credit card application are Gender, Age, Debt, Married, BankCustomer, EducationLevel, Ethnicity, YearsEmployed, PriorDefault, Employed, CreditScore, DriversLicense, Citizen, ZipCode, Income and finally the ApprovalStatus.**

# *Splitting Dataset*

In [7]:
df_train, df_test = train_test_split(df, test_size=0.3, random_state=42)

# *Preprocessing*

In [8]:
#Converting categorical features to numeric
df_train = pd.get_dummies(df_train)
df_test = pd.get_dummies(df_test)

# Reindex the columns of the test set aligning with the train set
df_test = df_test.reindex(columns=df_train.columns, fill_value=0)

In [9]:
#creating feature and label variables
X_train, y_train = df_train.iloc[:,:-1].values, df_train.iloc[:, [-1]].values
X_test, y_test = df_test.iloc[:,:-1].values, df_test.iloc[:, [-1]].values

In [10]:
#rescaling the values between 0-1
scaler = MinMaxScaler(feature_range=(0,1))
rescaled_X_train = scaler.fit_transform(X_train)
rescaled_X_test = scaler.transform(X_test)

# *Fitting the model*

In [11]:
logreg = LogisticRegression()

In [12]:
logreg.fit(rescaled_X_train, y_train)

LogisticRegression()

In [13]:
y_pred = logreg.predict(rescaled_X_test)

# *Model Accuracy*

In [14]:
acc = logreg.score(rescaled_X_test, y_test)
print('Accuracy of the model is :', acc)

Accuracy of the model is : 1.0


# *Confusion Matrix*

In [15]:
print(confusion_matrix(y_test,y_pred))

[[ 94   0]
 [  0 113]]
