# **Predicting Credit Card Approval**

# *Importing Libraries*

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV

import pandas_profiling

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas/scikit-learn deprecation and
# data-conversion warnings that can point at real problems in later cells.
warnings.filterwarnings('ignore')

# *Loading Dataset*

In [None]:
# crx.data ships without a header row, so tell pandas not to consume the first
# record as column names (the default would silently drop one application).
df = pd.read_csv('Data/crx.data', header=None)
# Neutral string names (A1, A2, ...) in file order; the last column is the target.
df.columns = [f'A{i}' for i in range(1, df.shape[1] + 1)]

# *Basic Data Analysis*

In [None]:
# Render the profiling report for the raw frame
# (the accessor is presumably registered by the pandas_profiling import above).
df.profile_report()

In [None]:
# Preview the first rows to eyeball the raw values in each column.
df.head()

In [None]:
# Summary statistics; by default this only covers the columns pandas parsed as numeric.
df.describe()

In [None]:
# Column dtypes and non-null counts; placeholder strings standing in for missing
# values are counted as non-null here.
df.info()

**The dataset contains both numerical and categorical values. `df.info()` reports no null entries, but this dataset marks missing values with `?`, which pandas reads as ordinary strings, so missing values can still be present. As credit card information is confidential, all feature names have been anonymised. The probable features in a typical credit card application are Gender, Age, Debt, Married, BankCustomer, EducationLevel, Ethnicity, YearsEmployed, PriorDefault, Employed, CreditScore, DriversLicense, Citizen, ZipCode and Income, followed by the target, ApprovalStatus.**

# *Splitting Dataset*

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
df_train, df_test = train_test_split(
    df,
    random_state=42,
    test_size=0.3,
)

# *Preprocessing*

In [None]:
# Converting categorical features to numeric.
# Some numeric columns are read as text because the raw file marks missing
# values with '?'; left alone, get_dummies would emit one indicator column per
# distinct number. Turn those columns back into numbers first, filling gaps
# with the training-set median so no test-set statistic leaks into training.
df_train, df_test = df_train.copy(), df_test.copy()
for col in df_train.select_dtypes(exclude=['number', 'bool']).columns:
    train_known = df_train[col].mask(df_train[col] == '?')
    train_num = pd.to_numeric(train_known, errors='coerce')
    # A known value that fails to parse means the column is truly categorical;
    # a column with no parseable value at all is left untouched as well.
    if train_num.isna().sum() > train_known.isna().sum() or train_num.isna().all():
        continue
    fill_value = train_num.median()
    test_known = df_test[col].mask(df_test[col] == '?')
    df_train[col] = train_num.fillna(fill_value)
    df_test[col] = pd.to_numeric(test_known, errors='coerce').fillna(fill_value)

# One-hot encode whatever is still non-numeric (the real categoricals).
df_train = pd.get_dummies(df_train)
df_test = pd.get_dummies(df_test)

# Reindex the columns of the test set aligning with the train set:
# categories unseen in training are dropped, categories absent from the
# test split are added as all-zero columns.
df_test = df_test.reindex(columns=df_train.columns, fill_value=0)

In [None]:
# creating feature and label variables
# The label is the last column. When get_dummies encoded a categorical target,
# its other indicator column(s) share the label's "<column>_" prefix and are the
# label's exact complement, so they must be kept out of the features; otherwise
# the model simply reads the answer from them.
label_col = df_train.columns[-1]
target_prefix = str(label_col).rsplit('_', 1)[0] + '_'
target_cols = [
    col for col in df_train.columns
    if col == label_col or str(col).startswith(target_prefix)
]

# Features as a float matrix; labels as a 1-D integer vector, which is the
# (n_samples,) shape scikit-learn estimators and metrics expect.
X_train = df_train.drop(columns=target_cols).to_numpy(dtype=float)
y_train = df_train[label_col].to_numpy(dtype=int)
X_test = df_test.drop(columns=target_cols).to_numpy(dtype=float)
y_test = df_test[label_col].to_numpy(dtype=int)

In [None]:
# Rescale every feature into [0, 1]. The min/max are learned from the training
# features only and then reused unchanged on the test features.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X_train)
rescaled_X_train = scaler.transform(X_train)
rescaled_X_test = scaler.transform(X_test)

# *Fitting the model*

In [None]:
# Logistic-regression classifier with the library's default hyperparameters.
logreg = LogisticRegression()

In [None]:
# Train on the scaled training features. The labels are flattened to 1-D
# because the estimator expects shape (n_samples,), not a column vector.
logreg.fit(rescaled_X_train, np.ravel(y_train))

In [None]:
# Predicted class labels for the scaled held-out features.
y_pred = logreg.predict(rescaled_X_test)

# *Model Accuracy*

In [None]:
# Score the fitted classifier on the held-out set (mean accuracy for scikit-learn classifiers).
acc = logreg.score(rescaled_X_test, y_test)
print('Accuracy of the model is :', acc)

# *Confusion Matrix*

In [None]:
# Tabulate the true test labels against the predicted ones and print the counts.
conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)