# Credit Card Applications
**Predict if a credit card application gets accepted or not**

In [1]:
import pandas as pd

## Data

In [8]:
# Read the credit card application records from a CSV file; the relative path
# is resolved against the kernel's current working directory.
data = pd.read_csv(filepath_or_buffer='data/credit-card-data.csv')

# A bare trailing expression makes the notebook render the frame as a table.
data

Unnamed: 0,card,reports,age,income,share,expenditure,owner,selfemp,dependents,months,majorcards,active
0,yes,0,37.66667,4.5200,0.033270,124.983300,yes,no,3,54,1,12
1,yes,0,33.25000,2.4200,0.005217,9.854167,no,no,3,34,1,13
2,yes,0,33.66667,4.5000,0.004156,15.000000,yes,no,4,58,1,5
3,yes,0,30.50000,2.5400,0.065214,137.869200,no,no,0,25,1,7
4,yes,0,32.16667,9.7867,0.067051,546.503300,yes,no,2,64,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1314,yes,0,33.58333,4.5660,0.002146,7.333333,yes,no,0,94,1,19
1315,no,5,23.91667,3.1920,0.000376,0.000000,no,no,3,12,1,5
1316,yes,0,40.58333,4.6000,0.026513,101.298300,yes,no,2,1,1,2
1317,yes,0,32.83333,3.7000,0.008999,26.996670,no,yes,0,60,1,7


In [3]:
# Report how many rows (applications) were loaded; shape[0] is the row count.
print("Total entries:", data.shape[0])

Total entries: 1319


### Summary of Data
- **`card`**: "_yes_" if credit card application accepted, "_no_" if not
- **`reports`**: Number of major derogatory reports
- **`age`**: Age in years plus twelfths of a year
- **`income`**: Yearly income (divided by 10,000)
- **`share`**: Ratio of monthly credit card expenditure to yearly income
- **`expenditure`**: Average monthly credit card expenditure
- **`owner`**: "_yes_" if owns home, "_no_" if rents
- **`selfemp`**: "_yes_" if self-employed, "_no_" if not
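- **`dependents`**: Number of dependents (the original data description gives "1 + number of dependents", but the column contains values of 0, so it appears to be the plain count)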
- **`months`**: Months living at current address
- **`majorcards`**: Number of major credit cards held
- **`active`**: Number of active credit accounts

In [9]:
# Convert the "yes"/"no" indicator columns into real boolean columns.
column_list = ['card', 'owner', 'selfemp']
yes_no_to_bool = {"yes": True, "no": False}
for col in column_list:
    # Already boolean (e.g. this cell was run before): leave the column alone
    # so that re-running the cell is harmless.
    if pd.api.types.is_bool_dtype(data[col]):
        continue
    converted = data[col].map(yes_no_to_bool)
    # `map` yields NaN for anything outside the mapping; fail here with a clear
    # message instead of passing a mixed-type column on to the model.
    if converted.isna().any():
        unexpected = sorted(data.loc[converted.isna(), col].astype(str).unique())
        raise ValueError(
            f"Column {col!r} has values other than 'yes'/'no': {unexpected}"
        )
    # Cast explicitly rather than relying on `replace` to silently downcast
    # object -> bool, which recent pandas versions deprecate.
    data[col] = converted.astype(bool)

## Model

In [10]:
# Target: whether the application was accepted (the `card` column).
y = data['card']

# Features: every remaining column of `data`.
X = data.drop(columns='card')

# Preview the first rows of the feature matrix.
X.head()

Unnamed: 0,reports,age,income,share,expenditure,owner,selfemp,dependents,months,majorcards,active
0,0,37.66667,4.52,0.03327,124.9833,True,False,3,54,1,12
1,0,33.25,2.42,0.005217,9.854167,False,False,3,34,1,13
2,0,33.66667,4.5,0.004156,15.0,True,False,4,58,1,5
3,0,30.5,2.54,0.065214,137.8692,False,False,0,25,1,7
4,0,32.16667,9.7867,0.067051,546.5033,True,False,2,64,1,5


In [11]:
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Fixed seed for the forest so the reported score is the same on every
# Restart & Run All; without it each run trains differently-randomised trees.
RANDOM_STATE = 0

my_pipeline = make_pipeline(
    RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)
)

# 5-fold cross-validation scored by classification accuracy.
cv_scores = cross_val_score(my_pipeline, X, y, cv=5, scoring='accuracy')

# NOTE(review): X still contains `expenditure` and `share`, both described as
# credit card spending measures. They may leak the target (`card`) and inflate
# this score -- TODO confirm, and consider dropping them before trusting it.
print(f"Cross-validation accuracy: {cv_scores.mean()}")

Cross-validation accuracy: 0.9802886277220878
