In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 
sns.set()
%matplotlib inline

In [2]:
df = pd.read_csv("credit_card_accept.csv")
df.head()

Unnamed: 0,card,reports,age,income,share,expenditure,owner,selfemp,dependents,months,majorcards,active
0,yes,0,37.66667,4.52,0.03327,124.9833,yes,no,3,54,1,12
1,yes,0,33.25,2.42,0.005217,9.854167,no,no,3,34,1,13
2,yes,0,33.66667,4.5,0.004156,15.0,yes,no,4,58,1,5
3,yes,0,30.5,2.54,0.065214,137.8692,no,no,0,25,1,7
4,yes,0,32.16667,9.7867,0.067051,546.5033,yes,no,2,64,1,5


# Data Preprocessing

In [3]:
owner = pd.get_dummies(df['owner'],drop_first=True)
selfemp = pd.get_dummies(df['selfemp'],drop_first=True)
df['dependents'] =   df.dependents.values +1 
df['owner'] = owner
df['selfemp'] = selfemp
X = df
X.head(2)

Unnamed: 0,card,reports,age,income,share,expenditure,owner,selfemp,dependents,months,majorcards,active
0,yes,0,37.66667,4.52,0.03327,124.9833,1,0,4,54,1,12
1,yes,0,33.25,2.42,0.005217,9.854167,0,0,4,34,1,13


In [4]:
features = X.drop('selfemp' , axis = 1)
features = features.drop('owner' , axis = 1)
features = features.drop('dependents' , axis = 1)
features = features.drop('age' , axis = 1)
features.head(2)

Unnamed: 0,card,reports,income,share,expenditure,months,majorcards,active
0,yes,0,4.52,0.03327,124.9833,54,1,12
1,yes,0,2.42,0.005217,9.854167,34,1,13


In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train,X_test , y_train,y_test = train_test_split(features.drop('card' , axis = 1 ) , features['card'] , random_state = 101 ,test_size = 0.3)

In [7]:
X_train.head(2)

Unnamed: 0,reports,income,share,expenditure,months,majorcards,active
545,0,3.0,0.01047,25.5075,3,1,11
635,0,2.5,0.054447,113.1808,8,1,5


# Import ML Module

In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
logistic  = LogisticRegression()

In [10]:
logistic.fit(X_train , y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [11]:
pred = logistic.predict(X_test)

In [12]:
pred

array(['yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no', 'yes', 'yes',
       'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no',
       'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes',
       'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no',
       'yes', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes',
       'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes',
       'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no', 'yes',
       'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'no', 'no', 'yes',
       'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes',
       'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes',
       'yes', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes',
       'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes',
       'yes', 'yes', 'yes', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no',
       'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes'

In [13]:
print(classification_report(y_test,pred))

NameError: name 'classification_report' is not defined

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
confusion_df = pd.DataFrame(confusion_matrix(y_test,pred),
             columns=["Predicted Class " + str(class_name) for class_name in [0,1]],
             index = ["Class " + str(class_name) for class_name in [0,1]])

print(confusion_df)


In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
clf = RandomForestClassifier(min_samples_leaf=5)
clf.fit(X_train , y_train)

# Check Feature Importance

In [None]:
list(zip( X_train ,clf.feature_importances_))

# Data Visualization

In [None]:
sns.boxplot(x="majorcards",y="expenditure",hue = "card",data = features,palette = "PRGn")

In [None]:
plt.scatter(features["share"],features["expenditure"],c = 'red' , s = 50 ) #c is a parameter to pass the color 
plt.xlabel("share")
plt.ylabel("expenditure")

In [None]:
plt.figure(figsize=(20,10))
sns.scatterplot(x = features['months'] , y = features["active"],s=150, hue=features['card'],style="card" , data = features)
#plt.scatter(features["share"],features["expenditure"],  c = 'red' , s = 50 ,) #c is a parameter to pass the color 
#plt.xlabel("share")
#plt.ylabel("expenditure")
plt.show()

In [None]:
plt.figure(figsize=(20,10))
sns.scatterplot(x = features["share"] , y = features["expenditure"],s=150, hue=features['card'],style="card" , data = features)

In [None]:
features[features['card']== 'yes'].groupby(features['share']).count().plot(figsize = (5,5))
plt.legend(bbox_to_anchor =(1,1),loc = 2)

# Correlation

In [None]:
plt.figure(figsize=(20,10))
Var_Corr = features.corr()
sns.heatmap(Var_Corr, xticklabels=Var_Corr.columns, yticklabels=Var_Corr.columns, annot=True)

In [None]:
sns.set(style="darkgrid")

g = sns.jointplot("months", "expenditure", data=features, kind="reg",
                   color="m", height=7)

In [None]:
sns.set(style="darkgrid")
g = sns.jointplot("months", "share", data=features, kind="reg",color="m", height=7)

In [None]:
sns.set(style="whitegrid")
plt.figure(figsize= (10,10) )
sns.palplot(sns.color_palette("muted"))
sns.violinplot(x='months' , y='active' ,hue='card' , data = features ,inner = "point" , scale="count" ,split=True)