In [8]:
# Import Modules
import collections
import pandas as pd
from sklearn.preprocessing import StandardScaler, Imputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import SVC

In [2]:
# Import Dataset
df = pd.read_csv('credit-data.csv')

In [3]:
# Fix Negative values on Age
df.loc[df.age < 0, 'age'] = df.loc[df.age > 0].mean()
df.describe()

Unnamed: 0,clientid,income,age,loan,default
count,2000.0,2000.0,1994.0,2000.0,2000.0
mean,1000.5,45331.600018,40.9277,4444.369695,0.1415
std,577.494589,14326.327119,13.271802,3045.410024,0.348624
min,1.0,20014.48947,18.055189,1.37763,0.0
25%,500.75,32796.459717,29.043284,1939.708847,0.0
50%,1000.5,45789.117313,41.382673,3974.719419,0.0
75%,1500.25,57791.281668,52.6169,6432.410625,0.0
max,2000.0,69995.685578,63.971796,13766.051239,1.0


In [4]:
# Split dataset in Forecasters and Classes
forecasters = df.iloc[:, 1:4].values
classes = df.iloc[:, 4].values
print(forecasters)
print(classes)

[[6.61559251e+04 5.90170151e+01 8.10653213e+03]
 [3.44151540e+04 4.81171531e+01 6.56474502e+03]
 [5.73171701e+04 6.31080495e+01 8.02095330e+03]
 ...
 [4.43114493e+04 2.80171669e+01 5.52278669e+03]
 [4.37560566e+04 6.39717958e+01 1.62272260e+03]
 [6.94365796e+04 5.61526170e+01 7.37883360e+03]]
[0 0 0 ... 1 0 0]


In [5]:
# Fix missing values
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
imputer = imputer.fit(forecasters[:, 1:4])
forecasters[:, 1:4] = imputer.transform(forecasters[:, 1:4])
print(forecasters)

[[6.61559251e+04 5.90170151e+01 8.10653213e+03]
 [3.44151540e+04 4.81171531e+01 6.56474502e+03]
 [5.73171701e+04 6.31080495e+01 8.02095330e+03]
 ...
 [4.43114493e+04 2.80171669e+01 5.52278669e+03]
 [4.37560566e+04 6.39717958e+01 1.62272260e+03]
 [6.94365796e+04 5.61526170e+01 7.37883360e+03]]




In [6]:
# Scaling values
scaler = StandardScaler()
forecasters = scaler.fit_transform(forecasters)
print(forecasters)

[[ 1.45393393  1.36538005  1.20281942]
 [-0.76217555  0.54265932  0.69642695]
 [ 0.83682073  1.67417101  1.17471147]
 ...
 [-0.07122592 -0.97448606  0.35420081]
 [-0.11000289  1.73936652 -0.92675625]
 [ 1.682986    1.14917551  0.96381038]]


In [7]:
# Split dataset into Train and Test data
forecasters_train, forecasters_test, classes_train, classes_test = train_test_split(forecasters,
                                                             classes,
                                                             test_size=0.25,
                                                             random_state=0)

In [58]:
# Machine Learning import
# Classifier Creation
estimator = SVC(kernel='rbf', random_state=1, C=2)
estimator.fit(forecasters_train, classes_train)



SVC(C=2, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=1,
  shrinking=True, tol=0.001, verbose=False)

In [59]:
# Predict new Classes
predictions = estimator.predict(forecasters_test)

In [60]:
# Line Base Classifier
counter = collections.Counter(classes_test)
print('Class A: {}\nClass B: {}\n\n\t\t\t\t\tLine Base Classifier: {}%'
      .format(counter[0], counter[1], counter[0]/(counter[0]+counter[1])*100))

Class A: 436
Class B: 64

					Line Base Classifier: 87.2%


In [61]:
# Predict Precision
precision = accuracy_score(classes_test, predictions)
print('Precision: {:.2f}%'.format(precision * 100))

Precision: 98.80%


In [62]:
# Confusion Matrix
matrix = confusion_matrix(classes_test, predictions)
print('Confusion Matrix: \n\t0\t1\n0:\t{}\t{} \n1:\t{}\t{}'
      .format(matrix[0][0], matrix[0][1], matrix[1][0], matrix[1][1]))

Confusion Matrix: 
	0	1
0:	434	2 
1:	4	60


In [42]:
?SVC