## **Mounting Drive**

In [None]:
from google.colab import drive
import os
# Attach Google Drive to the Colab VM (forcing a fresh mount if one already exists).
drive.mount(mountpoint='/content/drive', force_remount=True)
# Work from the project folder so that relative file paths used later resolve there.
os.chdir('/content/drive/MyDrive/Personal_Projects')

Mounted at /content/drive


## **Support Vector Machine**

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, average_precision_score
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

**Pre-Processing**

In [None]:
# Load the credit-scoring table. The path is relative, so it is resolved against
# the current working directory (set by os.chdir in the Drive-mounting cell).
data = pd.read_csv("Credit_Scoring.csv")
# Preview the first rows to sanity-check the columns and raw values.
data.head()

Unnamed: 0,Status,Seniority,Home,Time,Age,Marital,Records,Job,Expenses,Income,Assets,Debt,Amount,Price,Finrat,Savings
0,good,9,rent,60,30,married,no_rec,freelance,73,129,0,0,800,846,94.562648,4.2
1,good,17,rent,60,58,widow,no_rec,fixed,48,131,0,0,1000,1658,60.313631,4.98
2,bad,10,owner,36,46,married,yes_rec,freelance,90,200,3000,0,2000,2985,67.001675,1.98
3,good,0,rent,60,24,single,no_rec,fixed,63,182,2500,0,900,1325,67.924528,7.933333
4,good,0,rent,36,26,single,no_rec,fixed,46,107,0,0,310,910,34.065934,7.083871


In [None]:
# Per-column flag: True if the column contains at least one missing value.
data.isna().any()

Status       False
Seniority    False
Home         False
Time         False
Age          False
Marital      False
Records      False
Job          False
Expenses     False
Income       False
Assets       False
Debt         False
Amount       False
Price        False
Finrat       False
Savings      False
dtype: bool

In [None]:
# Every non-numeric column is treated as a categorical feature that needs encoding.
categorical_data = data.select_dtypes(exclude=[np.number])
print("There are {} categorical Columns in Dataset".format(len(categorical_data.columns)))

There are 5 categorical Columns in Dataset


In [None]:
# Names of the categorical (non-numeric) features selected above. Reading the
# column index lists the names directly; `.any()` instead reduces each column's
# values to a truthiness flag and only shows the names as a by-product.
categorical_data.columns.tolist()

Status     True
Home       True
Marital    True
Records    True
Job        True
dtype: bool

In [None]:
# Integer-encode each categorical column in place. A single LabelEncoder is
# re-fitted for every column, so after the loop it only holds the classes of the
# last column processed ('Records').
encoder = LabelEncoder()
for column in ('Status', 'Home', 'Marital', 'Job', 'Records'):
    data[column] = encoder.fit_transform(data[column].astype('str'))

In [None]:
# Preview the frame again to confirm the categorical columns now hold integer codes.
data.head()

Unnamed: 0,Status,Seniority,Home,Time,Age,Marital,Records,Job,Expenses,Income,Assets,Debt,Amount,Price,Finrat,Savings
0,1,9,5,60,30,1,0,1,73,129,0,0,800,846,94.562648,4.2
1,1,17,5,60,58,4,0,0,48,131,0,0,1000,1658,60.313631,4.98
2,0,10,2,36,46,1,1,1,90,200,3000,0,2000,2985,67.001675,1.98
3,1,0,5,60,24,3,0,0,63,182,2500,0,900,1325,67.924528,7.933333
4,1,0,5,36,26,3,0,0,46,107,0,0,310,910,34.065934,7.083871


In [None]:
# Confirm that every column is numeric after encoding (no object dtype left),
# since the estimators fitted below are given the raw numeric array.
data.dtypes

Status         int64
Seniority      int64
Home           int64
Time           int64
Age            int64
Marital        int64
Records        int64
Job            int64
Expenses       int64
Income         int64
Assets         int64
Debt           int64
Amount         int64
Price          int64
Finrat       float64
Savings      float64
dtype: object

In [None]:
# Class balance of the target after encoding.
# Encoded labels: 'good' -> 1, 'bad' -> 0
data['Status'].value_counts()

1    3197
0    1249
Name: Status, dtype: int64

In [None]:
# Column 0 is the target; every remaining column is a feature.
y = data.iloc[:, 0].to_numpy()
X = data.iloc[:, 1:].to_numpy()
# One-hot version of the target (not consumed by the classifiers in the cells shown here).
Y = pd.get_dummies(y).to_numpy()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [None]:
# Total element counts (not row counts) of each split and of the full frame.
tuple(part.size for part in (X_train, X_test, y_train, y_test, data))

(53340, 13350, 3556, 890, 71136)

In [None]:
# Standardize the features. The scaler's mean/variance are learned from the
# training split only and then re-applied to the test split: re-fitting on the
# test data would scale the two splits differently and leak held-out
# information into preprocessing.
# NOTE: this cell overwrites X_train/X_test, so re-run the split cell before
# re-running it.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

**Using The SVM Classifier**

In [None]:
# RBF-kernel SVM: fit on the training split (fit returns the estimator itself),
# then predict hard class labels for the held-out split.
svc = SVC(kernel='rbf', gamma='auto').fit(X_train, y_train)
y_pred = svc.predict(X_test)

In [None]:
# Per-class precision / recall / F1 for the SVC predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.73      0.49      0.59       252
           1       0.82      0.93      0.87       638

    accuracy                           0.80       890
   macro avg       0.77      0.71      0.73       890
weighted avg       0.79      0.80      0.79       890



In [None]:
# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[124, 128],
       [ 47, 591]])

In [None]:
# Overall accuracy of the SVC on the held-out split, rounded to two decimals.
round(accuracy_score(y_true=y_test, y_pred=y_pred), 2)

0.8

In [None]:
# Average precision summarizes the precision-recall curve, so it needs the
# model's continuous decision scores; thresholded 0/1 predictions collapse the
# curve to a single operating point.
round(average_precision_score(y_test, svc.decision_function(X_test)), 2)

0.81



---



In [None]:
# Linear classifier trained with stochastic gradient descent. The optimizer
# shuffles the training data, so the seed is fixed (matching the seeded split
# and LinearSVC) to keep the fitted model and the metrics below reproducible.
sgdc = SGDClassifier(max_iter=1000, tol=1e-3, random_state=0)
sgdc.fit(X_train, y_train)
y_pred = sgdc.predict(X_test)

In [None]:
# Per-class precision / recall / F1 for the SGDClassifier predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.67      0.37      0.48       252
           1       0.79      0.93      0.85       638

    accuracy                           0.77       890
   macro avg       0.73      0.65      0.67       890
weighted avg       0.75      0.77      0.75       890



In [None]:
# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[ 94, 158],
       [ 47, 591]])

In [None]:
# Overall accuracy of the SGDClassifier on the held-out split, rounded to two decimals.
round(accuracy_score(y_true=y_test, y_pred=y_pred), 2)

0.77

In [None]:
# Average precision summarizes the precision-recall curve, so it needs the
# model's continuous decision scores; thresholded 0/1 predictions collapse the
# curve to a single operating point.
round(average_precision_score(y_test, sgdc.decision_function(X_test)), 2)

0.78



---



In [None]:
# Linear SVM with a tight tolerance and a generous iteration cap; the seed is
# fixed so repeated runs give the same model.
lsvc = LinearSVC(
    max_iter=30000,
    tol=1e-5,
    random_state=0,
)
lsvc.fit(X_train, y_train)
y_pred = lsvc.predict(X_test)

In [None]:
# Per-class precision / recall / F1 for the LinearSVC predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.73      0.48      0.58       252
           1       0.82      0.93      0.87       638

    accuracy                           0.80       890
   macro avg       0.77      0.71      0.73       890
weighted avg       0.79      0.80      0.79       890



In [None]:
# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[122, 130],
       [ 46, 592]])

In [None]:
# Overall accuracy of the LinearSVC on the held-out split, rounded to two decimals.
round(accuracy_score(y_true=y_test, y_pred=y_pred), 2)

0.8

In [None]:
# Average precision summarizes the precision-recall curve, so it needs the
# model's continuous decision scores; thresholded 0/1 predictions collapse the
# curve to a single operating point.
round(average_precision_score(y_test, lsvc.decision_function(X_test)), 2)

0.81