In [1]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures # for polynomial features
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import GridSearchCV


In [2]:
df = pd.read_csv(r"..\Cases\Bankruptcy\Bankruptcy.csv")
df = df.drop('NO', axis=1)

In [3]:
y = df['D']
X = df.drop('D', axis=1)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=2021,stratify=y)

In [5]:
from sklearn.svm import SVC

svc = SVC(C=0.5, kernel='linear')

svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)

In [6]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss, roc_auc_score

print(accuracy_score(y_test, y_pred))

0.725


In [7]:
from sklearn.model_selection import StratifiedKFold


kfold = StratifiedKFold(shuffle=True, random_state=24, n_splits=5)

params = {'C': [0.1,0.5, 1, 1.5, 2, 3]}

gcv = GridSearchCV(svc, param_grid=params, cv=kfold)

gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'C': 0.1}
0.8025641025641026


In [8]:
params = {'C': np.linspace(0.001, 5, 10)}

gcv = GridSearchCV(svc, param_grid=params, cv=kfold)

gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'C': 2.2227777777777775}
0.8022792022792024


In [9]:
params = {'C': [0.1,0.5, 1, 1.5, 2, 3]}

gcv = GridSearchCV(svc, param_grid=params, cv=kfold)

gcv.fit(X, y)
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)
print(pd_cv)

{'C': 0.1}
0.8025641025641026
   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0       0.005526      0.005129         0.001095        0.001351     0.1   
1       0.009375      0.012501         0.006261        0.007668     0.5   
2       0.015617      0.009882         0.000000        0.000000       1   
3       0.021500      0.009165         0.000688        0.000860     1.5   
4       0.025002      0.007656         0.000000        0.000000       2   
5       0.040628      0.021177         0.000000        0.000000       3   

       params  split0_test_score  split1_test_score  split2_test_score  \
0  {'C': 0.1}           0.814815           0.851852           0.653846   
1  {'C': 0.5}           0.814815           0.851852           0.692308   
2    {'C': 1}           0.814815           0.851852           0.730769   
3  {'C': 1.5}           0.814815           0.888889           0.730769   
4    {'C': 2}           0.814815           0.851852           0.730769   


In [10]:
svc = SVC(C=0.5, kernel='linear', probability=True, random_state=24)

svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)

y_pred = svc.predict(X_test)

y_pred_prob = svc.predict_proba(X_test)

print(accuracy_score(y_test, y_pred))
print(roc_auc_score(y_test,y_pred_prob[:,1]))
print(log_loss(y_test, y_pred_prob))

0.725
0.835
0.4791806924721035


In [11]:
params = {'C': [0.1,0.5, 1, 1.5, 2, 3]}

gcv = GridSearchCV(svc, param_grid=params, cv=kfold, scoring='neg_log_loss')

gcv.fit(X, y)
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)
# print(pd_cv)

{'C': 0.5}
-0.4714746924375984


In [12]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import StratifiedKFold
import numpy as np

kfold = StratifiedKFold(shuffle=True, random_state=24, n_splits=5)

std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()
svc = SVC( kernel='linear', probability=True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())
params = {
    'SVC__C': np.linspace(0.001, 5, 30),
    # 'SVC__C': [1,2,3,4,5,6,7,8,9,10], 8
    # 'SVC__C': [0.1,1,1.5,2,3],  0.1
          'SCL': [None, std_scaler, mm_scaler]
          }

gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                    scoring='neg_log_loss'
                    )

gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='linear', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='linear', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'linear', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
{'SCL': None, 'SVC__C': 0.17337931034482756}
-0.4618964678685343


In [13]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import StratifiedKFold
import numpy as np

kfold = StratifiedKFold(shuffle=True, random_state=24, n_splits=5)

std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()
svc = SVC( kernel='poly', probability=True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
          'SCL': [None, std_scaler, mm_scaler],
          'SVC__degree':[2,3],
          'SVC__coef0':np.linspace(0,3,5)
          }

gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                    scoring='neg_log_loss',verbose=2
                    )

gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='poly', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='poly', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'poly', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
Fitting 5 folds for each of 600 candidates, totalling 3000 fits
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC__degree=2; total time=   0.0s
[CV] END SCL=None, SVC__C=0.001, SVC__coef0=0.0, SVC

In [14]:
kfold = StratifiedKFold(shuffle=True, random_state=24, n_splits=5)

std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()
svc = SVC( kernel='rbf', probability=True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
          'SCL': [None, std_scaler, mm_scaler],
          'SVC__gamma':np.linspace(0.001,5,5)
          }

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                    scoring='neg_log_loss',verbose=1
                    )

gcv.fit(X, y)
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'rbf', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
Fitting 5 folds for each of 300 candidates, totalling 1500 fits
{'SCL': MinMaxScaler(), 'SVC__C': 0.26410526315789473, 'SVC__gamma': 1.2507499999999998}
-0.4809688740028147


In [15]:
kfold = StratifiedKFold(shuffle=True, random_state=24, n_splits=5)

std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()
svc = SVC( kernel='sigmoid', probability=True, random_state=24)

pipe = Pipeline([('SCL', None), ('SVC', svc)])

print(pipe.get_params())
params = {
    'SVC__C': np.linspace(0.001, 5, 20),
          'SCL': [None, std_scaler, mm_scaler],
          'SVC__gamma':np.linspace(0.001,5,5)
          }

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

gcv = GridSearchCV(pipe, param_grid=params, cv=kfold,
                    scoring='neg_log_loss',verbose=1
                    )

gcv.fit(X, y)
pd_cv = pd.DataFrame(gcv.cv_results_)
print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('SVC', SVC(kernel='sigmoid', probability=True, random_state=24))], 'verbose': False, 'SCL': None, 'SVC': SVC(kernel='sigmoid', probability=True, random_state=24), 'SVC__C': 1.0, 'SVC__break_ties': False, 'SVC__cache_size': 200, 'SVC__class_weight': None, 'SVC__coef0': 0.0, 'SVC__decision_function_shape': 'ovr', 'SVC__degree': 3, 'SVC__gamma': 'scale', 'SVC__kernel': 'sigmoid', 'SVC__max_iter': -1, 'SVC__probability': True, 'SVC__random_state': 24, 'SVC__shrinking': True, 'SVC__tol': 0.001, 'SVC__verbose': False}
Fitting 5 folds for each of 300 candidates, totalling 1500 fits
{'SCL': StandardScaler(), 'SVC__C': 0.5272105263157895, 'SVC__gamma': 5.0}
-0.49644194298996636
