In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score

In [None]:
df1 = pd.read_csv('actives.csv')
df2 = pd.read_csv('inactives.csv')
df_total = pd.concat([df1,df2],axis=0)
df_total.to_csv('total.csv',index=False)

# split the dataset and normalization

In [2]:
df = pd.read_csv('total.csv', delimiter=',')
data = df.iloc[:, 1:47]
target = df.iloc[:, -1]
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=22, shuffle=True)
transfer = MinMaxScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

from collections import Counter
print(Counter(y_train))
print(Counter(y_test))

Counter({0: 3334, 1: 62})
Counter({0: 833, 1: 16})


# PCA treatment

In [3]:
transfer = PCA(n_components=8)
x_train_PCA = transfer.fit_transform(x_train)
x_test_PCA = transfer.transform(x_test)
# print(x_train_PCA.shape, x_test_PCA.shape)
# a = transfer.explained_variance_ratio_
# print(sum(list(a)))

# SVC

In [5]:
# with class weight and GridSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [6]:
# defining parameter range
estimator = SVC(class_weight='balanced')
param_grid = {'C': [0.01, 0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf','sigmoid','poly','linear']}
grid = GridSearchCV(estimator, param_grid, scoring='roc_auc', refit = True, verbose = 3)
 
# fitting the model for grid search
grid.fit(x_train_PCA, y_train)
y_pred = grid.predict(x_test_PCA)

# print best parameter and how our model looks after hyper-parameter tuning
print(grid.best_params_)
print(grid.best_estimator_)


Fitting 5 folds for each of 120 candidates, totalling 600 fits
[CV 1/5] END .......C=0.01, gamma=1, kernel=rbf;, score=0.687 total time=   0.4s
[CV 2/5] END .......C=0.01, gamma=1, kernel=rbf;, score=0.631 total time=   0.4s
[CV 3/5] END .......C=0.01, gamma=1, kernel=rbf;, score=0.814 total time=   0.4s
[CV 4/5] END .......C=0.01, gamma=1, kernel=rbf;, score=0.637 total time=   0.3s
[CV 5/5] END .......C=0.01, gamma=1, kernel=rbf;, score=0.755 total time=   0.4s
[CV 1/5] END ...C=0.01, gamma=1, kernel=sigmoid;, score=0.587 total time=   0.3s
[CV 2/5] END ...C=0.01, gamma=1, kernel=sigmoid;, score=0.560 total time=   0.3s
[CV 3/5] END ...C=0.01, gamma=1, kernel=sigmoid;, score=0.734 total time=   0.3s
[CV 4/5] END ...C=0.01, gamma=1, kernel=sigmoid;, score=0.560 total time=   0.3s
[CV 5/5] END ...C=0.01, gamma=1, kernel=sigmoid;, score=0.729 total time=   0.3s
[CV 1/5] END ......C=0.01, gamma=1, kernel=poly;, score=0.635 total time=   0.2s
[CV 2/5] END ......C=0.01, gamma=1, kernel=pol

[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.677 total time=   0.2s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.847 total time=   0.3s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.705 total time=   0.2s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.745 total time=   0.3s
[CV 1/5] END ....C=0.1, gamma=1, kernel=sigmoid;, score=0.624 total time=   0.2s
[CV 2/5] END ....C=0.1, gamma=1, kernel=sigmoid;, score=0.542 total time=   0.3s
[CV 3/5] END ....C=0.1, gamma=1, kernel=sigmoid;, score=0.595 total time=   0.2s
[CV 4/5] END ....C=0.1, gamma=1, kernel=sigmoid;, score=0.534 total time=   0.3s
[CV 5/5] END ....C=0.1, gamma=1, kernel=sigmoid;, score=0.549 total time=   0.3s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.689 total time=   0.1s
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.620 total time=   0.1s
[CV 3/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.769 total time=   0.1s
[CV 4/5] END .......C=0.1, g

[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.691 total time=   0.1s
[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.762 total time=   0.1s
[CV 1/5] END ......C=1, gamma=1, kernel=sigmoid;, score=0.715 total time=   0.1s
[CV 2/5] END ......C=1, gamma=1, kernel=sigmoid;, score=0.701 total time=   0.1s
[CV 3/5] END ......C=1, gamma=1, kernel=sigmoid;, score=0.591 total time=   0.1s
[CV 4/5] END ......C=1, gamma=1, kernel=sigmoid;, score=0.489 total time=   0.2s
[CV 5/5] END ......C=1, gamma=1, kernel=sigmoid;, score=0.540 total time=   0.2s
[CV 1/5] END .........C=1, gamma=1, kernel=poly;, score=0.721 total time=   0.1s
[CV 2/5] END .........C=1, gamma=1, kernel=poly;, score=0.669 total time=   0.1s
[CV 3/5] END .........C=1, gamma=1, kernel=poly;, score=0.787 total time=   0.1s
[CV 4/5] END .........C=1, gamma=1, kernel=poly;, score=0.665 total time=   0.1s
[CV 5/5] END .........C=1, gamma=1, kernel=poly;, score=0.735 total time=   0.1s
[CV 1/5] END .......C=1, gam

[CV 1/5] END .....C=10, gamma=1, kernel=sigmoid;, score=0.708 total time=   0.1s
[CV 2/5] END .....C=10, gamma=1, kernel=sigmoid;, score=0.702 total time=   0.1s
[CV 3/5] END .....C=10, gamma=1, kernel=sigmoid;, score=0.583 total time=   0.1s
[CV 4/5] END .....C=10, gamma=1, kernel=sigmoid;, score=0.388 total time=   0.0s
[CV 5/5] END .....C=10, gamma=1, kernel=sigmoid;, score=0.532 total time=   0.2s
[CV 1/5] END ........C=10, gamma=1, kernel=poly;, score=0.753 total time=   0.2s
[CV 2/5] END ........C=10, gamma=1, kernel=poly;, score=0.687 total time=   0.1s
[CV 3/5] END ........C=10, gamma=1, kernel=poly;, score=0.751 total time=   0.2s
[CV 4/5] END ........C=10, gamma=1, kernel=poly;, score=0.588 total time=   0.1s
[CV 5/5] END ........C=10, gamma=1, kernel=poly;, score=0.748 total time=   0.1s
[CV 1/5] END ......C=10, gamma=1, kernel=linear;, score=0.604 total time=   0.2s
[CV 2/5] END ......C=10, gamma=1, kernel=linear;, score=0.563 total time=   0.2s
[CV 3/5] END ......C=10, gam

[CV 2/5] END ....C=100, gamma=1, kernel=sigmoid;, score=0.702 total time=   0.1s
[CV 3/5] END ....C=100, gamma=1, kernel=sigmoid;, score=0.580 total time=   0.1s
[CV 4/5] END ....C=100, gamma=1, kernel=sigmoid;, score=0.389 total time=   0.0s
[CV 5/5] END ....C=100, gamma=1, kernel=sigmoid;, score=0.523 total time=   0.1s
[CV 1/5] END .......C=100, gamma=1, kernel=poly;, score=0.774 total time=   0.5s
[CV 2/5] END .......C=100, gamma=1, kernel=poly;, score=0.701 total time=   0.6s
[CV 3/5] END .......C=100, gamma=1, kernel=poly;, score=0.750 total time=   0.6s
[CV 4/5] END .......C=100, gamma=1, kernel=poly;, score=0.596 total time=   0.4s
[CV 5/5] END .......C=100, gamma=1, kernel=poly;, score=0.778 total time=   0.5s
[CV 1/5] END .....C=100, gamma=1, kernel=linear;, score=0.604 total time=   0.6s
[CV 2/5] END .....C=100, gamma=1, kernel=linear;, score=0.563 total time=   0.6s
[CV 3/5] END .....C=100, gamma=1, kernel=linear;, score=0.710 total time=   0.7s
[CV 4/5] END .....C=100, gam

[CV 4/5] END ...C=1000, gamma=1, kernel=sigmoid;, score=0.389 total time=   0.0s
[CV 5/5] END ...C=1000, gamma=1, kernel=sigmoid;, score=0.523 total time=   0.1s
[CV 1/5] END ......C=1000, gamma=1, kernel=poly;, score=0.774 total time=   3.0s
[CV 2/5] END ......C=1000, gamma=1, kernel=poly;, score=0.722 total time=   4.2s
[CV 3/5] END ......C=1000, gamma=1, kernel=poly;, score=0.758 total time=   5.4s
[CV 4/5] END ......C=1000, gamma=1, kernel=poly;, score=0.612 total time=   4.6s
[CV 5/5] END ......C=1000, gamma=1, kernel=poly;, score=0.781 total time=   2.7s
[CV 1/5] END ....C=1000, gamma=1, kernel=linear;, score=0.604 total time=   3.8s
[CV 2/5] END ....C=1000, gamma=1, kernel=linear;, score=0.563 total time=   3.6s
[CV 3/5] END ....C=1000, gamma=1, kernel=linear;, score=0.711 total time=   3.3s
[CV 4/5] END ....C=1000, gamma=1, kernel=linear;, score=0.616 total time=   3.5s
[CV 5/5] END ....C=1000, gamma=1, kernel=linear;, score=0.588 total time=   3.8s
[CV 1/5] END .....C=1000, ga

In [None]:
# model evaluation
print('The accuracy is:', grid.score(x_test_PCA, y_test))
score = classification_report(y_test, y_pred, labels=(0, 1), target_names=("inactives", "actives"))  
print(score)
print('ROC AUC score:', roc_auc_score(y_test, y_pred))

The accuracy is: 0.6311524609843938
              precision    recall  f1-score   support

   inactives       0.99      0.97      0.98       833
     actives       0.23      0.44      0.30        16

    accuracy                           0.96       849
   macro avg       0.61      0.70      0.64       849
weighted avg       0.97      0.96      0.97       849

ROC AUC score： 0.7049444777911165
