In [1]:
import pandas as pd
import numpy as np
import math
import argparse
import joblib
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.metrics import confusion_matrix

In [2]:
def overall_scorer(type=0):
    ''' Create a scorer for the requested problem type.

    Args:
        type: problem type selector; 0 = binary classification (e.g. SVC).
            The name shadows the builtin `type` but is kept so existing
            callers (positional or keyword) keep working.
    Returns:
        function: scorer(clf, X, y) returning a dict of metrics.
    Raises:
        ValueError: if `type` is not a supported problem type.
    '''
    def safe_ratio(numerator, denominator):
        # A metric whose denominator is empty is undefined: report NaN
        # explicitly instead of relying on numpy's divide-by-zero warning.
        return numerator / denominator if denominator else float('nan')

    def binary_classification_scorer(clf, X, y):
        ''' Score a fitted binary classifier on (X, y).

        Args:
            clf: fitted estimator exposing `predict`, and optionally
                `decision_function` or `predict_proba` (used for the AUC).
            X: feature matrix passed straight to the estimator.
            y: true labels.
        Returns:
            dict with keys tn, fp, fn, tp, auc, accuracy, precision,
            sensitivity, specificity, MCC, PPV, NPV. Undefined ratios are
            NaN; `auc` is None when the estimator exposes no score output.
        Raises:
            ValueError: if the confusion matrix is not 2x2.
        '''
        y_pred = clf.predict(X)
        cm = confusion_matrix(y, y_pred)
        if cm.shape != (2, 2):
            raise ValueError(
                f"binary scorer needs a 2x2 confusion matrix, got shape {cm.shape}"
            )

        # Plain Python ints: the MCC denominator multiplies four counts and
        # would overflow fixed-width int64 on large datasets.
        tn, fp, fn, tp = int(cm[0, 0]), int(cm[0, 1]), int(cm[1, 0]), int(cm[1, 1])

        accuracy = safe_ratio(tp + tn, tn + fp + fn + tp)
        precision = safe_ratio(tp, tp + fp)
        sensitivity = safe_ratio(tp, tp + fn)
        specificity = safe_ratio(tn, tn + fp)
        PPV = precision  # positive predictive value is the same quantity as precision
        NPV = safe_ratio(tn, fn + tn)
        MCC = safe_ratio(
            (tp * tn) - (fp * fn),
            math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)),
        )

        # Not every estimator has decision_function (e.g. tree ensembles);
        # fall back to the positive-class probability, else report no AUC.
        if hasattr(clf, 'decision_function'):
            y_score = clf.decision_function(X)
        elif hasattr(clf, 'predict_proba'):
            y_score = clf.predict_proba(X)[:, 1]
        else:
            y_score = None
        auc = roc_auc_score(y, y_score) if y_score is not None else None

        results = {
            'tn': tn,
            'fp': fp,
            'fn': fn,
            'tp': tp,
            'auc': auc,
            'accuracy': accuracy,
            'precision': precision,
            'sensitivity': sensitivity,
            'specificity': specificity,
            'MCC': MCC,
            'PPV': PPV,
            "NPV": NPV,
        }
        return results

    if type == 0:
        return binary_classification_scorer
    raise ValueError(
        f"unsupported scorer type: {type!r} (only 0 = binary classification is available)"
    )

從這邊修改設定與輸入檔案:
- 測試資料
- 模型
- 挑選特徵
- 輸出檔案
- scaler
---
HW1 只要修改：
- 模型
- 挑選特徵
- 輸出檔案

**如果有人用 random forest 的模型，先讓 `auc = None`**

In [3]:
# --- Run configuration: edit the values below to switch data / model / output ---
problems = [0, 1, 2, 3, 4, 5, 6]

# Dataset to evaluate.
raw_df = pd.read_csv("./hw1/dialysis-binary-14-ind-xday.csv")

# Fitted model. NOTE: joblib.load unpickles the file, so only load trusted artifacts.
model = joblib.load("./hw1/dialysis-binary-svc.model")

# Selected feature names are taken from the header row of the features CSV.
features = pd.read_csv("./hw1/dialysis-binary-features.csv").columns.tolist()

# Where the metrics table is written.
outfile = "./hw1/dialysis-binary-svc-results.csv"

# Scaler used for standardization (same trust caveat as the model file).
scaler = joblib.load("./hw1/dialysis-binary-std.bin")

# Problem type selector: 0 = binary classification.
problem_type = 0

In [4]:
# Number of selected features (length of the `features` list)
features_num = len(features)

Predictions

In [5]:
# Separate the target column from the predictors.
labels = raw_df["label"]
X = raw_df.drop(columns=["label"])

# Standardize with the loaded scaler (when one is configured), keeping the column names.
if scaler:
    X = pd.DataFrame(
        scaler.transform(X),
        columns=X.columns,
    )

# Keep only the selected features, then predict.
X = X[features]
preds = model.predict(X)

In [6]:
# Build the scorer from the configured problem type (0 = binary classification)
# instead of a hard-coded literal, then score the model on the prepared data.
results = overall_scorer(problem_type)(model, X, labels)

In [7]:
# One-row table of the metrics already held in `results`; reusing the dict
# avoids running prediction and scoring a second time.
results_df = pd.DataFrame.from_dict(results, orient='index').T

In [8]:
# Write the metrics table to `outfile` when an output path is configured.
# Missing values are replaced with empty strings before writing.
if outfile: 
    results_df.fillna("").to_csv(outfile, index=False)
    print(f"Save results:{outfile}")

Save results:./hw1/dialysis-binary-svc-results.csv
