In [1]:
import pandas as pd
import numpy as np
import math
import argparse
import joblib
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.metrics import confusion_matrix, r2_score, mean_absolute_error, mean_squared_error, max_error

In [2]:
def overall_scorer(type=0):
    '''Create a scorer for the requested problem type.

    Args:
        type: 0 = binary classification, 1 = regression.
    Returns:
        function: scorer(clf, X, y) returning a dict that maps metric
            names to values.
    Raises:
        ValueError: if ``type`` is neither 0 nor 1.
    '''

    def _ratio(numerator, denominator):
        # A metric with a zero denominator is undefined; report NaN instead of
        # raising so the remaining metrics are still returned.
        return numerator / denominator if denominator else float('nan')

    def binary_classification_scorer(clf, X, y):
        '''Score a binary classifier.

        Returns a dict with the confusion-matrix counts (tn, fp, fn, tp),
        auc, accuracy, precision, sensitivity, specificity, MCC, PPV and NPV.
        Undefined ratios are NaN; auc is None when the model exposes neither
        ``decision_function`` nor ``predict_proba``.
        '''
        y_pred = clf.predict(X)
        cm = confusion_matrix(y, y_pred)
        # The indexing below relies on a 2x2 matrix with the negative class in
        # row/column 0. Counts are converted to plain Python ints so the
        # four-way product in the MCC denominator cannot overflow int64.
        tn, fp, fn, tp = int(cm[0, 0]), int(cm[0, 1]), int(cm[1, 0]), int(cm[1, 1])

        accuracy = _ratio(tp + tn, tn + fp + fn + tp)
        precision = _ratio(tp, tp + fp)
        sensitivity = _ratio(tp, tp + fn)
        specificity = _ratio(tn, tn + fp)
        PPV = precision  # positive predictive value is the same quantity as precision
        NPV = _ratio(tn, fn + tn)
        MCC = _ratio(
            tp * tn - fp * fn,
            math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)),
        )

        # Choose the score source by capability instead of catching every
        # exception, so genuine errors from the model or the metric surface.
        if hasattr(clf, 'decision_function'):
            auc = roc_auc_score(y, clf.decision_function(X))
        elif hasattr(clf, 'predict_proba'):
            # Column 1 holds the score of the second (positive) class.
            auc = roc_auc_score(y, clf.predict_proba(X)[:, 1])
        else:
            auc = None

        results = {
            'tn': tn,
            'fp': fp,
            'fn': fn,
            'tp': tp,
            'auc': auc,
            'accuracy': accuracy,
            'precision': precision,
            'sensitivity': sensitivity,
            'specificity': specificity,
            'MCC': MCC,
            'PPV': PPV,
            "NPV": NPV,
        }
        return results

    def regression_scorer(clf, X, y):
        '''Score a regressor.

        ``X`` must expose a two-element ``shape`` (rows, columns). Returns a
        dict with R2, adj_R2, MAE, MSE, RMSE and Max_error. adj_R2 is NaN when
        rows - columns - 1 is not positive, where the adjustment is undefined.
        '''
        y_pred = clf.predict(X)
        size, var_num = X.shape
        R2 = r2_score(y, y_pred)
        dof = size - var_num - 1
        adj_R2 = 1 - (1 - R2) * (size - 1) / dof if dof > 0 else float('nan')
        MAE = mean_absolute_error(y, y_pred)
        MSE = mean_squared_error(y, y_pred)
        RMSE = (MSE ** 0.5)
        Max_error = max_error(y, y_pred)

        results = {
            'R2': R2,
            'adj_R2': adj_R2,
            'MAE': MAE,
            'MSE': MSE,
            'RMSE': RMSE,
            'Max_error': Max_error
        }

        return results

    scorers = {0: binary_classification_scorer, 1: regression_scorer}
    try:
        return scorers[type]
    except (KeyError, TypeError):
        # Fail here with a clear message rather than handing back None and
        # letting the caller crash later on "NoneType is not callable".
        raise ValueError(
            f"Unsupported scorer type {type!r}; use 0 (binary classification) or 1 (regression)"
        ) from None

從這邊修改設定與輸入檔案:
- 測試資料
- 模型
- 挑選特徵
- 輸出檔案
- scaler
---
HW1 只要修改：
- 模型
- 挑選特徵
- 輸出檔案

**如果有人用 random forest 的模型，先讓 `auc = None`**

In [3]:
# ---- Evaluation settings: edit this cell to point at your own files ----

# Which scorer overall_scorer should build: 0 = binary classification, 1 = regression.
problem_type = 1

# Not referenced by any other cell visible in this notebook.
problems = [0, 1, 2, 3, 4, 5, 6]

# Where the metric table is written; a falsy value skips saving.
# Alternative used previously: "./hw2/regression-svr-results.csv"
outfile = "./hw2/test-results.csv"

# Optional list of column names to keep; None keeps every column.
# To load a selection from a CSV header instead:
#   features = list(pd.read_csv("./hw1/dialysis-binary-features.csv").columns)
features = None

# Test data; must contain a 'label' column.
# Alternative used previously: "./data/dialysis/Regression_2weeks_data_v2.csv"
raw_df = pd.read_csv("./hw2/dialysis-regression-14-ind-xday.csv")

# NOTE(review): joblib.load unpickles its input, so only load files you trust.
# Fitted model to evaluate.
model = joblib.load("./hw2/dialysis-regression-14-svr.model")

# Fitted scaler, used only when the data should be standardized.
scaler = joblib.load("./hw2/dialysis-regression-14-std.bin")

In [4]:
# Rename the selected feature columns: every name of the form 'c<number>'
# becomes 'c<number + 39>'.
# Skipped entirely when no feature selection is configured (features is None or
# empty), which previously crashed with "'NoneType' object is not iterable".
# NOTE: not idempotent - re-running this cell shifts the numbers by 39 again.
if features:
    renamed_features = []
    for feature in features:
        if feature.startswith('c'):
            try:
                feature = f'c{int(feature[1:])+39}'
            except ValueError:
                # Starts with 'c' but the rest is not a number; leave it unchanged.
                pass
        renamed_features.append(feature)
    features = renamed_features

Predictions

In [5]:
# Split the evaluation frame into the feature matrix and the target column.
X = raw_df.drop(columns=['label'])
labels = raw_df['label']

# Apply the pre-fitted scaler to every column. The original index is passed
# through so X stays row-aligned with `labels` even when raw_df does not have
# a default RangeIndex.
if scaler:
    X = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)

# Optional feature selection, done after scaling.
# NOTE(review): presumably the scaler was fit on the full column set - confirm.
if features:
    X = X[features]

In [6]:
# Build the scorer for the configured problem type, evaluate the model once,
# and lay the metric dict out as a single-row table (one column per metric).
score_fn = overall_scorer(problem_type)
metric_values = score_fn(model, X, labels)
results_df = pd.DataFrame.from_dict(metric_values, orient='index').T



In [7]:
# Write the metric table to disk when an output path is configured.
# Missing values (NaN/None) are written as empty cells.
if outfile:
    (
        results_df
        .fillna("")
        .to_csv(outfile, index=False)
    )
    print(f"Save results:{outfile}")

Save results:./hw2/test-results.csv
